   1/*
   2 *	Linux INET6 implementation
   3 *	FIB front-end.
   4 *
   5 *	Authors:
   6 *	Pedro Roque		<roque@di.fc.ul.pt>
   7 *
   8 *	This program is free software; you can redistribute it and/or
   9 *      modify it under the terms of the GNU General Public License
  10 *      as published by the Free Software Foundation; either version
  11 *      2 of the License, or (at your option) any later version.
  12 */
  13
  14/*	Changes:
  15 *
  16 *	YOSHIFUJI Hideaki @USAGI
  17 *		reworked default router selection.
  18 *		- respect outgoing interface
  19 *		- select from (probably) reachable routers (i.e.
  20 *		routers in REACHABLE, STALE, DELAY or PROBE states).
  21 *		- always select the same router if it is (probably)
  22 *		reachable.  otherwise, round-robin the list.
  23 *	Ville Nuorvala
  24 *		Fixed routing subtrees.
  25 */
  26
  27#include <linux/capability.h>
  28#include <linux/errno.h>
  29#include <linux/types.h>
  30#include <linux/times.h>
  31#include <linux/socket.h>
  32#include <linux/sockios.h>
  33#include <linux/net.h>
  34#include <linux/route.h>
  35#include <linux/netdevice.h>
  36#include <linux/in6.h>
  37#include <linux/mroute6.h>
  38#include <linux/init.h>
  39#include <linux/if_arp.h>
  40#include <linux/proc_fs.h>
  41#include <linux/seq_file.h>
  42#include <linux/nsproxy.h>
  43#include <linux/slab.h>
  44#include <net/net_namespace.h>
  45#include <net/snmp.h>
  46#include <net/ipv6.h>
  47#include <net/ip6_fib.h>
  48#include <net/ip6_route.h>
  49#include <net/ndisc.h>
  50#include <net/addrconf.h>
  51#include <net/tcp.h>
  52#include <linux/rtnetlink.h>
  53#include <net/dst.h>
  54#include <net/xfrm.h>
  55#include <net/netevent.h>
  56#include <net/netlink.h>
  57
  58#include <asm/uaccess.h>
  59
  60#ifdef CONFIG_SYSCTL
  61#include <linux/sysctl.h>
  62#endif
  63
  64/* Set to 3 to get tracing. */
  65#define RT6_DEBUG 2
  66
  67#if RT6_DEBUG >= 3
  68#define RDBG(x) printk x
  69#define RT6_TRACE(x...) printk(KERN_DEBUG x)
  70#else
  71#define RDBG(x)
  72#define RT6_TRACE(x...) do { ; } while (0)
  73#endif
  74
  75static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
  76				    const struct in6_addr *dest);
  77static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
  78static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
  79static unsigned int	 ip6_default_mtu(const struct dst_entry *dst);
  80static struct dst_entry *ip6_negative_advice(struct dst_entry *);
  81static void		ip6_dst_destroy(struct dst_entry *);
  82static void		ip6_dst_ifdown(struct dst_entry *,
  83				       struct net_device *dev, int how);
  84static int		 ip6_dst_gc(struct dst_ops *ops);
  85
  86static int		ip6_pkt_discard(struct sk_buff *skb);
  87static int		ip6_pkt_discard_out(struct sk_buff *skb);
  88static void		ip6_link_failure(struct sk_buff *skb);
  89static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
  90
  91#ifdef CONFIG_IPV6_ROUTE_INFO
  92static struct rt6_info *rt6_add_route_info(struct net *net,
  93					   const struct in6_addr *prefix, int prefixlen,
  94					   const struct in6_addr *gwaddr, int ifindex,
  95					   unsigned pref);
  96static struct rt6_info *rt6_get_route_info(struct net *net,
  97					   const struct in6_addr *prefix, int prefixlen,
  98					   const struct in6_addr *gwaddr, int ifindex);
  99#endif
 100
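/*
 * Copy-on-write for dst metrics: a host route shares the metrics array of
 * its inet_peer.  On the first write the read-only template metrics are
 * copied into the peer's array and dst->_metrics is switched over with
 * cmpxchg(); if another CPU wins that race, the pointer it installed is
 * used instead (or NULL if it is still read-only).
 */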
 101static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
 102{
 103	struct rt6_info *rt = (struct rt6_info *) dst;
 104	struct inet_peer *peer;
 105	u32 *p = NULL;
 106
 107	if (!(rt->dst.flags & DST_HOST))
 108		return NULL;
 109
 110	if (!rt->rt6i_peer)
 111		rt6_bind_peer(rt, 1);
 112
 113	peer = rt->rt6i_peer;
 114	if (peer) {
 115		u32 *old_p = __DST_METRICS_PTR(old);
 116		unsigned long prev, new;
 117
 118		p = peer->metrics;
 119		if (inet_metrics_new(peer))
 120			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
 121
 122		new = (unsigned long) p;
 123		prev = cmpxchg(&dst->_metrics, old, new);
 124
 125		if (prev != old) {
 126			p = __DST_METRICS_PTR(prev);
 127			if (prev & DST_METRICS_READ_ONLY)
 128				p = NULL;
 129		}
 130	}
 131	return p;
 132}
 133
 134static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
 135{
 136	return __neigh_lookup_errno(&nd_tbl, daddr, dst->dev);
 137}
 138
 139static struct dst_ops ip6_dst_ops_template = {
 140	.family			=	AF_INET6,
 141	.protocol		=	cpu_to_be16(ETH_P_IPV6),
 142	.gc			=	ip6_dst_gc,
 143	.gc_thresh		=	1024,
 144	.check			=	ip6_dst_check,
 145	.default_advmss		=	ip6_default_advmss,
 146	.default_mtu		=	ip6_default_mtu,
 147	.cow_metrics		=	ipv6_cow_metrics,
 148	.destroy		=	ip6_dst_destroy,
 149	.ifdown			=	ip6_dst_ifdown,
 150	.negative_advice	=	ip6_negative_advice,
 151	.link_failure		=	ip6_link_failure,
 152	.update_pmtu		=	ip6_rt_update_pmtu,
 153	.local_out		=	__ip6_local_out,
 154	.neigh_lookup		=	ip6_neigh_lookup,
 155};
 156
 157static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
 158{
 159	return 0;
 160}
 161
 162static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
 163{
 164}
 165
 166static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
 167					 unsigned long old)
 168{
 169	return NULL;
 170}
 171
 172static struct dst_ops ip6_dst_blackhole_ops = {
 173	.family			=	AF_INET6,
 174	.protocol		=	cpu_to_be16(ETH_P_IPV6),
 175	.destroy		=	ip6_dst_destroy,
 176	.check			=	ip6_dst_check,
 177	.default_mtu		=	ip6_blackhole_default_mtu,
 178	.default_advmss		=	ip6_default_advmss,
 179	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
 180	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
 181	.neigh_lookup		=	ip6_neigh_lookup,
 182};
 183
 184static const u32 ip6_template_metrics[RTAX_MAX] = {
 185	[RTAX_HOPLIMIT - 1] = 255,
 186};
 187
 188static struct rt6_info ip6_null_entry_template = {
 189	.dst = {
 190		.__refcnt	= ATOMIC_INIT(1),
 191		.__use		= 1,
 192		.obsolete	= -1,
 193		.error		= -ENETUNREACH,
 194		.input		= ip6_pkt_discard,
 195		.output		= ip6_pkt_discard_out,
 196	},
 197	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 198	.rt6i_protocol  = RTPROT_KERNEL,
 199	.rt6i_metric	= ~(u32) 0,
 200	.rt6i_ref	= ATOMIC_INIT(1),
 201};
 202
 203#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 204
 205static int ip6_pkt_prohibit(struct sk_buff *skb);
 206static int ip6_pkt_prohibit_out(struct sk_buff *skb);
 207
 208static struct rt6_info ip6_prohibit_entry_template = {
 209	.dst = {
 210		.__refcnt	= ATOMIC_INIT(1),
 211		.__use		= 1,
 212		.obsolete	= -1,
 213		.error		= -EACCES,
 214		.input		= ip6_pkt_prohibit,
 215		.output		= ip6_pkt_prohibit_out,
 216	},
 217	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 218	.rt6i_protocol  = RTPROT_KERNEL,
 219	.rt6i_metric	= ~(u32) 0,
 220	.rt6i_ref	= ATOMIC_INIT(1),
 221};
 222
 223static struct rt6_info ip6_blk_hole_entry_template = {
 224	.dst = {
 225		.__refcnt	= ATOMIC_INIT(1),
 226		.__use		= 1,
 227		.obsolete	= -1,
 228		.error		= -EINVAL,
 229		.input		= dst_discard,
 230		.output		= dst_discard,
 231	},
 232	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 233	.rt6i_protocol  = RTPROT_KERNEL,
 234	.rt6i_metric	= ~(u32) 0,
 235	.rt6i_ref	= ATOMIC_INIT(1),
 236};
 237
 238#endif
 239
 240/* allocate dst with ip6_dst_ops */
 241static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
 242					     struct net_device *dev,
 243					     int flags)
 244{
 245	struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
 246
 247	if (rt != NULL)
 248		memset(&rt->rt6i_table, 0,
 249			sizeof(*rt) - sizeof(struct dst_entry));
 250
 251	return rt;
 252}
 253
 254static void ip6_dst_destroy(struct dst_entry *dst)
 255{
 256	struct rt6_info *rt = (struct rt6_info *)dst;
 257	struct inet6_dev *idev = rt->rt6i_idev;
 258	struct inet_peer *peer = rt->rt6i_peer;
 259
 260	if (!(rt->dst.flags & DST_HOST))
 261		dst_destroy_metrics_generic(dst);
 262
 263	if (idev != NULL) {
 264		rt->rt6i_idev = NULL;
 265		in6_dev_put(idev);
 266	}
 267	if (peer) {
 268		rt->rt6i_peer = NULL;
 269		inet_putpeer(peer);
 270	}
 271}
 272
 273static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
 274
 275static u32 rt6_peer_genid(void)
 276{
 277	return atomic_read(&__rt6_peer_genid);
 278}
 279
 280void rt6_bind_peer(struct rt6_info *rt, int create)
 281{
 282	struct inet_peer *peer;
 283
 284	peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
 285	if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
 286		inet_putpeer(peer);
 287	else
 288		rt->rt6i_peer_genid = rt6_peer_genid();
 289}
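/*
 * rt6_bind_peer() attaches the shared inet_peer entry for this
 * destination; the cmpxchg() makes binding race-free without a lock, and
 * recording rt6i_peer_genid lets ip6_dst_check() notice later bumps of
 * the global generation counter and rebind lazily.
 */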
 290
 291static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 292			   int how)
 293{
 294	struct rt6_info *rt = (struct rt6_info *)dst;
 295	struct inet6_dev *idev = rt->rt6i_idev;
 296	struct net_device *loopback_dev =
 297		dev_net(dev)->loopback_dev;
 298
 299	if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
 300		struct inet6_dev *loopback_idev =
 301			in6_dev_get(loopback_dev);
 302		if (loopback_idev != NULL) {
 303			rt->rt6i_idev = loopback_idev;
 304			in6_dev_put(idev);
 305		}
 306	}
 307}
 308
 309static __inline__ int rt6_check_expired(const struct rt6_info *rt)
 310{
 311	return (rt->rt6i_flags & RTF_EXPIRES) &&
 312		time_after(jiffies, rt->rt6i_expires);
 313}
 314
 315static inline int rt6_need_strict(const struct in6_addr *daddr)
 316{
 317	return ipv6_addr_type(daddr) &
 318		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
 319}
 320
 321/*
 322 *	Route lookup. Any table->tb6_lock is implied.
 323 */
 324
 325static inline struct rt6_info *rt6_device_match(struct net *net,
 326						    struct rt6_info *rt,
 327						    const struct in6_addr *saddr,
 328						    int oif,
 329						    int flags)
 330{
 331	struct rt6_info *local = NULL;
 332	struct rt6_info *sprt;
 333
 334	if (!oif && ipv6_addr_any(saddr))
 335		goto out;
 336
 337	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
 338		struct net_device *dev = sprt->rt6i_dev;
 339
 340		if (oif) {
 341			if (dev->ifindex == oif)
 342				return sprt;
 343			if (dev->flags & IFF_LOOPBACK) {
 344				if (sprt->rt6i_idev == NULL ||
 345				    sprt->rt6i_idev->dev->ifindex != oif) {
 346					if (flags & RT6_LOOKUP_F_IFACE && oif)
 347						continue;
 348					if (local && (!oif ||
 349						      local->rt6i_idev->dev->ifindex == oif))
 350						continue;
 351				}
 352				local = sprt;
 353			}
 354		} else {
 355			if (ipv6_chk_addr(net, saddr, dev,
 356					  flags & RT6_LOOKUP_F_IFACE))
 357				return sprt;
 358		}
 359	}
 360
 361	if (oif) {
 362		if (local)
 363			return local;
 364
 365		if (flags & RT6_LOOKUP_F_IFACE)
 366			return net->ipv6.ip6_null_entry;
 367	}
 368out:
 369	return rt;
 370}
 371
 372#ifdef CONFIG_IPV6_ROUTER_PREF
 373static void rt6_probe(struct rt6_info *rt)
 374{
 375	struct neighbour *neigh;
 376	/*
  377	 * Okay, this does not seem to be appropriate
  378	 * for now; however, we need to check whether it
  379	 * really is, aka Router Reachability Probing.
 380	 *
 381	 * Router Reachability Probe MUST be rate-limited
 382	 * to no more than one per minute.
 383	 */
 384	rcu_read_lock();
 385	neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
 386	if (!neigh || (neigh->nud_state & NUD_VALID))
 387		goto out;
 388	read_lock_bh(&neigh->lock);
 389	if (!(neigh->nud_state & NUD_VALID) &&
 390	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
 391		struct in6_addr mcaddr;
 392		struct in6_addr *target;
 393
 394		neigh->updated = jiffies;
 395		read_unlock_bh(&neigh->lock);
 396
 397		target = (struct in6_addr *)&neigh->primary_key;
 398		addrconf_addr_solict_mult(target, &mcaddr);
 399		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
 400	} else {
 401		read_unlock_bh(&neigh->lock);
 402	}
 403out:
 404	rcu_read_unlock();
 405}
 406#else
 407static inline void rt6_probe(struct rt6_info *rt)
 408{
 409}
 410#endif
 411
 412/*
 413 * Default Router Selection (RFC 2461 6.3.6)
 414 */
 415static inline int rt6_check_dev(struct rt6_info *rt, int oif)
 416{
 417	struct net_device *dev = rt->rt6i_dev;
 418	if (!oif || dev->ifindex == oif)
 419		return 2;
 420	if ((dev->flags & IFF_LOOPBACK) &&
 421	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
 422		return 1;
 423	return 0;
 424}
 425
 426static inline int rt6_check_neigh(struct rt6_info *rt)
 427{
 428	struct neighbour *neigh;
 429	int m;
 430
 431	rcu_read_lock();
 432	neigh = dst_get_neighbour(&rt->dst);
 433	if (rt->rt6i_flags & RTF_NONEXTHOP ||
 434	    !(rt->rt6i_flags & RTF_GATEWAY))
 435		m = 1;
 436	else if (neigh) {
 437		read_lock_bh(&neigh->lock);
 438		if (neigh->nud_state & NUD_VALID)
 439			m = 2;
 440#ifdef CONFIG_IPV6_ROUTER_PREF
 441		else if (neigh->nud_state & NUD_FAILED)
 442			m = 0;
 443#endif
 444		else
 445			m = 1;
 446		read_unlock_bh(&neigh->lock);
 447	} else
 448		m = 0;
 449	rcu_read_unlock();
 450	return m;
 451}
 452
 453static int rt6_score_route(struct rt6_info *rt, int oif,
 454			   int strict)
 455{
 456	int m, n;
 457
 458	m = rt6_check_dev(rt, oif);
 459	if (!m && (strict & RT6_LOOKUP_F_IFACE))
 460		return -1;
 461#ifdef CONFIG_IPV6_ROUTER_PREF
 462	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
 463#endif
 464	n = rt6_check_neigh(rt);
 465	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
 466		return -1;
 467	return m;
 468}
 469
 470static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
 471				   int *mpri, struct rt6_info *match)
 472{
 473	int m;
 474
 475	if (rt6_check_expired(rt))
 476		goto out;
 477
 478	m = rt6_score_route(rt, oif, strict);
 479	if (m < 0)
 480		goto out;
 481
 482	if (m > *mpri) {
 483		if (strict & RT6_LOOKUP_F_REACHABLE)
 484			rt6_probe(match);
 485		*mpri = m;
 486		match = rt;
 487	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
 488		rt6_probe(rt);
 489	}
 490
 491out:
 492	return match;
 493}
 494
 495static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
 496				     struct rt6_info *rr_head,
 497				     u32 metric, int oif, int strict)
 498{
 499	struct rt6_info *rt, *match;
 500	int mpri = -1;
 501
 502	match = NULL;
 503	for (rt = rr_head; rt && rt->rt6i_metric == metric;
 504	     rt = rt->dst.rt6_next)
 505		match = find_match(rt, oif, strict, &mpri, match);
 506	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
 507	     rt = rt->dst.rt6_next)
 508		match = find_match(rt, oif, strict, &mpri, match);
 509
 510	return match;
 511}
 512
 513static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
 514{
 515	struct rt6_info *match, *rt0;
 516	struct net *net;
 517
 518	RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
 519		  __func__, fn->leaf, oif);
 520
 521	rt0 = fn->rr_ptr;
 522	if (!rt0)
 523		fn->rr_ptr = rt0 = fn->leaf;
 524
 525	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
 526
 527	if (!match &&
 528	    (strict & RT6_LOOKUP_F_REACHABLE)) {
 529		struct rt6_info *next = rt0->dst.rt6_next;
 530
 531		/* no entries matched; do round-robin */
 532		if (!next || next->rt6i_metric != rt0->rt6i_metric)
 533			next = fn->leaf;
 534
 535		if (next != rt0)
 536			fn->rr_ptr = next;
 537	}
 538
 539	RT6_TRACE("%s() => %p\n",
 540		  __func__, match);
 541
 542	net = dev_net(rt0->rt6i_dev);
 543	return match ? match : net->ipv6.ip6_null_entry;
 544}
 545
 546#ifdef CONFIG_IPV6_ROUTE_INFO
 547int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 548		  const struct in6_addr *gwaddr)
 549{
 550	struct net *net = dev_net(dev);
 551	struct route_info *rinfo = (struct route_info *) opt;
 552	struct in6_addr prefix_buf, *prefix;
 553	unsigned int pref;
 554	unsigned long lifetime;
 555	struct rt6_info *rt;
 556
 557	if (len < sizeof(struct route_info)) {
 558		return -EINVAL;
 559	}
 560
 561	/* Sanity check for prefix_len and length */
 562	if (rinfo->length > 3) {
 563		return -EINVAL;
 564	} else if (rinfo->prefix_len > 128) {
 565		return -EINVAL;
 566	} else if (rinfo->prefix_len > 64) {
 567		if (rinfo->length < 2) {
 568			return -EINVAL;
 569		}
 570	} else if (rinfo->prefix_len > 0) {
 571		if (rinfo->length < 1) {
 572			return -EINVAL;
 573		}
 574	}
 575
 576	pref = rinfo->route_pref;
 577	if (pref == ICMPV6_ROUTER_PREF_INVALID)
 578		return -EINVAL;
 579
 580	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
 581
 582	if (rinfo->length == 3)
 583		prefix = (struct in6_addr *)rinfo->prefix;
 584	else {
 585		/* this function is safe */
 586		ipv6_addr_prefix(&prefix_buf,
 587				 (struct in6_addr *)rinfo->prefix,
 588				 rinfo->prefix_len);
 589		prefix = &prefix_buf;
 590	}
 591
 592	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
 593				dev->ifindex);
 594
 595	if (rt && !lifetime) {
 596		ip6_del_rt(rt);
 597		rt = NULL;
 598	}
 599
 600	if (!rt && lifetime)
 601		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
 602					pref);
 603	else if (rt)
 604		rt->rt6i_flags = RTF_ROUTEINFO |
 605				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
 606
 607	if (rt) {
 608		if (!addrconf_finite_timeout(lifetime)) {
 609			rt->rt6i_flags &= ~RTF_EXPIRES;
 610		} else {
 611			rt->rt6i_expires = jiffies + HZ * lifetime;
 612			rt->rt6i_flags |= RTF_EXPIRES;
 613		}
 614		dst_release(&rt->dst);
 615	}
 616	return 0;
 617}
 618#endif
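/*
 * For reference (RFC 4191): the Route Information Option length field is
 * in units of 8 octets, so length 1 carries no prefix bytes, length 2
 * carries 8 prefix bytes (enough for prefix_len <= 64), and length 3
 * carries the full 16-byte prefix -- which is exactly what the sanity
 * checks in rt6_route_rcv() above enforce.
 */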
 619
 620#define BACKTRACK(__net, saddr)			\
 621do { \
 622	if (rt == __net->ipv6.ip6_null_entry) {	\
 623		struct fib6_node *pn; \
 624		while (1) { \
 625			if (fn->fn_flags & RTN_TL_ROOT) \
 626				goto out; \
 627			pn = fn->parent; \
 628			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
 629				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
 630			else \
 631				fn = pn; \
 632			if (fn->fn_flags & RTN_RTINFO) \
 633				goto restart; \
 634		} \
 635	} \
 636} while(0)
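/*
 * BACKTRACK climbs back up the fib6 tree when the chosen route is the
 * null entry: it walks toward the root, descending into a parent's
 * source-routing subtree (FIB6_SUBTREE) where one exists, until it finds
 * a node that actually carries routes (RTN_RTINFO), then jumps to the
 * caller's restart label to redo the selection there.
 */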
 637
 638static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 639					     struct fib6_table *table,
 640					     struct flowi6 *fl6, int flags)
 641{
 642	struct fib6_node *fn;
 643	struct rt6_info *rt;
 644
 645	read_lock_bh(&table->tb6_lock);
 646	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 647restart:
 648	rt = fn->leaf;
 649	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
 650	BACKTRACK(net, &fl6->saddr);
 651out:
 652	dst_use(&rt->dst, jiffies);
 653	read_unlock_bh(&table->tb6_lock);
 654	return rt;
 655
 656}
 657
 658struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
 659			    const struct in6_addr *saddr, int oif, int strict)
 660{
 661	struct flowi6 fl6 = {
 662		.flowi6_oif = oif,
 663		.daddr = *daddr,
 664	};
 665	struct dst_entry *dst;
 666	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
 667
 668	if (saddr) {
 669		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
 670		flags |= RT6_LOOKUP_F_HAS_SADDR;
 671	}
 672
 673	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
 674	if (dst->error == 0)
 675		return (struct rt6_info *) dst;
 676
 677	dst_release(dst);
 678
 679	return NULL;
 680}
 681
 682EXPORT_SYMBOL(rt6_lookup);
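/*
 * A minimal usage sketch (not part of the original file; the caller name
 * is hypothetical) showing the reference-counting contract of rt6_lookup():
 * a successful lookup returns a held entry that must be released.
 */
#if 0
static void example_use_rt6_lookup(struct net *net,
				   const struct in6_addr *daddr)
{
	/* oif == 0 and strict == 0: any outgoing interface may match */
	struct rt6_info *rt = rt6_lookup(net, daddr, NULL, 0, 0);

	if (rt) {
		/* ... inspect rt->rt6i_dev, rt->rt6i_gateway, ... */
		dst_release(&rt->dst);	/* drop the reference the lookup took */
	}
}
#endif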
 683
 684/* ip6_ins_rt is called with FREE table->tb6_lock.
  685   It takes a new route entry; if the addition fails for any reason,
  686   the route is freed. In any case, if the caller does not hold a
  687   reference, it may be destroyed.
 688 */
 689
 690static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
 691{
 692	int err;
 693	struct fib6_table *table;
 694
 695	table = rt->rt6i_table;
 696	write_lock_bh(&table->tb6_lock);
 697	err = fib6_add(&table->tb6_root, rt, info);
 698	write_unlock_bh(&table->tb6_lock);
 699
 700	return err;
 701}
 702
 703int ip6_ins_rt(struct rt6_info *rt)
 704{
 705	struct nl_info info = {
 706		.nl_net = dev_net(rt->rt6i_dev),
 707	};
 708	return __ip6_ins_rt(rt, &info);
 709}
 710
 711static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
 712				      const struct in6_addr *daddr,
 713				      const struct in6_addr *saddr)
 714{
 715	struct rt6_info *rt;
 716
 717	/*
 718	 *	Clone the route.
 719	 */
 720
 721	rt = ip6_rt_copy(ort, daddr);
 722
 723	if (rt) {
 724		struct neighbour *neigh;
 725		int attempts = !in_softirq();
 726
 727		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
 728			if (rt->rt6i_dst.plen != 128 &&
 729			    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
 730				rt->rt6i_flags |= RTF_ANYCAST;
 731			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
 732		}
 733
 734		rt->rt6i_flags |= RTF_CACHE;
 735
 736#ifdef CONFIG_IPV6_SUBTREES
 737		if (rt->rt6i_src.plen && saddr) {
 738			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
 739			rt->rt6i_src.plen = 128;
 740		}
 741#endif
 742
 743	retry:
 744		neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
 745		if (IS_ERR(neigh)) {
 746			struct net *net = dev_net(rt->rt6i_dev);
 747			int saved_rt_min_interval =
 748				net->ipv6.sysctl.ip6_rt_gc_min_interval;
 749			int saved_rt_elasticity =
 750				net->ipv6.sysctl.ip6_rt_gc_elasticity;
 751
 752			if (attempts-- > 0) {
 753				net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
 754				net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
 755
 756				ip6_dst_gc(&net->ipv6.ip6_dst_ops);
 757
 758				net->ipv6.sysctl.ip6_rt_gc_elasticity =
 759					saved_rt_elasticity;
 760				net->ipv6.sysctl.ip6_rt_gc_min_interval =
 761					saved_rt_min_interval;
 762				goto retry;
 763			}
 764
 765			if (net_ratelimit())
 766				printk(KERN_WARNING
 767				       "ipv6: Neighbour table overflow.\n");
 768			dst_free(&rt->dst);
 769			return NULL;
 770		}
 771		dst_set_neighbour(&rt->dst, neigh);
 772
 773	}
 774
 775	return rt;
 776}
 777
 778static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
 779					const struct in6_addr *daddr)
 780{
 781	struct rt6_info *rt = ip6_rt_copy(ort, daddr);
 782
 783	if (rt) {
 784		rt->rt6i_flags |= RTF_CACHE;
 785		dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst)));
 786	}
 787	return rt;
 788}
 789
 790static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
 791				      struct flowi6 *fl6, int flags)
 792{
 793	struct fib6_node *fn;
 794	struct rt6_info *rt, *nrt;
 795	int strict = 0;
 796	int attempts = 3;
 797	int err;
 798	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
 799
 800	strict |= flags & RT6_LOOKUP_F_IFACE;
 801
 802relookup:
 803	read_lock_bh(&table->tb6_lock);
 804
 805restart_2:
 806	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 807
 808restart:
 809	rt = rt6_select(fn, oif, strict | reachable);
 810
 811	BACKTRACK(net, &fl6->saddr);
 812	if (rt == net->ipv6.ip6_null_entry ||
 813	    rt->rt6i_flags & RTF_CACHE)
 814		goto out;
 815
 816	dst_hold(&rt->dst);
 817	read_unlock_bh(&table->tb6_lock);
 818
 819	if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
 820		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
 821	else if (!(rt->dst.flags & DST_HOST))
 822		nrt = rt6_alloc_clone(rt, &fl6->daddr);
 823	else
 824		goto out2;
 825
 826	dst_release(&rt->dst);
 827	rt = nrt ? : net->ipv6.ip6_null_entry;
 828
 829	dst_hold(&rt->dst);
 830	if (nrt) {
 831		err = ip6_ins_rt(nrt);
 832		if (!err)
 833			goto out2;
 834	}
 835
 836	if (--attempts <= 0)
 837		goto out2;
 838
 839	/*
  840	 * Race condition! In the gap when table->tb6_lock was
  841	 * released, someone could have inserted this route.  Relookup.
  842	 */
 843	dst_release(&rt->dst);
 844	goto relookup;
 845
 846out:
 847	if (reachable) {
 848		reachable = 0;
 849		goto restart_2;
 850	}
 851	dst_hold(&rt->dst);
 852	read_unlock_bh(&table->tb6_lock);
 853out2:
 854	rt->dst.lastuse = jiffies;
 855	rt->dst.__use++;
 856
 857	return rt;
 858}
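/*
 * Summary of ip6_pol_route() above: rt6_select() picks the best candidate
 * under the fib6 lock; unless that is already a cached host route, a
 * per-destination copy is made (rt6_alloc_cow() for routes still lacking
 * a neighbour, rt6_alloc_clone() for non-host routes) and inserted,
 * retrying up to three times if another CPU raced us while tb6_lock was
 * dropped.  When RT6_LOOKUP_F_REACHABLE yields nothing, the lookup is
 * redone once without the reachability requirement.
 */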
 859
 860static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
 861					    struct flowi6 *fl6, int flags)
 862{
 863	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
 864}
 865
 866void ip6_route_input(struct sk_buff *skb)
 867{
 868	const struct ipv6hdr *iph = ipv6_hdr(skb);
 869	struct net *net = dev_net(skb->dev);
 870	int flags = RT6_LOOKUP_F_HAS_SADDR;
 871	struct flowi6 fl6 = {
 872		.flowi6_iif = skb->dev->ifindex,
 873		.daddr = iph->daddr,
 874		.saddr = iph->saddr,
 875		.flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
 876		.flowi6_mark = skb->mark,
 877		.flowi6_proto = iph->nexthdr,
 878	};
 879
 880	if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
 881		flags |= RT6_LOOKUP_F_IFACE;
 882
 883	skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
 884}
 885
 886static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
 887					     struct flowi6 *fl6, int flags)
 888{
 889	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
 890}
 891
 892struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
 893				    struct flowi6 *fl6)
 894{
 895	int flags = 0;
 896
 897	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
 898		flags |= RT6_LOOKUP_F_IFACE;
 899
 900	if (!ipv6_addr_any(&fl6->saddr))
 901		flags |= RT6_LOOKUP_F_HAS_SADDR;
 902	else if (sk)
 903		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
 904
 905	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
 906}
 907
 908EXPORT_SYMBOL(ip6_route_output);
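/*
 * A minimal usage sketch (not part of the original file; the caller name
 * is hypothetical): ip6_route_output() never returns NULL, so errors are
 * reported through dst->error, as in rt6_lookup() above.
 */
#if 0
static struct dst_entry *example_output_route(struct net *net,
					      struct sock *sk,
					      const struct in6_addr *daddr)
{
	struct flowi6 fl6 = {
		.daddr = *daddr,
	};
	struct dst_entry *dst = ip6_route_output(net, sk, &fl6);

	if (dst->error) {
		dst_release(dst);
		return NULL;
	}
	return dst;		/* caller must dst_release() when done */
}
#endif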
 909
 910struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
 911{
 912	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
 913	struct dst_entry *new = NULL;
 914
 915	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
 916	if (rt) {
 917		memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
 918
 919		new = &rt->dst;
 920
 921		new->__use = 1;
 922		new->input = dst_discard;
 923		new->output = dst_discard;
 924
 925		if (dst_metrics_read_only(&ort->dst))
 926			new->_metrics = ort->dst._metrics;
 927		else
 928			dst_copy_metrics(new, &ort->dst);
 929		rt->rt6i_idev = ort->rt6i_idev;
 930		if (rt->rt6i_idev)
 931			in6_dev_hold(rt->rt6i_idev);
 932		rt->rt6i_expires = 0;
 933
 934		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
 935		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
 936		rt->rt6i_metric = 0;
 937
 938		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
 939#ifdef CONFIG_IPV6_SUBTREES
 940		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
 941#endif
 942
 943		dst_free(new);
 944	}
 945
 946	dst_release(dst_orig);
 947	return new ? new : ERR_PTR(-ENOMEM);
 948}
 949
 950/*
 951 *	Destination cache support functions
 952 */
 953
 954static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
 955{
 956	struct rt6_info *rt;
 957
 958	rt = (struct rt6_info *) dst;
 959
 960	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
 961		if (rt->rt6i_peer_genid != rt6_peer_genid()) {
 962			if (!rt->rt6i_peer)
 963				rt6_bind_peer(rt, 0);
 964			rt->rt6i_peer_genid = rt6_peer_genid();
 965		}
 966		return dst;
 967	}
 968	return NULL;
 969}
 970
 971static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
 972{
 973	struct rt6_info *rt = (struct rt6_info *) dst;
 974
 975	if (rt) {
 976		if (rt->rt6i_flags & RTF_CACHE) {
 977			if (rt6_check_expired(rt)) {
 978				ip6_del_rt(rt);
 979				dst = NULL;
 980			}
 981		} else {
 982			dst_release(dst);
 983			dst = NULL;
 984		}
 985	}
 986	return dst;
 987}
 988
 989static void ip6_link_failure(struct sk_buff *skb)
 990{
 991	struct rt6_info *rt;
 992
 993	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
 994
 995	rt = (struct rt6_info *) skb_dst(skb);
 996	if (rt) {
 997		if (rt->rt6i_flags&RTF_CACHE) {
 998			dst_set_expires(&rt->dst, 0);
 999			rt->rt6i_flags |= RTF_EXPIRES;
1000		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1001			rt->rt6i_node->fn_sernum = -1;
1002	}
1003}
1004
1005static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1006{
1007	struct rt6_info *rt6 = (struct rt6_info*)dst;
1008
1009	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1010		rt6->rt6i_flags |= RTF_MODIFIED;
1011		if (mtu < IPV6_MIN_MTU) {
1012			u32 features = dst_metric(dst, RTAX_FEATURES);
1013			mtu = IPV6_MIN_MTU;
1014			features |= RTAX_FEATURE_ALLFRAG;
1015			dst_metric_set(dst, RTAX_FEATURES, features);
1016		}
1017		dst_metric_set(dst, RTAX_MTU, mtu);
1018	}
1019}
1020
1021static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1022{
1023	struct net_device *dev = dst->dev;
1024	unsigned int mtu = dst_mtu(dst);
1025	struct net *net = dev_net(dev);
1026
1027	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1028
1029	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1030		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1031
1032	/*
1033	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1034	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1035	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1036	 * rely only on pmtu discovery"
1037	 */
1038	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1039		mtu = IPV6_MAXPLEN;
1040	return mtu;
1041}
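/*
 * Worked example for the computation above, assuming a standard 1500-byte
 * Ethernet MTU: advmss = 1500 - sizeof(ipv6hdr) - sizeof(tcphdr)
 * = 1500 - 40 - 20 = 1440, clamped below by ip6_rt_min_advmss and capped
 * at IPV6_MAXPLEN (65535), which means "rely on PMTU discovery".
 */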
1042
1043static unsigned int ip6_default_mtu(const struct dst_entry *dst)
1044{
1045	unsigned int mtu = IPV6_MIN_MTU;
1046	struct inet6_dev *idev;
1047
1048	rcu_read_lock();
1049	idev = __in6_dev_get(dst->dev);
1050	if (idev)
1051		mtu = idev->cnf.mtu6;
1052	rcu_read_unlock();
1053
1054	return mtu;
1055}
1056
1057static struct dst_entry *icmp6_dst_gc_list;
1058static DEFINE_SPINLOCK(icmp6_dst_lock);
1059
1060struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1061				  struct neighbour *neigh,
1062				  const struct in6_addr *addr)
1063{
1064	struct rt6_info *rt;
1065	struct inet6_dev *idev = in6_dev_get(dev);
1066	struct net *net = dev_net(dev);
1067
1068	if (unlikely(idev == NULL))
1069		return NULL;
1070
1071	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
1072	if (unlikely(rt == NULL)) {
1073		in6_dev_put(idev);
1074		goto out;
1075	}
1076
1077	if (neigh)
1078		neigh_hold(neigh);
1079	else {
1080		neigh = ndisc_get_neigh(dev, addr);
1081		if (IS_ERR(neigh))
1082			neigh = NULL;
1083	}
1084
1085	rt->dst.flags |= DST_HOST;
1086	rt->dst.output  = ip6_output;
1087	dst_set_neighbour(&rt->dst, neigh);
1088	atomic_set(&rt->dst.__refcnt, 1);
1089	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1090
1091	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1092	rt->rt6i_dst.plen = 128;
1093	rt->rt6i_idev     = idev;
1094
1095	spin_lock_bh(&icmp6_dst_lock);
1096	rt->dst.next = icmp6_dst_gc_list;
1097	icmp6_dst_gc_list = &rt->dst;
1098	spin_unlock_bh(&icmp6_dst_lock);
1099
1100	fib6_force_start_gc(net);
1101
1102out:
1103	return &rt->dst;
1104}
1105
1106int icmp6_dst_gc(void)
1107{
1108	struct dst_entry *dst, **pprev;
1109	int more = 0;
1110
1111	spin_lock_bh(&icmp6_dst_lock);
1112	pprev = &icmp6_dst_gc_list;
1113
1114	while ((dst = *pprev) != NULL) {
1115		if (!atomic_read(&dst->__refcnt)) {
1116			*pprev = dst->next;
1117			dst_free(dst);
1118		} else {
1119			pprev = &dst->next;
1120			++more;
1121		}
1122	}
1123
1124	spin_unlock_bh(&icmp6_dst_lock);
1125
1126	return more;
1127}
1128
1129static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1130			    void *arg)
1131{
1132	struct dst_entry *dst, **pprev;
1133
1134	spin_lock_bh(&icmp6_dst_lock);
1135	pprev = &icmp6_dst_gc_list;
1136	while ((dst = *pprev) != NULL) {
1137		struct rt6_info *rt = (struct rt6_info *) dst;
1138		if (func(rt, arg)) {
1139			*pprev = dst->next;
1140			dst_free(dst);
1141		} else {
1142			pprev = &dst->next;
1143		}
1144	}
1145	spin_unlock_bh(&icmp6_dst_lock);
1146}
1147
1148static int ip6_dst_gc(struct dst_ops *ops)
1149{
1150	unsigned long now = jiffies;
1151	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1152	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1153	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1154	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1155	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1156	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1157	int entries;
1158
1159	entries = dst_entries_get_fast(ops);
1160	if (time_after(rt_last_gc + rt_min_interval, now) &&
1161	    entries <= rt_max_size)
1162		goto out;
1163
1164	net->ipv6.ip6_rt_gc_expire++;
1165	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1166	net->ipv6.ip6_rt_last_gc = now;
1167	entries = dst_entries_get_slow(ops);
1168	if (entries < ops->gc_thresh)
1169		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1170out:
1171	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1172	return entries > rt_max_size;
1173}
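/*
 * Note on the decay above: ip6_rt_gc_expire shrinks geometrically
 * (expire -= expire >> elasticity) on every invocation, so successive GC
 * passes become progressively less aggressive until route-cache pressure
 * bumps the expire value up again.
 */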
1174
1175/* Clean the host part of a prefix. Not necessary in a radix tree,
1176   but it results in cleaner routing tables.
1177
1178   Remove this only once everything works!
1179 */
1180
1181int ip6_dst_hoplimit(struct dst_entry *dst)
1182{
1183	int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1184	if (hoplimit == 0) {
1185		struct net_device *dev = dst->dev;
1186		struct inet6_dev *idev;
1187
1188		rcu_read_lock();
1189		idev = __in6_dev_get(dev);
1190		if (idev)
1191			hoplimit = idev->cnf.hop_limit;
1192		else
1193			hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1194		rcu_read_unlock();
1195	}
1196	return hoplimit;
1197}
1198EXPORT_SYMBOL(ip6_dst_hoplimit);
1199
1200/*
1201 *
1202 */
1203
1204int ip6_route_add(struct fib6_config *cfg)
1205{
1206	int err;
1207	struct net *net = cfg->fc_nlinfo.nl_net;
1208	struct rt6_info *rt = NULL;
1209	struct net_device *dev = NULL;
1210	struct inet6_dev *idev = NULL;
1211	struct fib6_table *table;
1212	int addr_type;
1213
1214	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1215		return -EINVAL;
1216#ifndef CONFIG_IPV6_SUBTREES
1217	if (cfg->fc_src_len)
1218		return -EINVAL;
1219#endif
1220	if (cfg->fc_ifindex) {
1221		err = -ENODEV;
1222		dev = dev_get_by_index(net, cfg->fc_ifindex);
1223		if (!dev)
1224			goto out;
1225		idev = in6_dev_get(dev);
1226		if (!idev)
1227			goto out;
1228	}
1229
1230	if (cfg->fc_metric == 0)
1231		cfg->fc_metric = IP6_RT_PRIO_USER;
1232
1233	table = fib6_new_table(net, cfg->fc_table);
1234	if (table == NULL) {
1235		err = -ENOBUFS;
1236		goto out;
1237	}
1238
1239	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
1240
1241	if (rt == NULL) {
1242		err = -ENOMEM;
1243		goto out;
1244	}
1245
1246	rt->dst.obsolete = -1;
1247	rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1248				jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1249				0;
1250
1251	if (cfg->fc_protocol == RTPROT_UNSPEC)
1252		cfg->fc_protocol = RTPROT_BOOT;
1253	rt->rt6i_protocol = cfg->fc_protocol;
1254
1255	addr_type = ipv6_addr_type(&cfg->fc_dst);
1256
1257	if (addr_type & IPV6_ADDR_MULTICAST)
1258		rt->dst.input = ip6_mc_input;
1259	else if (cfg->fc_flags & RTF_LOCAL)
1260		rt->dst.input = ip6_input;
1261	else
1262		rt->dst.input = ip6_forward;
1263
1264	rt->dst.output = ip6_output;
1265
1266	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1267	rt->rt6i_dst.plen = cfg->fc_dst_len;
1268	if (rt->rt6i_dst.plen == 128)
1269	       rt->dst.flags |= DST_HOST;
1270
1271	if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1272		u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1273		if (!metrics) {
1274			err = -ENOMEM;
1275			goto out;
1276		}
1277		dst_init_metrics(&rt->dst, metrics, 0);
1278	}
1279#ifdef CONFIG_IPV6_SUBTREES
1280	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1281	rt->rt6i_src.plen = cfg->fc_src_len;
1282#endif
1283
1284	rt->rt6i_metric = cfg->fc_metric;
1285
1286	/* We cannot add true routes via loopback here,
1287	   they would result in kernel looping; promote them to reject routes
1288	 */
1289	if ((cfg->fc_flags & RTF_REJECT) ||
1290	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1291					      && !(cfg->fc_flags&RTF_LOCAL))) {
1292		/* hold loopback dev/idev if we haven't done so. */
1293		if (dev != net->loopback_dev) {
1294			if (dev) {
1295				dev_put(dev);
1296				in6_dev_put(idev);
1297			}
1298			dev = net->loopback_dev;
1299			dev_hold(dev);
1300			idev = in6_dev_get(dev);
1301			if (!idev) {
1302				err = -ENODEV;
1303				goto out;
1304			}
1305		}
1306		rt->dst.output = ip6_pkt_discard_out;
1307		rt->dst.input = ip6_pkt_discard;
1308		rt->dst.error = -ENETUNREACH;
1309		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1310		goto install_route;
1311	}
1312
1313	if (cfg->fc_flags & RTF_GATEWAY) {
1314		const struct in6_addr *gw_addr;
1315		int gwa_type;
1316
1317		gw_addr = &cfg->fc_gateway;
1318		ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1319		gwa_type = ipv6_addr_type(gw_addr);
1320
1321		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1322			struct rt6_info *grt;
1323
1324			/* IPv6 strictly prohibits using non-link-local
1325			   addresses as the nexthop address.
1326			   Otherwise, the router will not be able to send redirects.
1327			   That is usually right, but in some (rare!) circumstances
1328			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1329			   some exceptions. --ANK
1330			 */
1331			err = -EINVAL;
1332			if (!(gwa_type&IPV6_ADDR_UNICAST))
1333				goto out;
1334
1335			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1336
1337			err = -EHOSTUNREACH;
1338			if (grt == NULL)
1339				goto out;
1340			if (dev) {
1341				if (dev != grt->rt6i_dev) {
1342					dst_release(&grt->dst);
1343					goto out;
1344				}
1345			} else {
1346				dev = grt->rt6i_dev;
1347				idev = grt->rt6i_idev;
1348				dev_hold(dev);
1349				in6_dev_hold(grt->rt6i_idev);
1350			}
1351			if (!(grt->rt6i_flags&RTF_GATEWAY))
1352				err = 0;
1353			dst_release(&grt->dst);
1354
1355			if (err)
1356				goto out;
1357		}
1358		err = -EINVAL;
1359		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1360			goto out;
1361	}
1362
1363	err = -ENODEV;
1364	if (dev == NULL)
1365		goto out;
1366
1367	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1368		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1369			err = -EINVAL;
1370			goto out;
1371		}
1372		ipv6_addr_copy(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc);
1373		rt->rt6i_prefsrc.plen = 128;
1374	} else
1375		rt->rt6i_prefsrc.plen = 0;
1376
1377	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1378		struct neighbour *n = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1379		if (IS_ERR(n)) {
1380			err = PTR_ERR(n);
1381			goto out;
1382		}
1383		dst_set_neighbour(&rt->dst, n);
1384	}
1385
1386	rt->rt6i_flags = cfg->fc_flags;
1387
1388install_route:
1389	if (cfg->fc_mx) {
1390		struct nlattr *nla;
1391		int remaining;
1392
1393		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1394			int type = nla_type(nla);
1395
1396			if (type) {
1397				if (type > RTAX_MAX) {
1398					err = -EINVAL;
1399					goto out;
1400				}
1401
1402				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1403			}
1404		}
1405	}
1406
1407	rt->dst.dev = dev;
1408	rt->rt6i_idev = idev;
1409	rt->rt6i_table = table;
1410
1411	cfg->fc_nlinfo.nl_net = dev_net(dev);
1412
1413	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1414
1415out:
1416	if (dev)
1417		dev_put(dev);
1418	if (idev)
1419		in6_dev_put(idev);
1420	if (rt)
1421		dst_free(&rt->dst);
1422	return err;
1423}
1424
1425static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1426{
1427	int err;
1428	struct fib6_table *table;
1429	struct net *net = dev_net(rt->rt6i_dev);
1430
1431	if (rt == net->ipv6.ip6_null_entry)
1432		return -ENOENT;
1433
1434	table = rt->rt6i_table;
1435	write_lock_bh(&table->tb6_lock);
1436
1437	err = fib6_del(rt, info);
1438	dst_release(&rt->dst);
1439
1440	write_unlock_bh(&table->tb6_lock);
1441
1442	return err;
1443}
1444
1445int ip6_del_rt(struct rt6_info *rt)
1446{
1447	struct nl_info info = {
1448		.nl_net = dev_net(rt->rt6i_dev),
1449	};
1450	return __ip6_del_rt(rt, &info);
1451}
1452
1453static int ip6_route_del(struct fib6_config *cfg)
1454{
1455	struct fib6_table *table;
1456	struct fib6_node *fn;
1457	struct rt6_info *rt;
1458	int err = -ESRCH;
1459
1460	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1461	if (table == NULL)
1462		return err;
1463
1464	read_lock_bh(&table->tb6_lock);
1465
1466	fn = fib6_locate(&table->tb6_root,
1467			 &cfg->fc_dst, cfg->fc_dst_len,
1468			 &cfg->fc_src, cfg->fc_src_len);
1469
1470	if (fn) {
1471		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1472			if (cfg->fc_ifindex &&
1473			    (rt->rt6i_dev == NULL ||
1474			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1475				continue;
1476			if (cfg->fc_flags & RTF_GATEWAY &&
1477			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1478				continue;
1479			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1480				continue;
1481			dst_hold(&rt->dst);
1482			read_unlock_bh(&table->tb6_lock);
1483
1484			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1485		}
1486	}
1487	read_unlock_bh(&table->tb6_lock);
1488
1489	return err;
1490}
1491
1492/*
1493 *	Handle redirects
1494 */
1495struct ip6rd_flowi {
1496	struct flowi6 fl6;
1497	struct in6_addr gateway;
1498};
1499
1500static struct rt6_info *__ip6_route_redirect(struct net *net,
1501					     struct fib6_table *table,
1502					     struct flowi6 *fl6,
1503					     int flags)
1504{
1505	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1506	struct rt6_info *rt;
1507	struct fib6_node *fn;
1508
1509	/*
1510	 * Get the "current" route for this destination and
1511	 * check if the redirect has come from the appropriate router.
1512	 *
1513	 * RFC 2461 specifies that redirects should only be
1514	 * accepted if they come from the nexthop to the target.
1515	 * Due to the way the routes are chosen, this notion
1516	 * is a bit fuzzy and one might need to check all possible
1517	 * routes.
1518	 */
1519
1520	read_lock_bh(&table->tb6_lock);
1521	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1522restart:
1523	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1524		/*
1525		 * Current route is on-link; redirect is always invalid.
1526		 *
1527		 * It seems the previous statement is not quite true: a node
1528		 * may regard us as on-link (e.g. proxy ndisc), and the router
1529		 * serving it might then decide that we should know the
1530		 * truth 8)8) --ANK (980726).
1531		 */
1532		if (rt6_check_expired(rt))
1533			continue;
1534		if (!(rt->rt6i_flags & RTF_GATEWAY))
1535			continue;
1536		if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
1537			continue;
1538		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1539			continue;
1540		break;
1541	}
1542
1543	if (!rt)
1544		rt = net->ipv6.ip6_null_entry;
1545	BACKTRACK(net, &fl6->saddr);
1546out:
1547	dst_hold(&rt->dst);
1548
1549	read_unlock_bh(&table->tb6_lock);
1550
1551	return rt;
1552};
1553
1554static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1555					   const struct in6_addr *src,
1556					   const struct in6_addr *gateway,
1557					   struct net_device *dev)
1558{
1559	int flags = RT6_LOOKUP_F_HAS_SADDR;
1560	struct net *net = dev_net(dev);
1561	struct ip6rd_flowi rdfl = {
1562		.fl6 = {
1563			.flowi6_oif = dev->ifindex,
1564			.daddr = *dest,
1565			.saddr = *src,
1566		},
1567	};
1568
1569	ipv6_addr_copy(&rdfl.gateway, gateway);
1570
1571	if (rt6_need_strict(dest))
1572		flags |= RT6_LOOKUP_F_IFACE;
1573
1574	return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1575						   flags, __ip6_route_redirect);
1576}
1577
1578void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1579		  const struct in6_addr *saddr,
1580		  struct neighbour *neigh, u8 *lladdr, int on_link)
1581{
1582	struct rt6_info *rt, *nrt = NULL;
1583	struct netevent_redirect netevent;
1584	struct net *net = dev_net(neigh->dev);
1585
1586	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1587
1588	if (rt == net->ipv6.ip6_null_entry) {
1589		if (net_ratelimit())
1590			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1591			       "for redirect target\n");
1592		goto out;
1593	}
1594
1595	/*
1596	 *	We have finally decided to accept it.
1597	 */
1598
1599	neigh_update(neigh, lladdr, NUD_STALE,
1600		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1601		     NEIGH_UPDATE_F_OVERRIDE|
1602		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1603				     NEIGH_UPDATE_F_ISROUTER))
1604		     );
1605
1606	/*
1607	 * Redirect received -> path was valid.
1608	 * Redirects are sent only in response to data packets,
1609	 * so this nexthop is apparently reachable. --ANK
1610	 */
1611	dst_confirm(&rt->dst);
1612
1613	/* Duplicate redirect: silently ignore. */
1614	if (neigh == dst_get_neighbour_raw(&rt->dst))
1615		goto out;
1616
1617	nrt = ip6_rt_copy(rt, dest);
1618	if (nrt == NULL)
1619		goto out;
1620
1621	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1622	if (on_link)
1623		nrt->rt6i_flags &= ~RTF_GATEWAY;
1624
1625	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1626	dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1627
1628	if (ip6_ins_rt(nrt))
1629		goto out;
1630
1631	netevent.old = &rt->dst;
1632	netevent.new = &nrt->dst;
1633	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1634
1635	if (rt->rt6i_flags&RTF_CACHE) {
1636		ip6_del_rt(rt);
1637		return;
1638	}
1639
1640out:
1641	dst_release(&rt->dst);
1642}
1643
1644/*
1645 *	Handle ICMP "packet too big" messages
1646 *	i.e. Path MTU discovery
1647 */
1648
1649static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
1650			     struct net *net, u32 pmtu, int ifindex)
1651{
1652	struct rt6_info *rt, *nrt;
1653	int allfrag = 0;
1654again:
1655	rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1656	if (rt == NULL)
1657		return;
1658
1659	if (rt6_check_expired(rt)) {
1660		ip6_del_rt(rt);
1661		goto again;
1662	}
1663
1664	if (pmtu >= dst_mtu(&rt->dst))
1665		goto out;
1666
1667	if (pmtu < IPV6_MIN_MTU) {
1668		/*
1669		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1670		 * MTU (1280) and a fragment header should always be included
1671		 * after a node receiving Too Big message reporting PMTU is
1672		 * less than the IPv6 Minimum Link MTU.
1673		 */
1674		pmtu = IPV6_MIN_MTU;
1675		allfrag = 1;
1676	}
1677
1678	/* New mtu received -> path was valid.
1679	   Too Big messages are sent only in response to data packets,
1680	   so this nexthop is apparently reachable. --ANK
1681	 */
1682	dst_confirm(&rt->dst);
1683
1684	/* Host route. If it is static, it would be better
1685	   not to override it but to add a new one, so that
1686	   when the cache entry expires the old pmtu
1687	   is restored automatically.
1688	 */
1689	if (rt->rt6i_flags & RTF_CACHE) {
1690		dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1691		if (allfrag) {
1692			u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1693			features |= RTAX_FEATURE_ALLFRAG;
1694			dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1695		}
1696		dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1697		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1698		goto out;
1699	}
1700
1701	/* Network route.
1702	   Two cases are possible:
1703	   1. It is a connected route. Action: COW it.
1704	   2. It is a gatewayed or NONEXTHOP route. Action: clone it.
1705	 */
1706	if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
1707		nrt = rt6_alloc_cow(rt, daddr, saddr);
1708	else
1709		nrt = rt6_alloc_clone(rt, daddr);
1710
1711	if (nrt) {
1712		dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1713		if (allfrag) {
1714			u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1715			features |= RTAX_FEATURE_ALLFRAG;
1716			dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1717		}
1718
1719		/* According to RFC 1981, a PMTU increase should not be
1720		 * detected within 5 minutes; the recommended timer is 10 minutes.
1721		 * Here the route expiration time is set to ip6_rt_mtu_expires,
1722		 * which is 10 minutes. After 10 minutes the decreased pmtu expires
1723		 * and detection of a PMTU increase happens automatically.
1724		 */
1725		dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1726		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1727
1728		ip6_ins_rt(nrt);
1729	}
1730out:
1731	dst_release(&rt->dst);
1732}
1733
1734void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
1735			struct net_device *dev, u32 pmtu)
1736{
1737	struct net *net = dev_net(dev);
1738
1739	/*
1740	 * RFC 1981 states that a node "MUST reduce the size of the packets it
1741	 * is sending along the path" that caused the Packet Too Big message.
1742	 * Since it's not possible in the general case to determine which
1743	 * interface was used to send the original packet, we update the MTU
1744	 * on the interface that will be used to send future packets. We also
1745	 * update the MTU on the interface that received the Packet Too Big in
1746	 * case the original packet was forced out that interface with
1747	 * SO_BINDTODEVICE or similar. This is the next best thing to the
1748	 * correct behaviour, which would be to update the MTU on all
1749	 * interfaces.
1750	 */
1751	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1752	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1753}
1754
1755/*
1756 *	Misc support functions
1757 */
1758
1759static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1760				    const struct in6_addr *dest)
1761{
1762	struct net *net = dev_net(ort->rt6i_dev);
1763	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
1764					    ort->dst.dev, 0);
1765
1766	if (rt) {
1767		rt->dst.input = ort->dst.input;
1768		rt->dst.output = ort->dst.output;
1769		rt->dst.flags |= DST_HOST;
1770
1771		ipv6_addr_copy(&rt->rt6i_dst.addr, dest);
1772		rt->rt6i_dst.plen = 128;
1773		dst_copy_metrics(&rt->dst, &ort->dst);
1774		rt->dst.error = ort->dst.error;
1775		rt->rt6i_idev = ort->rt6i_idev;
1776		if (rt->rt6i_idev)
1777			in6_dev_hold(rt->rt6i_idev);
1778		rt->dst.lastuse = jiffies;
1779		rt->rt6i_expires = 0;
1780
1781		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1782		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1783		rt->rt6i_metric = 0;
1784
1785#ifdef CONFIG_IPV6_SUBTREES
1786		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1787#endif
1788		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1789		rt->rt6i_table = ort->rt6i_table;
1790	}
1791	return rt;
1792}
1793
1794#ifdef CONFIG_IPV6_ROUTE_INFO
1795static struct rt6_info *rt6_get_route_info(struct net *net,
1796					   const struct in6_addr *prefix, int prefixlen,
1797					   const struct in6_addr *gwaddr, int ifindex)
1798{
1799	struct fib6_node *fn;
1800	struct rt6_info *rt = NULL;
1801	struct fib6_table *table;
1802
1803	table = fib6_get_table(net, RT6_TABLE_INFO);
1804	if (table == NULL)
1805		return NULL;
1806
1807	write_lock_bh(&table->tb6_lock);
1808	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1809	if (!fn)
1810		goto out;
1811
1812	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1813		if (rt->rt6i_dev->ifindex != ifindex)
1814			continue;
1815		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1816			continue;
1817		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1818			continue;
1819		dst_hold(&rt->dst);
1820		break;
1821	}
1822out:
1823	write_unlock_bh(&table->tb6_lock);
1824	return rt;
1825}
1826
1827static struct rt6_info *rt6_add_route_info(struct net *net,
1828					   const struct in6_addr *prefix, int prefixlen,
1829					   const struct in6_addr *gwaddr, int ifindex,
1830					   unsigned pref)
1831{
1832	struct fib6_config cfg = {
1833		.fc_table	= RT6_TABLE_INFO,
1834		.fc_metric	= IP6_RT_PRIO_USER,
1835		.fc_ifindex	= ifindex,
1836		.fc_dst_len	= prefixlen,
1837		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1838				  RTF_UP | RTF_PREF(pref),
1839		.fc_nlinfo.pid = 0,
1840		.fc_nlinfo.nlh = NULL,
1841		.fc_nlinfo.nl_net = net,
1842	};
1843
1844	ipv6_addr_copy(&cfg.fc_dst, prefix);
1845	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1846
1847	/* We should treat it as a default route if prefix length is 0. */
1848	if (!prefixlen)
1849		cfg.fc_flags |= RTF_DEFAULT;
1850
1851	ip6_route_add(&cfg);
1852
1853	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1854}
1855#endif
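/*
 * Illustrative sketch: the Router Advertisement parser in this file
 * (rt6_route_rcv) is the intended consumer of the two helpers above.
 * A minimal, hypothetical "learn a prefix" sequence looks like this --
 * look the route up first, insert it on a miss:
 */
#if 0
static void example_learn_route_info(struct net *net,
				     const struct in6_addr *prefix, int plen,
				     const struct in6_addr *gwaddr,
				     int ifindex, unsigned pref)
{
	struct rt6_info *rt;

	rt = rt6_get_route_info(net, prefix, plen, gwaddr, ifindex);
	if (!rt)
		rt = rt6_add_route_info(net, prefix, plen, gwaddr,
					ifindex, pref);
	if (rt)
		dst_release(&rt->dst);	/* both helpers return a held dst */
}
#endif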
1856
1857struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1858{
1859	struct rt6_info *rt;
1860	struct fib6_table *table;
1861
1862	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1863	if (table == NULL)
1864		return NULL;
1865
1866	write_lock_bh(&table->tb6_lock);
1867	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1868		if (dev == rt->rt6i_dev &&
1869		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1870		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1871			break;
1872	}
1873	if (rt)
1874		dst_hold(&rt->dst);
1875	write_unlock_bh(&table->tb6_lock);
1876	return rt;
1877}
1878
1879struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1880				     struct net_device *dev,
1881				     unsigned int pref)
1882{
1883	struct fib6_config cfg = {
1884		.fc_table	= RT6_TABLE_DFLT,
1885		.fc_metric	= IP6_RT_PRIO_USER,
1886		.fc_ifindex	= dev->ifindex,
1887		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1888				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1889		.fc_nlinfo.pid = 0,
1890		.fc_nlinfo.nlh = NULL,
1891		.fc_nlinfo.nl_net = dev_net(dev),
1892	};
1893
1894	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1895
1896	ip6_route_add(&cfg);
1897
1898	return rt6_get_dflt_router(gwaddr, dev);
1899}
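/*
 * Illustrative sketch: ndisc router discovery is the expected caller of
 * the default-router helpers. On an RA with a non-zero router lifetime
 * it roughly does the following (hypothetical, simplified; the real
 * code also refreshes the expiry from the advertised lifetime):
 */
#if 0
static void example_ra_defrtr(const struct in6_addr *lladdr,
			      struct net_device *dev, unsigned int pref)
{
	struct rt6_info *rt = rt6_get_dflt_router(lladdr, dev);

	if (!rt)
		rt = rt6_add_dflt_router(lladdr, dev, pref);
	if (rt)
		dst_release(&rt->dst);
}
#endif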
1900
1901void rt6_purge_dflt_routers(struct net *net)
1902{
1903	struct rt6_info *rt;
1904	struct fib6_table *table;
1905
1906	/* NOTE: Keep consistent with rt6_get_dflt_router */
1907	table = fib6_get_table(net, RT6_TABLE_DFLT);
1908	if (table == NULL)
1909		return;
1910
1911restart:
1912	read_lock_bh(&table->tb6_lock);
1913	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1914		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1915			dst_hold(&rt->dst);
1916			read_unlock_bh(&table->tb6_lock);
1917			ip6_del_rt(rt);
1918			goto restart;
1919		}
1920	}
1921	read_unlock_bh(&table->tb6_lock);
1922}
1923
1924static void rtmsg_to_fib6_config(struct net *net,
1925				 struct in6_rtmsg *rtmsg,
1926				 struct fib6_config *cfg)
1927{
1928	memset(cfg, 0, sizeof(*cfg));
1929
1930	cfg->fc_table = RT6_TABLE_MAIN;
1931	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1932	cfg->fc_metric = rtmsg->rtmsg_metric;
1933	cfg->fc_expires = rtmsg->rtmsg_info;
1934	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1935	cfg->fc_src_len = rtmsg->rtmsg_src_len;
1936	cfg->fc_flags = rtmsg->rtmsg_flags;
1937
1938	cfg->fc_nlinfo.nl_net = net;
1939
1940	ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1941	ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1942	ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1943}
1944
1945int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1946{
1947	struct fib6_config cfg;
1948	struct in6_rtmsg rtmsg;
1949	int err;
1950
1951	switch (cmd) {
1952	case SIOCADDRT:		/* Add a route */
1953	case SIOCDELRT:		/* Delete a route */
1954		if (!capable(CAP_NET_ADMIN))
1955			return -EPERM;
1956		err = copy_from_user(&rtmsg, arg,
1957				     sizeof(struct in6_rtmsg));
1958		if (err)
1959			return -EFAULT;
1960
1961		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1962
1963		rtnl_lock();
1964		switch (cmd) {
1965		case SIOCADDRT:
1966			err = ip6_route_add(&cfg);
1967			break;
1968		case SIOCDELRT:
1969			err = ip6_route_del(&cfg);
1970			break;
1971		default:
1972			err = -EINVAL;
1973		}
1974		rtnl_unlock();
1975
1976		return err;
1977	}
1978
1979	return -EINVAL;
1980}
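/*
 * Illustrative userspace sketch for the ioctl above: adding an IPv6
 * route with SIOCADDRT on an AF_INET6 socket. Prefix, metric and
 * ifindex are made-up example values; error handling is omitted.
 */
#if 0
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <linux/route.h>
#include <linux/ipv6_route.h>

static int example_add_route(void)
{
	struct in6_rtmsg rt;
	int fd = socket(AF_INET6, SOCK_DGRAM, 0);

	memset(&rt, 0, sizeof(rt));
	inet_pton(AF_INET6, "2001:db8::", &rt.rtmsg_dst);
	rt.rtmsg_dst_len = 32;
	rt.rtmsg_metric = 1;
	rt.rtmsg_flags = RTF_UP;
	rt.rtmsg_ifindex = 2;		/* hypothetical device index */

	return ioctl(fd, SIOCADDRT, &rt);	/* needs CAP_NET_ADMIN */
}
#endif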
1981
1982/*
1983 *	Drop the packet on the floor
1984 */
1985
1986static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1987{
1988	int type;
1989	struct dst_entry *dst = skb_dst(skb);
1990	switch (ipstats_mib_noroutes) {
1991	case IPSTATS_MIB_INNOROUTES:
1992		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1993		if (type == IPV6_ADDR_ANY) {
1994			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1995				      IPSTATS_MIB_INADDRERRORS);
1996			break;
1997		}
1998		/* FALLTHROUGH */
1999	case IPSTATS_MIB_OUTNOROUTES:
2000		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2001			      ipstats_mib_noroutes);
2002		break;
2003	}
2004	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2005	kfree_skb(skb);
2006	return 0;
2007}
2008
2009static int ip6_pkt_discard(struct sk_buff *skb)
2010{
2011	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2012}
2013
2014static int ip6_pkt_discard_out(struct sk_buff *skb)
2015{
2016	skb->dev = skb_dst(skb)->dev;
2017	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2018}
2019
2020#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2021
2022static int ip6_pkt_prohibit(struct sk_buff *skb)
2023{
2024	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2025}
2026
2027static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2028{
2029	skb->dev = skb_dst(skb)->dev;
2030	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2031}
2032
2033#endif
2034
2035/*
2036 *	Allocate a dst for local (unicast / anycast) address.
2037 */
2038
2039struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2040				    const struct in6_addr *addr,
2041				    int anycast)
2042{
2043	struct net *net = dev_net(idev->dev);
2044	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
2045					    net->loopback_dev, 0);
2046	struct neighbour *neigh;
2047
2048	if (rt == NULL) {
2049		if (net_ratelimit())
2050			pr_warning("IPv6: Maximum number of routes reached,"
2051				   " consider increasing route/max_size.\n");
2052		return ERR_PTR(-ENOMEM);
2053	}
2054
2055	in6_dev_hold(idev);
2056
2057	rt->dst.flags |= DST_HOST;
2058	rt->dst.input = ip6_input;
2059	rt->dst.output = ip6_output;
2060	rt->rt6i_idev = idev;
2061	rt->dst.obsolete = -1;
2062
2063	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2064	if (anycast)
2065		rt->rt6i_flags |= RTF_ANYCAST;
2066	else
2067		rt->rt6i_flags |= RTF_LOCAL;
2068	neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2069	if (IS_ERR(neigh)) {
2070		dst_free(&rt->dst);
2071
2072		return ERR_CAST(neigh);
2073	}
2074	dst_set_neighbour(&rt->dst, neigh);
2075
2076	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
2077	rt->rt6i_dst.plen = 128;
2078	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2079
2080	atomic_set(&rt->dst.__refcnt, 1);
2081
2082	return rt;
2083}
2084
2085int ip6_route_get_saddr(struct net *net,
2086			struct rt6_info *rt,
2087			const struct in6_addr *daddr,
2088			unsigned int prefs,
2089			struct in6_addr *saddr)
2090{
2091	struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2092	int err = 0;
2093	if (rt->rt6i_prefsrc.plen)
2094		ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr);
2095	else
2096		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2097					 daddr, prefs, saddr);
2098	return err;
2099}
2100
2101/* remove deleted ip from prefsrc entries */
2102struct arg_dev_net_ip {
2103	struct net_device *dev;
2104	struct net *net;
2105	struct in6_addr *addr;
2106};
2107
2108static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2109{
2110	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2111	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2112	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2113
2114	if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2115	    rt != net->ipv6.ip6_null_entry &&
2116	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2117		/* remove prefsrc entry */
2118		rt->rt6i_prefsrc.plen = 0;
2119	}
2120	return 0;
2121}
2122
2123void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2124{
2125	struct net *net = dev_net(ifp->idev->dev);
2126	struct arg_dev_net_ip adni = {
2127		.dev = ifp->idev->dev,
2128		.net = net,
2129		.addr = &ifp->addr,
2130	};
2131	fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2132}
2133
2134struct arg_dev_net {
2135	struct net_device *dev;
2136	struct net *net;
2137};
2138
2139static int fib6_ifdown(struct rt6_info *rt, void *arg)
2140{
2141	const struct arg_dev_net *adn = arg;
2142	const struct net_device *dev = adn->dev;
2143
2144	if ((rt->rt6i_dev == dev || dev == NULL) &&
2145	    rt != adn->net->ipv6.ip6_null_entry) {
2146		RT6_TRACE("deleted by ifdown %p\n", rt);
2147		return -1;
2148	}
2149	return 0;
2150}
2151
2152void rt6_ifdown(struct net *net, struct net_device *dev)
2153{
2154	struct arg_dev_net adn = {
2155		.dev = dev,
2156		.net = net,
2157	};
2158
2159	fib6_clean_all(net, fib6_ifdown, 0, &adn);
2160	icmp6_clean_all(fib6_ifdown, &adn);
2161}
2162
2163struct rt6_mtu_change_arg
2164{
2165	struct net_device *dev;
2166	unsigned mtu;
2167};
2168
2169static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2170{
2171	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2172	struct inet6_dev *idev;
2173
2174	/* In IPv6 pmtu discovery is not optional,
2175	   so the RTAX_MTU lock cannot disable it.
2176	   We still use this lock to block changes
2177	   caused by addrconf/ndisc.
2178	*/
2179
2180	idev = __in6_dev_get(arg->dev);
2181	if (idev == NULL)
2182		return 0;
2183
2184	/* After an administrative MTU increase there is no way to discover
2185	   the IPv6 PMTU increase, so the increase must be applied here.
2186	   Since RFC 1981 doesn't cover administrative MTU increases,
2187	   updating the PMTU on increase is a MUST (e.g. for jumbo frames).
2188	 */
2189	/*
2190	   If new MTU is less than route PMTU, this new MTU will be the
2191	   lowest MTU in the path, update the route PMTU to reflect PMTU
2192	   decreases; if new MTU is greater than route PMTU, and the
2193	   old MTU is the lowest MTU in the path, update the route PMTU
2194	   to reflect the increase. In that case, if another node on the
2195	   path still has the lowest MTU, its Packet Too Big message will
2196	   trigger PMTU discovery again.
2197	 */
2198	if (rt->rt6i_dev == arg->dev &&
2199	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2200	    (dst_mtu(&rt->dst) >= arg->mtu ||
2201	     (dst_mtu(&rt->dst) < arg->mtu &&
2202	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2203		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2204	}
2205	return 0;
2206}
2207
2208void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2209{
2210	struct rt6_mtu_change_arg arg = {
2211		.dev = dev,
2212		.mtu = mtu,
2213	};
2214
2215	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2216}
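/*
 * Illustrative sketch: addrconf's netdevice notifier is the expected
 * caller -- on NETDEV_CHANGEMTU it pushes the new device MTU through
 * every route using that device (hypothetical, simplified handler):
 */
#if 0
static int example_netdev_event(struct notifier_block *nb,
				unsigned long event, void *data)
{
	struct net_device *dev = data;

	if (event == NETDEV_CHANGEMTU)
		rt6_mtu_change(dev, dev->mtu);
	return NOTIFY_OK;
}
#endif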
2217
2218static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2219	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2220	[RTA_OIF]               = { .type = NLA_U32 },
2221	[RTA_IIF]		= { .type = NLA_U32 },
2222	[RTA_PRIORITY]          = { .type = NLA_U32 },
2223	[RTA_METRICS]           = { .type = NLA_NESTED },
2224};
2225
2226static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2227			      struct fib6_config *cfg)
2228{
2229	struct rtmsg *rtm;
2230	struct nlattr *tb[RTA_MAX+1];
2231	int err;
2232
2233	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2234	if (err < 0)
2235		goto errout;
2236
2237	err = -EINVAL;
2238	rtm = nlmsg_data(nlh);
2239	memset(cfg, 0, sizeof(*cfg));
2240
2241	cfg->fc_table = rtm->rtm_table;
2242	cfg->fc_dst_len = rtm->rtm_dst_len;
2243	cfg->fc_src_len = rtm->rtm_src_len;
2244	cfg->fc_flags = RTF_UP;
2245	cfg->fc_protocol = rtm->rtm_protocol;
2246
2247	if (rtm->rtm_type == RTN_UNREACHABLE)
2248		cfg->fc_flags |= RTF_REJECT;
2249
2250	if (rtm->rtm_type == RTN_LOCAL)
2251		cfg->fc_flags |= RTF_LOCAL;
2252
2253	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2254	cfg->fc_nlinfo.nlh = nlh;
2255	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2256
2257	if (tb[RTA_GATEWAY]) {
2258		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2259		cfg->fc_flags |= RTF_GATEWAY;
2260	}
2261
2262	if (tb[RTA_DST]) {
2263		int plen = (rtm->rtm_dst_len + 7) >> 3;
2264
2265		if (nla_len(tb[RTA_DST]) < plen)
2266			goto errout;
2267
2268		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2269	}
2270
2271	if (tb[RTA_SRC]) {
2272		int plen = (rtm->rtm_src_len + 7) >> 3;
2273
2274		if (nla_len(tb[RTA_SRC]) < plen)
2275			goto errout;
2276
2277		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2278	}
2279
2280	if (tb[RTA_PREFSRC])
2281		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2282
2283	if (tb[RTA_OIF])
2284		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2285
2286	if (tb[RTA_PRIORITY])
2287		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2288
2289	if (tb[RTA_METRICS]) {
2290		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2291		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2292	}
2293
2294	if (tb[RTA_TABLE])
2295		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2296
2297	err = 0;
2298errout:
2299	return err;
2300}
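/*
 * Illustrative userspace sketch: the kind of RTM_NEWROUTE message the
 * parser above consumes. libmnl is used purely for brevity; any
 * netlink library or raw sendmsg() produces the same wire format.
 * Addresses, prefix length and ifindex are made-up example values.
 */
#if 0
#include <libmnl/libmnl.h>
#include <linux/rtnetlink.h>
#include <arpa/inet.h>

static void example_build_newroute(char *buf)
{
	struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf);
	struct rtmsg *rtm;
	struct in6_addr dst;

	nlh->nlmsg_type = RTM_NEWROUTE;
	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_ACK;

	rtm = mnl_nlmsg_put_extra_header(nlh, sizeof(*rtm));
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = 64;
	rtm->rtm_table = RT_TABLE_MAIN;
	rtm->rtm_protocol = RTPROT_STATIC;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_type = RTN_UNICAST;

	inet_pton(AF_INET6, "2001:db8:1::", &dst);
	mnl_attr_put(nlh, RTA_DST, sizeof(dst), &dst);
	mnl_attr_put_u32(nlh, RTA_OIF, 2);	/* hypothetical ifindex */
}
#endif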
2301
2302static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2303{
2304	struct fib6_config cfg;
2305	int err;
2306
2307	err = rtm_to_fib6_config(skb, nlh, &cfg);
2308	if (err < 0)
2309		return err;
2310
2311	return ip6_route_del(&cfg);
2312}
2313
2314static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2315{
2316	struct fib6_config cfg;
2317	int err;
2318
2319	err = rtm_to_fib6_config(skb, nlh, &cfg);
2320	if (err < 0)
2321		return err;
2322
2323	return ip6_route_add(&cfg);
2324}
2325
2326static inline size_t rt6_nlmsg_size(void)
2327{
2328	return NLMSG_ALIGN(sizeof(struct rtmsg))
2329	       + nla_total_size(16) /* RTA_SRC */
2330	       + nla_total_size(16) /* RTA_DST */
2331	       + nla_total_size(16) /* RTA_GATEWAY */
2332	       + nla_total_size(16) /* RTA_PREFSRC */
2333	       + nla_total_size(4) /* RTA_TABLE */
2334	       + nla_total_size(4) /* RTA_IIF */
2335	       + nla_total_size(4) /* RTA_OIF */
2336	       + nla_total_size(4) /* RTA_PRIORITY */
2337	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2338	       + nla_total_size(sizeof(struct rta_cacheinfo));
2339}
2340
2341static int rt6_fill_node(struct net *net,
2342			 struct sk_buff *skb, struct rt6_info *rt,
2343			 struct in6_addr *dst, struct in6_addr *src,
2344			 int iif, int type, u32 pid, u32 seq,
2345			 int prefix, int nowait, unsigned int flags)
2346{
2347	struct rtmsg *rtm;
2348	struct nlmsghdr *nlh;
2349	long expires;
2350	u32 table;
2351	struct neighbour *n;
2352
2353	if (prefix) {	/* user wants prefix routes only */
2354		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2355			/* success since this is not a prefix route */
2356			return 1;
2357		}
2358	}
2359
2360	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2361	if (nlh == NULL)
2362		return -EMSGSIZE;
2363
2364	rtm = nlmsg_data(nlh);
2365	rtm->rtm_family = AF_INET6;
2366	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2367	rtm->rtm_src_len = rt->rt6i_src.plen;
2368	rtm->rtm_tos = 0;
2369	if (rt->rt6i_table)
2370		table = rt->rt6i_table->tb6_id;
2371	else
2372		table = RT6_TABLE_UNSPEC;
2373	rtm->rtm_table = table;
2374	NLA_PUT_U32(skb, RTA_TABLE, table);
2375	if (rt->rt6i_flags&RTF_REJECT)
2376		rtm->rtm_type = RTN_UNREACHABLE;
2377	else if (rt->rt6i_flags&RTF_LOCAL)
2378		rtm->rtm_type = RTN_LOCAL;
2379	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2380		rtm->rtm_type = RTN_LOCAL;
2381	else
2382		rtm->rtm_type = RTN_UNICAST;
2383	rtm->rtm_flags = 0;
2384	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2385	rtm->rtm_protocol = rt->rt6i_protocol;
2386	if (rt->rt6i_flags&RTF_DYNAMIC)
2387		rtm->rtm_protocol = RTPROT_REDIRECT;
2388	else if (rt->rt6i_flags & RTF_ADDRCONF)
2389		rtm->rtm_protocol = RTPROT_KERNEL;
2390	else if (rt->rt6i_flags&RTF_DEFAULT)
2391		rtm->rtm_protocol = RTPROT_RA;
2392
2393	if (rt->rt6i_flags&RTF_CACHE)
2394		rtm->rtm_flags |= RTM_F_CLONED;
2395
2396	if (dst) {
2397		NLA_PUT(skb, RTA_DST, 16, dst);
2398		rtm->rtm_dst_len = 128;
2399	} else if (rtm->rtm_dst_len)
2400		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2401#ifdef CONFIG_IPV6_SUBTREES
2402	if (src) {
2403		NLA_PUT(skb, RTA_SRC, 16, src);
2404		rtm->rtm_src_len = 128;
2405	} else if (rtm->rtm_src_len)
2406		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2407#endif
2408	if (iif) {
2409#ifdef CONFIG_IPV6_MROUTE
2410		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2411			int err = ip6mr_get_route(net, skb, rtm, nowait);
2412			if (err <= 0) {
2413				if (!nowait) {
2414					if (err == 0)
2415						return 0;
2416					goto nla_put_failure;
2417				} else {
2418					if (err == -EMSGSIZE)
2419						goto nla_put_failure;
2420				}
2421			}
2422		} else
2423#endif
2424			NLA_PUT_U32(skb, RTA_IIF, iif);
2425	} else if (dst) {
2426		struct in6_addr saddr_buf;
2427		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2428			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2429	}
2430
2431	if (rt->rt6i_prefsrc.plen) {
2432		struct in6_addr saddr_buf;
2433		ipv6_addr_copy(&saddr_buf, &rt->rt6i_prefsrc.addr);
2434		NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2435	}
2436
2437	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2438		goto nla_put_failure;
2439
2440	rcu_read_lock();
2441	n = dst_get_neighbour(&rt->dst);
2442	if (n)
2443		NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
2444	rcu_read_unlock();
2445
2446	if (rt->dst.dev)
2447		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2448
2449	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2450
2451	if (!(rt->rt6i_flags & RTF_EXPIRES))
2452		expires = 0;
2453	else if (rt->rt6i_expires - jiffies < INT_MAX)
2454		expires = rt->rt6i_expires - jiffies;
2455	else
2456		expires = INT_MAX;
2457
2458	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2459			       expires, rt->dst.error) < 0)
2460		goto nla_put_failure;
2461
2462	return nlmsg_end(skb, nlh);
2463
2464nla_put_failure:
2465	nlmsg_cancel(skb, nlh);
2466	return -EMSGSIZE;
2467}
2468
2469int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2470{
2471	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2472	int prefix;
2473
2474	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2475		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2476		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2477	} else
2478		prefix = 0;
2479
2480	return rt6_fill_node(arg->net,
2481		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2482		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2483		     prefix, 0, NLM_F_MULTI);
2484}
2485
2486static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2487{
2488	struct net *net = sock_net(in_skb->sk);
2489	struct nlattr *tb[RTA_MAX+1];
2490	struct rt6_info *rt;
2491	struct sk_buff *skb;
2492	struct rtmsg *rtm;
2493	struct flowi6 fl6;
2494	int err, iif = 0;
2495
2496	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2497	if (err < 0)
2498		goto errout;
2499
2500	err = -EINVAL;
2501	memset(&fl6, 0, sizeof(fl6));
2502
2503	if (tb[RTA_SRC]) {
2504		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2505			goto errout;
2506
2507		ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
2508	}
2509
2510	if (tb[RTA_DST]) {
2511		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2512			goto errout;
2513
2514		ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
2515	}
2516
2517	if (tb[RTA_IIF])
2518		iif = nla_get_u32(tb[RTA_IIF]);
2519
2520	if (tb[RTA_OIF])
2521		fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
2522
2523	if (iif) {
2524		struct net_device *dev;
2525		dev = __dev_get_by_index(net, iif);
2526		if (!dev) {
2527			err = -ENODEV;
2528			goto errout;
2529		}
2530	}
2531
2532	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2533	if (skb == NULL) {
2534		err = -ENOBUFS;
2535		goto errout;
2536	}
2537
2538	/* Reserve room for dummy headers; this skb can pass
2539	   through a good chunk of the routing engine.
2540	 */
2541	skb_reset_mac_header(skb);
2542	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2543
2544	rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
2545	skb_dst_set(skb, &rt->dst);
2546
2547	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2548			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2549			    nlh->nlmsg_seq, 0, 0, 0);
2550	if (err < 0) {
2551		kfree_skb(skb);
2552		goto errout;
2553	}
2554
2555	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2556errout:
2557	return err;
2558}
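/*
 * Illustrative note: this handler is what backs "ip -6 route get". A
 * minimal request carries little more than RTA_DST; sketch below in
 * the same hypothetical libmnl style as the RTM_NEWROUTE example:
 */
#if 0
static void example_build_getroute(char *buf, const struct in6_addr *dst)
{
	struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf);
	struct rtmsg *rtm;

	nlh->nlmsg_type = RTM_GETROUTE;
	nlh->nlmsg_flags = NLM_F_REQUEST;

	rtm = mnl_nlmsg_put_extra_header(nlh, sizeof(*rtm));
	rtm->rtm_family = AF_INET6;

	mnl_attr_put(nlh, RTA_DST, sizeof(*dst), dst);
}
#endif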
2559
2560void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2561{
2562	struct sk_buff *skb;
2563	struct net *net = info->nl_net;
2564	u32 seq;
2565	int err;
2566
2567	err = -ENOBUFS;
2568	seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2569
2570	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2571	if (skb == NULL)
2572		goto errout;
2573
2574	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2575				event, info->pid, seq, 0, 0, 0);
2576	if (err < 0) {
2577		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2578		WARN_ON(err == -EMSGSIZE);
2579		kfree_skb(skb);
2580		goto errout;
2581	}
2582	rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2583		    info->nlh, gfp_any());
2584	return;
2585errout:
2586	if (err < 0)
2587		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2588}
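/*
 * Illustrative userspace sketch: receiving the notifications emitted
 * above by joining the IPv6 route multicast group on an rtnetlink
 * socket (error handling omitted):
 */
#if 0
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

static int example_listen_routes(void)
{
	struct sockaddr_nl snl = {
		.nl_family = AF_NETLINK,
		.nl_groups = RTMGRP_IPV6_ROUTE, /* 1 << (RTNLGRP_IPV6_ROUTE - 1) */
	};
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

	bind(fd, (struct sockaddr *)&snl, sizeof(snl));
	return fd;	/* recvmsg() now yields RTM_NEWROUTE/RTM_DELROUTE */
}
#endif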
2589
2590static int ip6_route_dev_notify(struct notifier_block *this,
2591				unsigned long event, void *data)
2592{
2593	struct net_device *dev = (struct net_device *)data;
2594	struct net *net = dev_net(dev);
2595
2596	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2597		net->ipv6.ip6_null_entry->dst.dev = dev;
2598		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2599#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2600		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2601		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2602		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2603		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2604#endif
2605	}
2606
2607	return NOTIFY_OK;
2608}
2609
2610/*
2611 *	/proc
2612 */
2613
2614#ifdef CONFIG_PROC_FS
2615
2616struct rt6_proc_arg
2617{
2618	char *buffer;
2619	int offset;
2620	int length;
2621	int skip;
2622	int len;
2623};
2624
2625static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2626{
2627	struct seq_file *m = p_arg;
2628	struct neighbour *n;
2629
2630	seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2631
2632#ifdef CONFIG_IPV6_SUBTREES
2633	seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2634#else
2635	seq_puts(m, "00000000000000000000000000000000 00 ");
2636#endif
2637	rcu_read_lock();
2638	n = dst_get_neighbour(&rt->dst);
2639	if (n) {
2640		seq_printf(m, "%pi6", n->primary_key);
2641	} else {
2642		seq_puts(m, "00000000000000000000000000000000");
2643	}
2644	rcu_read_unlock();
2645	seq_printf(m, " %08x %08x %08x %08x %8s\n",
2646		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2647		   rt->dst.__use, rt->rt6i_flags,
2648		   rt->rt6i_dev ? rt->rt6i_dev->name : "");
2649	return 0;
2650}
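/*
 * Illustrative, hypothetical /proc/net/ipv6_route line as produced by
 * the function above (fields: dst/plen, src/plen, next hop, metric,
 * refcnt, use, flags, device):
 *
 * 20010db8000000000000000000000000 40 00000000000000000000000000000000 00 fe800000000000000000000000000001 00000400 00000001 00000000 00000003 eth0
 */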
2651
2652static int ipv6_route_show(struct seq_file *m, void *v)
2653{
2654	struct net *net = (struct net *)m->private;
2655	fib6_clean_all(net, rt6_info_route, 0, m);
2656	return 0;
2657}
2658
2659static int ipv6_route_open(struct inode *inode, struct file *file)
2660{
2661	return single_open_net(inode, file, ipv6_route_show);
2662}
2663
2664static const struct file_operations ipv6_route_proc_fops = {
2665	.owner		= THIS_MODULE,
2666	.open		= ipv6_route_open,
2667	.read		= seq_read,
2668	.llseek		= seq_lseek,
2669	.release	= single_release_net,
2670};
2671
2672static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2673{
2674	struct net *net = (struct net *)seq->private;
2675	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2676		   net->ipv6.rt6_stats->fib_nodes,
2677		   net->ipv6.rt6_stats->fib_route_nodes,
2678		   net->ipv6.rt6_stats->fib_rt_alloc,
2679		   net->ipv6.rt6_stats->fib_rt_entries,
2680		   net->ipv6.rt6_stats->fib_rt_cache,
2681		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2682		   net->ipv6.rt6_stats->fib_discarded_routes);
2683
2684	return 0;
2685}
2686
2687static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2688{
2689	return single_open_net(inode, file, rt6_stats_seq_show);
2690}
2691
2692static const struct file_operations rt6_stats_seq_fops = {
2693	.owner	 = THIS_MODULE,
2694	.open	 = rt6_stats_seq_open,
2695	.read	 = seq_read,
2696	.llseek	 = seq_lseek,
2697	.release = single_release_net,
2698};
2699#endif	/* CONFIG_PROC_FS */
2700
2701#ifdef CONFIG_SYSCTL
2702
2703static
2704int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2705			      void __user *buffer, size_t *lenp, loff_t *ppos)
2706{
2707	struct net *net;
2708	int delay;
2709	if (!write)
2710		return -EINVAL;
2711
2712	net = (struct net *)ctl->extra1;
2713	delay = net->ipv6.sysctl.flush_delay;
2714	proc_dointvec(ctl, write, buffer, lenp, ppos);
2715	fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2716	return 0;
2717}
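/*
 * Illustrative userspace sketch: triggering the flush handler above is
 * just a write to the (write-only) sysctl, equivalent to
 * "echo 1 > /proc/sys/net/ipv6/route/flush":
 */
#if 0
#include <fcntl.h>
#include <unistd.h>

static void example_flush_routes(void)
{
	int fd = open("/proc/sys/net/ipv6/route/flush", O_WRONLY);

	if (fd >= 0) {
		write(fd, "1", 1);
		close(fd);
	}
}
#endif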
2718
2719ctl_table ipv6_route_table_template[] = {
2720	{
2721		.procname	=	"flush",
2722		.data		=	&init_net.ipv6.sysctl.flush_delay,
2723		.maxlen		=	sizeof(int),
2724		.mode		=	0200,
2725		.proc_handler	=	ipv6_sysctl_rtcache_flush
2726	},
2727	{
2728		.procname	=	"gc_thresh",
2729		.data		=	&ip6_dst_ops_template.gc_thresh,
2730		.maxlen		=	sizeof(int),
2731		.mode		=	0644,
2732		.proc_handler	=	proc_dointvec,
2733	},
2734	{
2735		.procname	=	"max_size",
2736		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
2737		.maxlen		=	sizeof(int),
2738		.mode		=	0644,
2739		.proc_handler	=	proc_dointvec,
2740	},
2741	{
2742		.procname	=	"gc_min_interval",
2743		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2744		.maxlen		=	sizeof(int),
2745		.mode		=	0644,
2746		.proc_handler	=	proc_dointvec_jiffies,
2747	},
2748	{
2749		.procname	=	"gc_timeout",
2750		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2751		.maxlen		=	sizeof(int),
2752		.mode		=	0644,
2753		.proc_handler	=	proc_dointvec_jiffies,
2754	},
2755	{
2756		.procname	=	"gc_interval",
2757		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
2758		.maxlen		=	sizeof(int),
2759		.mode		=	0644,
2760		.proc_handler	=	proc_dointvec_jiffies,
2761	},
2762	{
2763		.procname	=	"gc_elasticity",
2764		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2765		.maxlen		=	sizeof(int),
2766		.mode		=	0644,
2767		.proc_handler	=	proc_dointvec,
2768	},
2769	{
2770		.procname	=	"mtu_expires",
2771		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2772		.maxlen		=	sizeof(int),
2773		.mode		=	0644,
2774		.proc_handler	=	proc_dointvec_jiffies,
2775	},
2776	{
2777		.procname	=	"min_adv_mss",
2778		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
2779		.maxlen		=	sizeof(int),
2780		.mode		=	0644,
2781		.proc_handler	=	proc_dointvec,
2782	},
2783	{
2784		.procname	=	"gc_min_interval_ms",
2785		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2786		.maxlen		=	sizeof(int),
2787		.mode		=	0644,
2788		.proc_handler	=	proc_dointvec_ms_jiffies,
2789	},
2790	{ }
2791};
2792
2793struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2794{
2795	struct ctl_table *table;
2796
2797	table = kmemdup(ipv6_route_table_template,
2798			sizeof(ipv6_route_table_template),
2799			GFP_KERNEL);
2800
2801	if (table) {
2802		table[0].data = &net->ipv6.sysctl.flush_delay;
2803		table[0].extra1 = net;
2804		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2805		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2806		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2807		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2808		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2809		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2810		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2811		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2812		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2813	}
2814
2815	return table;
2816}
2817#endif
2818
2819static int __net_init ip6_route_net_init(struct net *net)
2820{
2821	int ret = -ENOMEM;
2822
2823	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2824	       sizeof(net->ipv6.ip6_dst_ops));
2825
2826	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2827		goto out_ip6_dst_ops;
2828
2829	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2830					   sizeof(*net->ipv6.ip6_null_entry),
2831					   GFP_KERNEL);
2832	if (!net->ipv6.ip6_null_entry)
2833		goto out_ip6_dst_entries;
2834	net->ipv6.ip6_null_entry->dst.path =
2835		(struct dst_entry *)net->ipv6.ip6_null_entry;
2836	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2837	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2838			 ip6_template_metrics, true);
2839
2840#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2841	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2842					       sizeof(*net->ipv6.ip6_prohibit_entry),
2843					       GFP_KERNEL);
2844	if (!net->ipv6.ip6_prohibit_entry)
2845		goto out_ip6_null_entry;
2846	net->ipv6.ip6_prohibit_entry->dst.path =
2847		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2848	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2849	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2850			 ip6_template_metrics, true);
2851
2852	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2853					       sizeof(*net->ipv6.ip6_blk_hole_entry),
2854					       GFP_KERNEL);
2855	if (!net->ipv6.ip6_blk_hole_entry)
2856		goto out_ip6_prohibit_entry;
2857	net->ipv6.ip6_blk_hole_entry->dst.path =
2858		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2859	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2860	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2861			 ip6_template_metrics, true);
2862#endif
2863
2864	net->ipv6.sysctl.flush_delay = 0;
2865	net->ipv6.sysctl.ip6_rt_max_size = 4096;
2866	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2867	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2868	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2869	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2870	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2871	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2872
2873#ifdef CONFIG_PROC_FS
2874	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2875	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2876#endif
2877	net->ipv6.ip6_rt_gc_expire = 30*HZ;
2878
2879	ret = 0;
2880out:
2881	return ret;
2882
2883#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2884out_ip6_prohibit_entry:
2885	kfree(net->ipv6.ip6_prohibit_entry);
2886out_ip6_null_entry:
2887	kfree(net->ipv6.ip6_null_entry);
2888#endif
2889out_ip6_dst_entries:
2890	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2891out_ip6_dst_ops:
2892	goto out;
2893}
2894
2895static void __net_exit ip6_route_net_exit(struct net *net)
2896{
2897#ifdef CONFIG_PROC_FS
2898	proc_net_remove(net, "ipv6_route");
2899	proc_net_remove(net, "rt6_stats");
2900#endif
2901	kfree(net->ipv6.ip6_null_entry);
2902#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2903	kfree(net->ipv6.ip6_prohibit_entry);
2904	kfree(net->ipv6.ip6_blk_hole_entry);
2905#endif
2906	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2907}
2908
2909static struct pernet_operations ip6_route_net_ops = {
2910	.init = ip6_route_net_init,
2911	.exit = ip6_route_net_exit,
2912};
2913
2914static struct notifier_block ip6_route_dev_notifier = {
2915	.notifier_call = ip6_route_dev_notify,
2916	.priority = 0,
2917};
2918
2919int __init ip6_route_init(void)
2920{
2921	int ret;
2922
2923	ret = -ENOMEM;
2924	ip6_dst_ops_template.kmem_cachep =
2925		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2926				  SLAB_HWCACHE_ALIGN, NULL);
2927	if (!ip6_dst_ops_template.kmem_cachep)
2928		goto out;
2929
2930	ret = dst_entries_init(&ip6_dst_blackhole_ops);
2931	if (ret)
2932		goto out_kmem_cache;
2933
2934	ret = register_pernet_subsys(&ip6_route_net_ops);
2935	if (ret)
2936		goto out_dst_entries;
2937
2938	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2939
2940	/* The loopback device is registered before this code runs, so the
2941	 * loopback reference in rt6_info has not been taken; take it
2942	 * manually for init_net. */
2943	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
2944	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2945#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2946	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
2947	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2948	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
2949	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2950#endif
2951	ret = fib6_init();
2952	if (ret)
2953		goto out_register_subsys;
2954
2955	ret = xfrm6_init();
2956	if (ret)
2957		goto out_fib6_init;
2958
2959	ret = fib6_rules_init();
2960	if (ret)
2961		goto xfrm6_init;
2962
2963	ret = -ENOBUFS;
2964	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
2965	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
2966	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
2967		goto fib6_rules_init;
2968
2969	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2970	if (ret)
2971		goto fib6_rules_init;
2972
2973out:
2974	return ret;
2975
2976fib6_rules_init:
2977	fib6_rules_cleanup();
2978xfrm6_init:
2979	xfrm6_fini();
2980out_fib6_init:
2981	fib6_gc_cleanup();
2982out_register_subsys:
2983	unregister_pernet_subsys(&ip6_route_net_ops);
2984out_dst_entries:
2985	dst_entries_destroy(&ip6_dst_blackhole_ops);
2986out_kmem_cache:
2987	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2988	goto out;
2989}
2990
2991void ip6_route_cleanup(void)
2992{
2993	unregister_netdevice_notifier(&ip6_route_dev_notifier);
2994	fib6_rules_cleanup();
2995	xfrm6_fini();
2996	fib6_gc_cleanup();
2997	unregister_pernet_subsys(&ip6_route_net_ops);
2998	dst_entries_destroy(&ip6_dst_blackhole_ops);
2999	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3000}
v6.9.4
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *	Linux INET6 implementation
   4 *	FIB front-end.
   5 *
   6 *	Authors:
   7 *	Pedro Roque		<roque@di.fc.ul.pt>
   8 */
   9
  10/*	Changes:
  11 *
  12 *	YOSHIFUJI Hideaki @USAGI
  13 *		reworked default router selection.
  14 *		- respect outgoing interface
  15 *		- select from (probably) reachable routers (i.e.
  16 *		routers in REACHABLE, STALE, DELAY or PROBE states).
  17 *		- always select the same router if it is (probably)
  18 *		reachable.  otherwise, round-robin the list.
  19 *	Ville Nuorvala
  20 *		Fixed routing subtrees.
  21 */
  22
  23#define pr_fmt(fmt) "IPv6: " fmt
  24
  25#include <linux/capability.h>
  26#include <linux/errno.h>
  27#include <linux/export.h>
  28#include <linux/types.h>
  29#include <linux/times.h>
  30#include <linux/socket.h>
  31#include <linux/sockios.h>
  32#include <linux/net.h>
  33#include <linux/route.h>
  34#include <linux/netdevice.h>
  35#include <linux/in6.h>
  36#include <linux/mroute6.h>
  37#include <linux/init.h>
  38#include <linux/if_arp.h>
  39#include <linux/proc_fs.h>
  40#include <linux/seq_file.h>
  41#include <linux/nsproxy.h>
  42#include <linux/slab.h>
  43#include <linux/jhash.h>
  44#include <linux/siphash.h>
  45#include <net/net_namespace.h>
  46#include <net/snmp.h>
  47#include <net/ipv6.h>
  48#include <net/ip6_fib.h>
  49#include <net/ip6_route.h>
  50#include <net/ndisc.h>
  51#include <net/addrconf.h>
  52#include <net/tcp.h>
  53#include <linux/rtnetlink.h>
  54#include <net/dst.h>
  55#include <net/dst_metadata.h>
  56#include <net/xfrm.h>
  57#include <net/netevent.h>
  58#include <net/netlink.h>
  59#include <net/rtnh.h>
  60#include <net/lwtunnel.h>
  61#include <net/ip_tunnels.h>
  62#include <net/l3mdev.h>
  63#include <net/ip.h>
  64#include <linux/uaccess.h>
  65#include <linux/btf_ids.h>
  66
  67#ifdef CONFIG_SYSCTL
  68#include <linux/sysctl.h>
  69#endif
  70
  71static int ip6_rt_type_to_error(u8 fib6_type);
  72
  73#define CREATE_TRACE_POINTS
  74#include <trace/events/fib6.h>
  75EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
  76#undef CREATE_TRACE_POINTS
  77
  78enum rt6_nud_state {
  79	RT6_NUD_FAIL_HARD = -3,
  80	RT6_NUD_FAIL_PROBE = -2,
  81	RT6_NUD_FAIL_DO_RR = -1,
  82	RT6_NUD_SUCCEED = 1
  83};
  84
  85INDIRECT_CALLABLE_SCOPE
  86struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
  87static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
  88INDIRECT_CALLABLE_SCOPE
  89unsigned int		ip6_mtu(const struct dst_entry *dst);
  90static void		ip6_negative_advice(struct sock *sk,
  91					    struct dst_entry *dst);
  92static void		ip6_dst_destroy(struct dst_entry *);
  93static void		ip6_dst_ifdown(struct dst_entry *,
  94				       struct net_device *dev);
  95static void		 ip6_dst_gc(struct dst_ops *ops);
  96
  97static int		ip6_pkt_discard(struct sk_buff *skb);
  98static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
  99static int		ip6_pkt_prohibit(struct sk_buff *skb);
 100static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
 101static void		ip6_link_failure(struct sk_buff *skb);
 102static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
 103					   struct sk_buff *skb, u32 mtu,
 104					   bool confirm_neigh);
 105static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
 106					struct sk_buff *skb);
 107static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
 108			   int strict);
 109static size_t rt6_nlmsg_size(struct fib6_info *f6i);
 110static int rt6_fill_node(struct net *net, struct sk_buff *skb,
 111			 struct fib6_info *rt, struct dst_entry *dst,
 112			 struct in6_addr *dest, struct in6_addr *src,
 113			 int iif, int type, u32 portid, u32 seq,
 114			 unsigned int flags);
 115static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
 116					   const struct in6_addr *daddr,
 117					   const struct in6_addr *saddr);
 118
 119#ifdef CONFIG_IPV6_ROUTE_INFO
 120static struct fib6_info *rt6_add_route_info(struct net *net,
 121					   const struct in6_addr *prefix, int prefixlen,
 122					   const struct in6_addr *gwaddr,
 123					   struct net_device *dev,
 124					   unsigned int pref);
 125static struct fib6_info *rt6_get_route_info(struct net *net,
 126					   const struct in6_addr *prefix, int prefixlen,
 127					   const struct in6_addr *gwaddr,
 128					   struct net_device *dev);
 129#endif
 130
 131struct uncached_list {
 132	spinlock_t		lock;
 133	struct list_head	head;
 134	struct list_head	quarantine;
 135};
 136
 137static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
 138
 139void rt6_uncached_list_add(struct rt6_info *rt)
 140{
 141	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
 142
 143	rt->dst.rt_uncached_list = ul;
 144
 145	spin_lock_bh(&ul->lock);
 146	list_add_tail(&rt->dst.rt_uncached, &ul->head);
 147	spin_unlock_bh(&ul->lock);
 148}
 149
 150void rt6_uncached_list_del(struct rt6_info *rt)
 151{
 152	if (!list_empty(&rt->dst.rt_uncached)) {
 153		struct uncached_list *ul = rt->dst.rt_uncached_list;
 154
 155		spin_lock_bh(&ul->lock);
 156		list_del_init(&rt->dst.rt_uncached);
 157		spin_unlock_bh(&ul->lock);
 158	}
 159}
 160
 161static void rt6_uncached_list_flush_dev(struct net_device *dev)
 162{
 163	int cpu;
 164
 165	for_each_possible_cpu(cpu) {
 166		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
 167		struct rt6_info *rt, *safe;
 168
 169		if (list_empty(&ul->head))
 170			continue;
 171
 172		spin_lock_bh(&ul->lock);
 173		list_for_each_entry_safe(rt, safe, &ul->head, dst.rt_uncached) {
 174			struct inet6_dev *rt_idev = rt->rt6i_idev;
 175			struct net_device *rt_dev = rt->dst.dev;
 176			bool handled = false;
 177
 178			if (rt_idev->dev == dev) {
 179				rt->rt6i_idev = in6_dev_get(blackhole_netdev);
 180				in6_dev_put(rt_idev);
 181				handled = true;
 182			}
 183
 184			if (rt_dev == dev) {
 185				rt->dst.dev = blackhole_netdev;
 186				netdev_ref_replace(rt_dev, blackhole_netdev,
 187						   &rt->dst.dev_tracker,
 188						   GFP_ATOMIC);
 189				handled = true;
 190			}
 191			if (handled)
 192				list_move(&rt->dst.rt_uncached,
 193					  &ul->quarantine);
 194		}
 195		spin_unlock_bh(&ul->lock);
 196	}
 197}
 198
 199static inline const void *choose_neigh_daddr(const struct in6_addr *p,
 200					     struct sk_buff *skb,
 201					     const void *daddr)
 202{
 203	if (!ipv6_addr_any(p))
 204		return (const void *) p;
 205	else if (skb)
 206		return &ipv6_hdr(skb)->daddr;
 207	return daddr;
 208}
 209
 210struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
 211				   struct net_device *dev,
 212				   struct sk_buff *skb,
 213				   const void *daddr)
 214{
 215	struct neighbour *n;
 216
 217	daddr = choose_neigh_daddr(gw, skb, daddr);
 218	n = __ipv6_neigh_lookup(dev, daddr);
 219	if (n)
 220		return n;
 221
 222	n = neigh_create(&nd_tbl, daddr, dev);
 223	return IS_ERR(n) ? NULL : n;
 224}
 225
 226static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
 227					      struct sk_buff *skb,
 228					      const void *daddr)
 229{
 230	const struct rt6_info *rt = dst_rt6_info(dst);
 231
 232	return ip6_neigh_lookup(rt6_nexthop(rt, &in6addr_any),
 233				dst->dev, skb, daddr);
 234}
 235
 236static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
 237{
 238	const struct rt6_info *rt = dst_rt6_info(dst);
 239	struct net_device *dev = dst->dev;
 240
 241	daddr = choose_neigh_daddr(rt6_nexthop(rt, &in6addr_any), NULL, daddr);
 242	if (!daddr)
 243		return;
 244	if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
 245		return;
 246	if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
 247		return;
 248	__ipv6_confirm_neigh(dev, daddr);
 249}
 250
 251static struct dst_ops ip6_dst_ops_template = {
 252	.family			=	AF_INET6,
 253	.gc			=	ip6_dst_gc,
 254	.gc_thresh		=	1024,
 255	.check			=	ip6_dst_check,
 256	.default_advmss		=	ip6_default_advmss,
 257	.mtu			=	ip6_mtu,
 258	.cow_metrics		=	dst_cow_metrics_generic,
 259	.destroy		=	ip6_dst_destroy,
 260	.ifdown			=	ip6_dst_ifdown,
 261	.negative_advice	=	ip6_negative_advice,
 262	.link_failure		=	ip6_link_failure,
 263	.update_pmtu		=	ip6_rt_update_pmtu,
 264	.redirect		=	rt6_do_redirect,
 265	.local_out		=	__ip6_local_out,
 266	.neigh_lookup		=	ip6_dst_neigh_lookup,
 267	.confirm_neigh		=	ip6_confirm_neigh,
 268};
 269
 270static struct dst_ops ip6_dst_blackhole_ops = {
 271	.family			= AF_INET6,
 272	.default_advmss		= ip6_default_advmss,
 273	.neigh_lookup		= ip6_dst_neigh_lookup,
 274	.check			= ip6_dst_check,
 275	.destroy		= ip6_dst_destroy,
 276	.cow_metrics		= dst_cow_metrics_generic,
 277	.update_pmtu		= dst_blackhole_update_pmtu,
 278	.redirect		= dst_blackhole_redirect,
 279	.mtu			= dst_blackhole_mtu,
 280};
 281
 282static const u32 ip6_template_metrics[RTAX_MAX] = {
 283	[RTAX_HOPLIMIT - 1] = 0,
 284};
 285
 286static const struct fib6_info fib6_null_entry_template = {
 287	.fib6_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 288	.fib6_protocol  = RTPROT_KERNEL,
 289	.fib6_metric	= ~(u32)0,
 290	.fib6_ref	= REFCOUNT_INIT(1),
 291	.fib6_type	= RTN_UNREACHABLE,
 292	.fib6_metrics	= (struct dst_metrics *)&dst_default_metrics,
 293};
 294
 295static const struct rt6_info ip6_null_entry_template = {
 296	.dst = {
 297		.__rcuref	= RCUREF_INIT(1),
 298		.__use		= 1,
 299		.obsolete	= DST_OBSOLETE_FORCE_CHK,
 300		.error		= -ENETUNREACH,
 301		.input		= ip6_pkt_discard,
 302		.output		= ip6_pkt_discard_out,
 303	},
 304	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 305};
 306
 307#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 308
 309static const struct rt6_info ip6_prohibit_entry_template = {
 310	.dst = {
 311		.__rcuref	= RCUREF_INIT(1),
 312		.__use		= 1,
 313		.obsolete	= DST_OBSOLETE_FORCE_CHK,
 314		.error		= -EACCES,
 315		.input		= ip6_pkt_prohibit,
 316		.output		= ip6_pkt_prohibit_out,
 317	},
 318	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 319};
 320
 321static const struct rt6_info ip6_blk_hole_entry_template = {
 322	.dst = {
 323		.__rcuref	= RCUREF_INIT(1),
 324		.__use		= 1,
 325		.obsolete	= DST_OBSOLETE_FORCE_CHK,
 326		.error		= -EINVAL,
 327		.input		= dst_discard,
 328		.output		= dst_discard_out,
 329	},
 330	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 331};
 332
 333#endif
 334
 335static void rt6_info_init(struct rt6_info *rt)
 336{
 337	memset_after(rt, 0, dst);
 338}
 339
 340/* allocate dst with ip6_dst_ops */
 341struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
 342			       int flags)
 343{
 344	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
 345					DST_OBSOLETE_FORCE_CHK, flags);
 346
 347	if (rt) {
 348		rt6_info_init(rt);
 349		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
 350	}
 351
 352	return rt;
 353}
 354EXPORT_SYMBOL(ip6_dst_alloc);
 355
 356static void ip6_dst_destroy(struct dst_entry *dst)
 357{
 358	struct rt6_info *rt = dst_rt6_info(dst);
 359	struct fib6_info *from;
 360	struct inet6_dev *idev;
 361
 362	ip_dst_metrics_put(dst);
 363	rt6_uncached_list_del(rt);
 364
 365	idev = rt->rt6i_idev;
 366	if (idev) {
 367		rt->rt6i_idev = NULL;
 368		in6_dev_put(idev);
 369	}
 370
 371	from = xchg((__force struct fib6_info **)&rt->from, NULL);
 372	fib6_info_release(from);
 373}
 374
 375static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
 376{
 377	struct rt6_info *rt = dst_rt6_info(dst);
 378	struct inet6_dev *idev = rt->rt6i_idev;
 379
 380	if (idev && idev->dev != blackhole_netdev) {
 381		struct inet6_dev *blackhole_idev = in6_dev_get(blackhole_netdev);
 382
 383		if (blackhole_idev) {
 384			rt->rt6i_idev = blackhole_idev;
 385			in6_dev_put(idev);
 386		}
 387	}
 388}
 389
 390static bool __rt6_check_expired(const struct rt6_info *rt)
 391{
 392	if (rt->rt6i_flags & RTF_EXPIRES)
 393		return time_after(jiffies, rt->dst.expires);
 394	else
 395		return false;
 396}
 397
 398static bool rt6_check_expired(const struct rt6_info *rt)
 399{
 400	struct fib6_info *from;
 401
 402	from = rcu_dereference(rt->from);
 403
 404	if (rt->rt6i_flags & RTF_EXPIRES) {
 405		if (time_after(jiffies, rt->dst.expires))
 406			return true;
 407	} else if (from) {
 408		return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
 409			fib6_check_expired(from);
 410	}
 411	return false;
 412}
 413
 414void fib6_select_path(const struct net *net, struct fib6_result *res,
 415		      struct flowi6 *fl6, int oif, bool have_oif_match,
 416		      const struct sk_buff *skb, int strict)
 417{
 418	struct fib6_info *sibling, *next_sibling;
 419	struct fib6_info *match = res->f6i;
 420
 421	if (!match->nh && (!match->fib6_nsiblings || have_oif_match))
 422		goto out;
 423
 424	if (match->nh && have_oif_match && res->nh)
 425		return;
 426
 427	if (skb)
 428		IP6CB(skb)->flags |= IP6SKB_MULTIPATH;
 429
 430	/* We might have already computed the hash for ICMPv6 errors. In such
 431	 * case it will always be non-zero. Otherwise now is the time to do it.
 432	 */
 433	if (!fl6->mp_hash &&
 434	    (!match->nh || nexthop_is_multipath(match->nh)))
 435		fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
 436
 437	if (unlikely(match->nh)) {
 438		nexthop_path_fib6_result(res, fl6->mp_hash);
 439		return;
 440	}
 441
 442	if (fl6->mp_hash <= atomic_read(&match->fib6_nh->fib_nh_upper_bound))
 443		goto out;
 444
 445	list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
 446				 fib6_siblings) {
 447		const struct fib6_nh *nh = sibling->fib6_nh;
 448		int nh_upper_bound;
 449
 450		nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound);
 451		if (fl6->mp_hash > nh_upper_bound)
 452			continue;
 453		if (rt6_score_route(nh, sibling->fib6_flags, oif, strict) < 0)
 454			break;
 455		match = sibling;
 456		break;
 457	}
 458
 459out:
 460	res->f6i = match;
 461	res->nh = match->fib6_nh;
 462}
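/*
 * Illustrative sketch of the selection rule above: each sibling
 * nexthop owns a slice of hash space bounded by fib_nh_upper_bound,
 * and a flow's mp_hash picks the first nexthop whose bound it does not
 * exceed (hash-threshold multipath, cf. RFC 2992). Simplified, with
 * made-up types:
 */
#if 0
static int example_pick_nexthop(u32 mp_hash, const int *upper_bound,
				int num_paths)
{
	int i;

	for (i = 0; i < num_paths; i++)
		if ((int)mp_hash <= upper_bound[i])
			return i;	/* slice containing this hash */
	return num_paths - 1;		/* fall back to the last path */
}
#endif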
 463
 464/*
 465 *	Route lookup. rcu_read_lock() should be held.
 466 */
 467
 468static bool __rt6_device_match(struct net *net, const struct fib6_nh *nh,
 469			       const struct in6_addr *saddr, int oif, int flags)
 470{
 471	const struct net_device *dev;
 472
 473	if (nh->fib_nh_flags & RTNH_F_DEAD)
 474		return false;
 475
 476	dev = nh->fib_nh_dev;
 477	if (oif) {
 478		if (dev->ifindex == oif)
 479			return true;
 480	} else {
 481		if (ipv6_chk_addr(net, saddr, dev,
 482				  flags & RT6_LOOKUP_F_IFACE))
 483			return true;
 484	}
 485
 486	return false;
 487}
 488
 489struct fib6_nh_dm_arg {
 490	struct net		*net;
 491	const struct in6_addr	*saddr;
 492	int			oif;
 493	int			flags;
 494	struct fib6_nh		*nh;
 495};
 496
 497static int __rt6_nh_dev_match(struct fib6_nh *nh, void *_arg)
 498{
 499	struct fib6_nh_dm_arg *arg = _arg;
 500
 501	arg->nh = nh;
 502	return __rt6_device_match(arg->net, nh, arg->saddr, arg->oif,
 503				  arg->flags);
 504}
 505
 506/* returns fib6_nh from nexthop or NULL */
 507static struct fib6_nh *rt6_nh_dev_match(struct net *net, struct nexthop *nh,
 508					struct fib6_result *res,
 509					const struct in6_addr *saddr,
 510					int oif, int flags)
 511{
 512	struct fib6_nh_dm_arg arg = {
 513		.net   = net,
 514		.saddr = saddr,
 515		.oif   = oif,
 516		.flags = flags,
 517	};
 518
 519	if (nexthop_is_blackhole(nh))
 520		return NULL;
 521
 522	if (nexthop_for_each_fib6_nh(nh, __rt6_nh_dev_match, &arg))
 523		return arg.nh;
 524
 525	return NULL;
 526}
 527
 528static void rt6_device_match(struct net *net, struct fib6_result *res,
 529			     const struct in6_addr *saddr, int oif, int flags)
 530{
 531	struct fib6_info *f6i = res->f6i;
 532	struct fib6_info *spf6i;
 533	struct fib6_nh *nh;
 534
 535	if (!oif && ipv6_addr_any(saddr)) {
 536		if (unlikely(f6i->nh)) {
 537			nh = nexthop_fib6_nh(f6i->nh);
 538			if (nexthop_is_blackhole(f6i->nh))
 539				goto out_blackhole;
 540		} else {
 541			nh = f6i->fib6_nh;
 542		}
 543		if (!(nh->fib_nh_flags & RTNH_F_DEAD))
 544			goto out;
 545	}
 546
 547	for (spf6i = f6i; spf6i; spf6i = rcu_dereference(spf6i->fib6_next)) {
 548		bool matched = false;
 549
 550		if (unlikely(spf6i->nh)) {
 551			nh = rt6_nh_dev_match(net, spf6i->nh, res, saddr,
 552					      oif, flags);
 553			if (nh)
 554				matched = true;
 555		} else {
 556			nh = spf6i->fib6_nh;
 557			if (__rt6_device_match(net, nh, saddr, oif, flags))
 558				matched = true;
 559		}
 560		if (matched) {
 561			res->f6i = spf6i;
 562			goto out;
 563		}
 564	}
 565
 566	if (oif && flags & RT6_LOOKUP_F_IFACE) {
 567		res->f6i = net->ipv6.fib6_null_entry;
 568		nh = res->f6i->fib6_nh;
 569		goto out;
 570	}
 571
 572	if (unlikely(f6i->nh)) {
 573		nh = nexthop_fib6_nh(f6i->nh);
 574		if (nexthop_is_blackhole(f6i->nh))
 575			goto out_blackhole;
 576	} else {
 577		nh = f6i->fib6_nh;
 578	}
 579
 580	if (nh->fib_nh_flags & RTNH_F_DEAD) {
 581		res->f6i = net->ipv6.fib6_null_entry;
 582		nh = res->f6i->fib6_nh;
 583	}
 584out:
 585	res->nh = nh;
 586	res->fib6_type = res->f6i->fib6_type;
 587	res->fib6_flags = res->f6i->fib6_flags;
 588	return;
 589
 590out_blackhole:
 591	res->fib6_flags |= RTF_REJECT;
 592	res->fib6_type = RTN_BLACKHOLE;
 593	res->nh = nh;
 594}
 595
 596#ifdef CONFIG_IPV6_ROUTER_PREF
 597struct __rt6_probe_work {
 598	struct work_struct work;
 599	struct in6_addr target;
 600	struct net_device *dev;
 601	netdevice_tracker dev_tracker;
 602};
 603
 604static void rt6_probe_deferred(struct work_struct *w)
 605{
 606	struct in6_addr mcaddr;
 607	struct __rt6_probe_work *work =
 608		container_of(w, struct __rt6_probe_work, work);
 609
 610	addrconf_addr_solict_mult(&work->target, &mcaddr);
 611	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
 612	netdev_put(work->dev, &work->dev_tracker);
 613	kfree(work);
 614}
 615
 616static void rt6_probe(struct fib6_nh *fib6_nh)
 617{
 618	struct __rt6_probe_work *work = NULL;
 619	const struct in6_addr *nh_gw;
 620	unsigned long last_probe;
 621	struct neighbour *neigh;
 622	struct net_device *dev;
 623	struct inet6_dev *idev;
 624
 625	/*
 626	 * Okay, this does not seem to be appropriate
 627	 * for now, however, we need to check if it
 628	 * is really so; aka Router Reachability Probing.
 629	 *
 630	 * Router Reachability Probe MUST be rate-limited
 631	 * to no more than one per minute.
 632	 */
 633	if (!fib6_nh->fib_nh_gw_family)
 634		return;
 635
 636	nh_gw = &fib6_nh->fib_nh_gw6;
 637	dev = fib6_nh->fib_nh_dev;
 638	rcu_read_lock();
 639	last_probe = READ_ONCE(fib6_nh->last_probe);
 640	idev = __in6_dev_get(dev);
 641	neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
 642	if (neigh) {
 643		if (READ_ONCE(neigh->nud_state) & NUD_VALID)
 644			goto out;
 645
 646		write_lock_bh(&neigh->lock);
 647		if (!(neigh->nud_state & NUD_VALID) &&
 648		    time_after(jiffies,
 649			       neigh->updated +
 650			       READ_ONCE(idev->cnf.rtr_probe_interval))) {
 651			work = kmalloc(sizeof(*work), GFP_ATOMIC);
 652			if (work)
 653				__neigh_set_probe_once(neigh);
 654		}
 655		write_unlock_bh(&neigh->lock);
 656	} else if (time_after(jiffies, last_probe +
 657				       READ_ONCE(idev->cnf.rtr_probe_interval))) {
 658		work = kmalloc(sizeof(*work), GFP_ATOMIC);
 659	}
 660
 661	if (!work || cmpxchg(&fib6_nh->last_probe,
 662			     last_probe, jiffies) != last_probe) {
 663		kfree(work);
 664	} else {
 665		INIT_WORK(&work->work, rt6_probe_deferred);
 666		work->target = *nh_gw;
 667		netdev_hold(dev, &work->dev_tracker, GFP_ATOMIC);
 668		work->dev = dev;
 669		schedule_work(&work->work);
 670	}
 671
 672out:
 673	rcu_read_unlock();
 674}
 675#else
 676static inline void rt6_probe(struct fib6_nh *fib6_nh)
 677{
 678}
 679#endif
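/* Illustration (not part of the upstream source): rt6_probe() rate-limits
 * itself without a dedicated lock by publishing the probe time with
 * cmpxchg().  Only the CPU whose cmpxchg() moves last_probe from the
 * value it sampled to the current jiffies schedules the deferred work;
 * every racing CPU sees a mismatch, frees its allocation and backs off.
 * A minimal sketch of the same claim pattern, with a hypothetical
 * 'last' slot standing in for fib6_nh->last_probe:
 */
#if 0	/* example only, never compiled */
static bool try_claim_probe(unsigned long *last, unsigned long sampled)
{
	/* exactly one caller per sampled value wins the slot */
	return cmpxchg(last, sampled, jiffies) == sampled;
}
#endif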
 680
 681/*
 682 * Default Router Selection (RFC 2461 6.3.6)
 683 */
 684static enum rt6_nud_state rt6_check_neigh(const struct fib6_nh *fib6_nh)
 685{
 686	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
 687	struct neighbour *neigh;
 688
 689	rcu_read_lock();
 690	neigh = __ipv6_neigh_lookup_noref(fib6_nh->fib_nh_dev,
 691					  &fib6_nh->fib_nh_gw6);
 692	if (neigh) {
 693		u8 nud_state = READ_ONCE(neigh->nud_state);
 694
 695		if (nud_state & NUD_VALID)
 696			ret = RT6_NUD_SUCCEED;
 697#ifdef CONFIG_IPV6_ROUTER_PREF
 698		else if (!(nud_state & NUD_FAILED))
 699			ret = RT6_NUD_SUCCEED;
 700		else
 701			ret = RT6_NUD_FAIL_PROBE;
 702#endif
 703	} else {
 704		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
 705		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
 706	}
 707	rcu_read_unlock();
 708
 709	return ret;
 710}
 711
 712static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
 713			   int strict)
 714{
 715	int m = 0;
 716
 717	if (!oif || nh->fib_nh_dev->ifindex == oif)
 718		m = 2;
 719
 720	if (!m && (strict & RT6_LOOKUP_F_IFACE))
 721		return RT6_NUD_FAIL_HARD;
 722#ifdef CONFIG_IPV6_ROUTER_PREF
 723	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(fib6_flags)) << 2;
 724#endif
 725	if ((strict & RT6_LOOKUP_F_REACHABLE) &&
 726	    !(fib6_flags & RTF_NONEXTHOP) && nh->fib_nh_gw_family) {
 727		int n = rt6_check_neigh(nh);
 728		if (n < 0)
 729			return n;
 730	}
 731	return m;
 732}
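/* Worked example (illustrative; assumes the usual pref encoding where a
 * high router preference decodes to 3): a nexthop whose device matches
 * oif scores m = 2, and with CONFIG_IPV6_ROUTER_PREF a high-preference
 * route adds 3 << 2, giving m = 2 | (3 << 2) = 14.  The preference thus
 * lives in higher bits than the interface bonus, so it dominates the
 * comparison in find_match() below.
 */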
 733
 734static bool find_match(struct fib6_nh *nh, u32 fib6_flags,
 735		       int oif, int strict, int *mpri, bool *do_rr)
 736{
 737	bool match_do_rr = false;
 738	bool rc = false;
 739	int m;
 740
 741	if (nh->fib_nh_flags & RTNH_F_DEAD)
 742		goto out;
 743
 744	if (ip6_ignore_linkdown(nh->fib_nh_dev) &&
 745	    nh->fib_nh_flags & RTNH_F_LINKDOWN &&
 746	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
 747		goto out;
 748
 749	m = rt6_score_route(nh, fib6_flags, oif, strict);
 750	if (m == RT6_NUD_FAIL_DO_RR) {
 751		match_do_rr = true;
 752		m = 0; /* lowest valid score */
 753	} else if (m == RT6_NUD_FAIL_HARD) {
 754		goto out;
 755	}
 756
 757	if (strict & RT6_LOOKUP_F_REACHABLE)
 758		rt6_probe(nh);
 759
 760	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
 761	if (m > *mpri) {
 762		*do_rr = match_do_rr;
 763		*mpri = m;
 764		rc = true;
 765	}
 766out:
 767	return rc;
 768}
 769
 770struct fib6_nh_frl_arg {
 771	u32		flags;
 772	int		oif;
 773	int		strict;
 774	int		*mpri;
 775	bool		*do_rr;
 776	struct fib6_nh	*nh;
 777};
 778
 779static int rt6_nh_find_match(struct fib6_nh *nh, void *_arg)
 780{
 781	struct fib6_nh_frl_arg *arg = _arg;
 782
 783	arg->nh = nh;
 784	return find_match(nh, arg->flags, arg->oif, arg->strict,
 785			  arg->mpri, arg->do_rr);
 786}
 787
 788static void __find_rr_leaf(struct fib6_info *f6i_start,
 789			   struct fib6_info *nomatch, u32 metric,
 790			   struct fib6_result *res, struct fib6_info **cont,
 791			   int oif, int strict, bool *do_rr, int *mpri)
 792{
 793	struct fib6_info *f6i;
 794
 795	for (f6i = f6i_start;
 796	     f6i && f6i != nomatch;
 797	     f6i = rcu_dereference(f6i->fib6_next)) {
 798		bool matched = false;
 799		struct fib6_nh *nh;
 800
 801		if (cont && f6i->fib6_metric != metric) {
 802			*cont = f6i;
 803			return;
 804		}
 805
 806		if (fib6_check_expired(f6i))
 807			continue;
 808
 809		if (unlikely(f6i->nh)) {
 810			struct fib6_nh_frl_arg arg = {
 811				.flags  = f6i->fib6_flags,
 812				.oif    = oif,
 813				.strict = strict,
 814				.mpri   = mpri,
 815				.do_rr  = do_rr
 816			};
 817
 818			if (nexthop_is_blackhole(f6i->nh)) {
 819				res->fib6_flags = RTF_REJECT;
 820				res->fib6_type = RTN_BLACKHOLE;
 821				res->f6i = f6i;
 822				res->nh = nexthop_fib6_nh(f6i->nh);
 823				return;
 824			}
 825			if (nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_find_match,
 826						     &arg)) {
 827				matched = true;
 828				nh = arg.nh;
 829			}
 830		} else {
 831			nh = f6i->fib6_nh;
 832			if (find_match(nh, f6i->fib6_flags, oif, strict,
 833				       mpri, do_rr))
 834				matched = true;
 835		}
 836		if (matched) {
 837			res->f6i = f6i;
 838			res->nh = nh;
 839			res->fib6_flags = f6i->fib6_flags;
 840			res->fib6_type = f6i->fib6_type;
 841		}
 842	}
 843}
 844
 845static void find_rr_leaf(struct fib6_node *fn, struct fib6_info *leaf,
 846			 struct fib6_info *rr_head, int oif, int strict,
 847			 bool *do_rr, struct fib6_result *res)
 848{
 849	u32 metric = rr_head->fib6_metric;
 850	struct fib6_info *cont = NULL;
 851	int mpri = -1;
 852
 853	__find_rr_leaf(rr_head, NULL, metric, res, &cont,
 854		       oif, strict, do_rr, &mpri);
 855
 856	__find_rr_leaf(leaf, rr_head, metric, res, &cont,
 857		       oif, strict, do_rr, &mpri);
 858
 859	if (res->f6i || !cont)
 860		return;
 861
 862	__find_rr_leaf(cont, NULL, metric, res, NULL,
 863		       oif, strict, do_rr, &mpri);
 864}
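/* Illustration: find_rr_leaf() treats the same-metric group as a ring.
 * With entries A B C D sharing one metric and fn->rr_ptr at C, the
 * first __find_rr_leaf() pass covers C..D (recording the start of the
 * next metric group in 'cont'), the second wraps around to A..B and
 * stops at C, and only if neither pass matched does the final pass
 * descend into the lower-priority group at 'cont'.
 */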
 865
 866static void rt6_select(struct net *net, struct fib6_node *fn, int oif,
 867		       struct fib6_result *res, int strict)
 868{
 869	struct fib6_info *leaf = rcu_dereference(fn->leaf);
 870	struct fib6_info *rt0;
 871	bool do_rr = false;
 872	int key_plen;
 873
 874	/* make sure this function or its helpers set f6i */
 875	res->f6i = NULL;
 876
 877	if (!leaf || leaf == net->ipv6.fib6_null_entry)
 878		goto out;
 879
 880	rt0 = rcu_dereference(fn->rr_ptr);
 881	if (!rt0)
 882		rt0 = leaf;
 883
 884	/* Double check to make sure fn is not an intermediate node
 885	 * and fn->leaf does not point to its child's leaf
 886	 * (This might happen if all routes under fn are deleted from
 887	 * the tree and fib6_repair_tree() is called on the node.)
 888	 */
 889	key_plen = rt0->fib6_dst.plen;
 890#ifdef CONFIG_IPV6_SUBTREES
 891	if (rt0->fib6_src.plen)
 892		key_plen = rt0->fib6_src.plen;
 893#endif
 894	if (fn->fn_bit != key_plen)
 895		goto out;
 896
 897	find_rr_leaf(fn, leaf, rt0, oif, strict, &do_rr, res);
 898	if (do_rr) {
 899		struct fib6_info *next = rcu_dereference(rt0->fib6_next);
 900
 901		/* no entries matched; do round-robin */
 902		if (!next || next->fib6_metric != rt0->fib6_metric)
 903			next = leaf;
 904
 905		if (next != rt0) {
 906			spin_lock_bh(&leaf->fib6_table->tb6_lock);
 907			/* make sure next is not being deleted from the tree */
 908			if (next->fib6_node)
 909				rcu_assign_pointer(fn->rr_ptr, next);
 910			spin_unlock_bh(&leaf->fib6_table->tb6_lock);
 911		}
 912	}
 913
 914out:
 915	if (!res->f6i) {
 916		res->f6i = net->ipv6.fib6_null_entry;
 917		res->nh = res->f6i->fib6_nh;
 918		res->fib6_flags = res->f6i->fib6_flags;
 919		res->fib6_type = res->f6i->fib6_type;
 920	}
 921}
 922
 923static bool rt6_is_gw_or_nonexthop(const struct fib6_result *res)
 924{
 925	return (res->f6i->fib6_flags & RTF_NONEXTHOP) ||
 926	       res->nh->fib_nh_gw_family;
 927}
 928
 929#ifdef CONFIG_IPV6_ROUTE_INFO
 930int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 931		  const struct in6_addr *gwaddr)
 932{
 933	struct net *net = dev_net(dev);
 934	struct route_info *rinfo = (struct route_info *) opt;
 935	struct in6_addr prefix_buf, *prefix;
 936	struct fib6_table *table;
 937	unsigned int pref;
 938	unsigned long lifetime;
 939	struct fib6_info *rt;
 940
 941	if (len < sizeof(struct route_info)) {
 942		return -EINVAL;
 943	}
 944
 945	/* Sanity check for prefix_len and length */
 946	if (rinfo->length > 3) {
 947		return -EINVAL;
 948	} else if (rinfo->prefix_len > 128) {
 949		return -EINVAL;
 950	} else if (rinfo->prefix_len > 64) {
 951		if (rinfo->length < 2) {
 952			return -EINVAL;
 953		}
 954	} else if (rinfo->prefix_len > 0) {
 955		if (rinfo->length < 1) {
 956			return -EINVAL;
 957		}
 958	}
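	/* Example (illustrative): the option length is in 8-octet units,
	 * so length 1 is the bare header with no prefix octets, length 2
	 * adds 8 prefix octets, and length 3 the full 16.  Only a
	 * length-3 option is used verbatim below; shorter ones pass
	 * through ipv6_addr_prefix(), which zeroes the bits beyond
	 * prefix_len.
	 */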
 959
 960	pref = rinfo->route_pref;
 961	if (pref == ICMPV6_ROUTER_PREF_INVALID)
 962		return -EINVAL;
 963
 964	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
 965
 966	if (rinfo->length == 3)
 967		prefix = (struct in6_addr *)rinfo->prefix;
 968	else {
 969		/* ipv6_addr_prefix() zeroes bits beyond prefix_len, so this is safe */
 970		ipv6_addr_prefix(&prefix_buf,
 971				 (struct in6_addr *)rinfo->prefix,
 972				 rinfo->prefix_len);
 973		prefix = &prefix_buf;
 974	}
 975
 976	if (rinfo->prefix_len == 0)
 977		rt = rt6_get_dflt_router(net, gwaddr, dev);
 978	else
 979		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
 980					gwaddr, dev);
 981
 982	if (rt && !lifetime) {
 983		ip6_del_rt(net, rt, false);
 984		rt = NULL;
 985	}
 986
 987	if (!rt && lifetime)
 988		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
 989					dev, pref);
 990	else if (rt)
 991		rt->fib6_flags = RTF_ROUTEINFO |
 992				 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
 993
 994	if (rt) {
 995		table = rt->fib6_table;
 996		spin_lock_bh(&table->tb6_lock);
 997
 998		if (!addrconf_finite_timeout(lifetime)) {
 999			fib6_clean_expires(rt);
1000			fib6_remove_gc_list(rt);
1001		} else {
1002			fib6_set_expires(rt, jiffies + HZ * lifetime);
1003			fib6_add_gc_list(rt);
1004		}
1005
1006		spin_unlock_bh(&table->tb6_lock);
1007
1008		fib6_info_release(rt);
1009	}
1010	return 0;
1011}
1012#endif
1013
1014/*
1015 *	Misc support functions
1016 */
1017
1018/* called with rcu_lock held */
1019static struct net_device *ip6_rt_get_dev_rcu(const struct fib6_result *res)
1020{
1021	struct net_device *dev = res->nh->fib_nh_dev;
1022
1023	if (res->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
1024		/* for copies of local routes, dst->dev needs to be the
1025		 * device itself if it is a master device, the master device
1026		 * if the device is enslaved, and the loopback device otherwise
1027		 */
1028		if (netif_is_l3_slave(dev) &&
1029		    !rt6_need_strict(&res->f6i->fib6_dst.addr))
1030			dev = l3mdev_master_dev_rcu(dev);
1031		else if (!netif_is_l3_master(dev))
1032			dev = dev_net(dev)->loopback_dev;
1033		/* the remaining case is netif_is_l3_master(dev) being true,
1034		 * in which case dev is returned unchanged
1035		 */
1036	}
1037
1038	return dev;
1039}
1040
1041static const int fib6_prop[RTN_MAX + 1] = {
1042	[RTN_UNSPEC]	= 0,
1043	[RTN_UNICAST]	= 0,
1044	[RTN_LOCAL]	= 0,
1045	[RTN_BROADCAST]	= 0,
1046	[RTN_ANYCAST]	= 0,
1047	[RTN_MULTICAST]	= 0,
1048	[RTN_BLACKHOLE]	= -EINVAL,
1049	[RTN_UNREACHABLE] = -EHOSTUNREACH,
1050	[RTN_PROHIBIT]	= -EACCES,
1051	[RTN_THROW]	= -EAGAIN,
1052	[RTN_NAT]	= -EINVAL,
1053	[RTN_XRESOLVE]	= -EINVAL,
1054};
1055
1056static int ip6_rt_type_to_error(u8 fib6_type)
1057{
1058	return fib6_prop[fib6_type];
1059}
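/* Example (illustrative): fib6_prop[] maps a route type to the errno a
 * sender sees, e.g. a lookup resolving to an RTN_PROHIBIT route makes
 * ip6_rt_type_to_error() return -EACCES, which ip6_rt_init_dst_reject()
 * below stores in dst.error so the prohibit handlers can surface it
 * (typically as an ICMPv6 administratively-prohibited error).
 */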
1060
1061static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
1062{
1063	unsigned short flags = 0;
1064
1065	if (rt->dst_nocount)
1066		flags |= DST_NOCOUNT;
1067	if (rt->dst_nopolicy)
1068		flags |= DST_NOPOLICY;
1069
1070	return flags;
1071}
1072
1073static void ip6_rt_init_dst_reject(struct rt6_info *rt, u8 fib6_type)
1074{
1075	rt->dst.error = ip6_rt_type_to_error(fib6_type);
1076
1077	switch (fib6_type) {
1078	case RTN_BLACKHOLE:
1079		rt->dst.output = dst_discard_out;
1080		rt->dst.input = dst_discard;
1081		break;
1082	case RTN_PROHIBIT:
1083		rt->dst.output = ip6_pkt_prohibit_out;
1084		rt->dst.input = ip6_pkt_prohibit;
1085		break;
1086	case RTN_THROW:
1087	case RTN_UNREACHABLE:
1088	default:
1089		rt->dst.output = ip6_pkt_discard_out;
1090		rt->dst.input = ip6_pkt_discard;
1091		break;
1092	}
1093}
1094
1095static void ip6_rt_init_dst(struct rt6_info *rt, const struct fib6_result *res)
1096{
1097	struct fib6_info *f6i = res->f6i;
1098
1099	if (res->fib6_flags & RTF_REJECT) {
1100		ip6_rt_init_dst_reject(rt, res->fib6_type);
1101		return;
1102	}
1103
1104	rt->dst.error = 0;
1105	rt->dst.output = ip6_output;
1106
1107	if (res->fib6_type == RTN_LOCAL || res->fib6_type == RTN_ANYCAST) {
1108		rt->dst.input = ip6_input;
1109	} else if (ipv6_addr_type(&f6i->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
1110		rt->dst.input = ip6_mc_input;
1111	} else {
1112		rt->dst.input = ip6_forward;
1113	}
1114
1115	if (res->nh->fib_nh_lws) {
1116		rt->dst.lwtstate = lwtstate_get(res->nh->fib_nh_lws);
1117		lwtunnel_set_redirect(&rt->dst);
1118	}
1119
1120	rt->dst.lastuse = jiffies;
1121}
1122
1123/* Caller must already hold reference to @from */
1124static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
1125{
1126	rt->rt6i_flags &= ~RTF_EXPIRES;
1127	rcu_assign_pointer(rt->from, from);
1128	ip_dst_init_metrics(&rt->dst, from->fib6_metrics);
1129}
1130
1131/* Caller must already hold reference to f6i in result */
1132static void ip6_rt_copy_init(struct rt6_info *rt, const struct fib6_result *res)
1133{
1134	const struct fib6_nh *nh = res->nh;
1135	const struct net_device *dev = nh->fib_nh_dev;
1136	struct fib6_info *f6i = res->f6i;
1137
1138	ip6_rt_init_dst(rt, res);
1139
1140	rt->rt6i_dst = f6i->fib6_dst;
1141	rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
1142	rt->rt6i_flags = res->fib6_flags;
1143	if (nh->fib_nh_gw_family) {
1144		rt->rt6i_gateway = nh->fib_nh_gw6;
1145		rt->rt6i_flags |= RTF_GATEWAY;
1146	}
1147	rt6_set_from(rt, f6i);
1148#ifdef CONFIG_IPV6_SUBTREES
1149	rt->rt6i_src = f6i->fib6_src;
1150#endif
1151}
1152
1153static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
1154					struct in6_addr *saddr)
1155{
1156	struct fib6_node *pn, *sn;
1157	while (1) {
1158		if (fn->fn_flags & RTN_TL_ROOT)
1159			return NULL;
1160		pn = rcu_dereference(fn->parent);
1161		sn = FIB6_SUBTREE(pn);
1162		if (sn && sn != fn)
1163			fn = fib6_node_lookup(sn, NULL, saddr);
1164		else
1165			fn = pn;
1166		if (fn->fn_flags & RTN_RTINFO)
1167			return fn;
1168	}
1169}
1170
1171static bool ip6_hold_safe(struct net *net, struct rt6_info **prt)
1172{
1173	struct rt6_info *rt = *prt;
1174
1175	if (dst_hold_safe(&rt->dst))
1176		return true;
1177	if (net) {
1178		rt = net->ipv6.ip6_null_entry;
1179		dst_hold(&rt->dst);
1180	} else {
1181		rt = NULL;
1182	}
1183	*prt = rt;
1184	return false;
1185}
1186
1187/* called with rcu_lock held */
1188static struct rt6_info *ip6_create_rt_rcu(const struct fib6_result *res)
1189{
1190	struct net_device *dev = res->nh->fib_nh_dev;
1191	struct fib6_info *f6i = res->f6i;
1192	unsigned short flags;
1193	struct rt6_info *nrt;
1194
1195	if (!fib6_info_hold_safe(f6i))
1196		goto fallback;
1197
1198	flags = fib6_info_dst_flags(f6i);
1199	nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
1200	if (!nrt) {
1201		fib6_info_release(f6i);
1202		goto fallback;
1203	}
1204
1205	ip6_rt_copy_init(nrt, res);
1206	return nrt;
1207
1208fallback:
1209	nrt = dev_net(dev)->ipv6.ip6_null_entry;
1210	dst_hold(&nrt->dst);
1211	return nrt;
1212}
1213
1214INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_lookup(struct net *net,
1215					     struct fib6_table *table,
1216					     struct flowi6 *fl6,
1217					     const struct sk_buff *skb,
1218					     int flags)
1219{
1220	struct fib6_result res = {};
1221	struct fib6_node *fn;
1222	struct rt6_info *rt;
1223
1224	rcu_read_lock();
1225	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1226restart:
1227	res.f6i = rcu_dereference(fn->leaf);
1228	if (!res.f6i)
1229		res.f6i = net->ipv6.fib6_null_entry;
1230	else
1231		rt6_device_match(net, &res, &fl6->saddr, fl6->flowi6_oif,
1232				 flags);
1233
1234	if (res.f6i == net->ipv6.fib6_null_entry) {
1235		fn = fib6_backtrack(fn, &fl6->saddr);
1236		if (fn)
1237			goto restart;
1238
1239		rt = net->ipv6.ip6_null_entry;
1240		dst_hold(&rt->dst);
1241		goto out;
1242	} else if (res.fib6_flags & RTF_REJECT) {
1243		goto do_create;
1244	}
1245
1246	fib6_select_path(net, &res, fl6, fl6->flowi6_oif,
1247			 fl6->flowi6_oif != 0, skb, flags);
1248
1249	/* Search through exception table */
1250	rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
1251	if (rt) {
1252		if (ip6_hold_safe(net, &rt))
1253			dst_use_noref(&rt->dst, jiffies);
1254	} else {
1255do_create:
1256		rt = ip6_create_rt_rcu(&res);
1257	}
1258
1259out:
1260	trace_fib6_table_lookup(net, &res, table, fl6);
1261
1262	rcu_read_unlock();
1263
1264	return rt;
1265}
1266
1267struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
1268				   const struct sk_buff *skb, int flags)
1269{
1270	return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
1271}
1272EXPORT_SYMBOL_GPL(ip6_route_lookup);
1273
1274struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
1275			    const struct in6_addr *saddr, int oif,
1276			    const struct sk_buff *skb, int strict)
1277{
1278	struct flowi6 fl6 = {
1279		.flowi6_oif = oif,
1280		.daddr = *daddr,
1281	};
1282	struct dst_entry *dst;
1283	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
1284
1285	if (saddr) {
1286		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
1287		flags |= RT6_LOOKUP_F_HAS_SADDR;
1288	}
1289
1290	dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
1291	if (dst->error == 0)
1292		return dst_rt6_info(dst);
1293
1294	dst_release(dst);
1295
1296	return NULL;
1297}
1298EXPORT_SYMBOL(rt6_lookup);
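/* Usage sketch (hypothetical caller, not from this file): resolve a
 * destination and release the reference the lookup took.
 */
#if 0	/* example only, never compiled */
static bool ipv6_dest_is_routable(struct net *net,
				  const struct in6_addr *daddr)
{
	struct rt6_info *rt;

	rt = rt6_lookup(net, daddr, NULL, 0, NULL, 0);
	if (!rt)
		return false;
	ip6_rt_put(rt);		/* drop the lookup's reference */
	return true;
}
#endif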
1299
1300/* ip6_ins_rt is called with table->tb6_lock NOT held.
1301 * It takes a new route entry; if the addition fails for any reason,
1302 * the route is released.
1303 * Caller must hold dst before calling it.
1304 */
1305
1306static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
1307			struct netlink_ext_ack *extack)
1308{
1309	int err;
1310	struct fib6_table *table;
1311
1312	table = rt->fib6_table;
1313	spin_lock_bh(&table->tb6_lock);
1314	err = fib6_add(&table->tb6_root, rt, info, extack);
1315	spin_unlock_bh(&table->tb6_lock);
1316
1317	return err;
1318}
1319
1320int ip6_ins_rt(struct net *net, struct fib6_info *rt)
1321{
1322	struct nl_info info = {	.nl_net = net, };
1323
1324	return __ip6_ins_rt(rt, &info, NULL);
1325}
1326
1327static struct rt6_info *ip6_rt_cache_alloc(const struct fib6_result *res,
1328					   const struct in6_addr *daddr,
1329					   const struct in6_addr *saddr)
1330{
1331	struct fib6_info *f6i = res->f6i;
1332	struct net_device *dev;
1333	struct rt6_info *rt;
1334
1335	/*
1336	 *	Clone the route.
1337	 */
1338
1339	if (!fib6_info_hold_safe(f6i))
1340		return NULL;
1341
1342	dev = ip6_rt_get_dev_rcu(res);
1343	rt = ip6_dst_alloc(dev_net(dev), dev, 0);
1344	if (!rt) {
1345		fib6_info_release(f6i);
1346		return NULL;
1347	}
1348
1349	ip6_rt_copy_init(rt, res);
1350	rt->rt6i_flags |= RTF_CACHE;
1351	rt->rt6i_dst.addr = *daddr;
1352	rt->rt6i_dst.plen = 128;
1353
1354	if (!rt6_is_gw_or_nonexthop(res)) {
1355		if (f6i->fib6_dst.plen != 128 &&
1356		    ipv6_addr_equal(&f6i->fib6_dst.addr, daddr))
1357			rt->rt6i_flags |= RTF_ANYCAST;
1358#ifdef CONFIG_IPV6_SUBTREES
1359		if (rt->rt6i_src.plen && saddr) {
1360			rt->rt6i_src.addr = *saddr;
1361			rt->rt6i_src.plen = 128;
1362		}
1363#endif
1364	}
1365
1366	return rt;
1367}
1368
1369static struct rt6_info *ip6_rt_pcpu_alloc(const struct fib6_result *res)
1370{
1371	struct fib6_info *f6i = res->f6i;
1372	unsigned short flags = fib6_info_dst_flags(f6i);
1373	struct net_device *dev;
1374	struct rt6_info *pcpu_rt;
1375
1376	if (!fib6_info_hold_safe(f6i))
1377		return NULL;
1378
1379	rcu_read_lock();
1380	dev = ip6_rt_get_dev_rcu(res);
1381	pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags | DST_NOCOUNT);
1382	rcu_read_unlock();
1383	if (!pcpu_rt) {
1384		fib6_info_release(f6i);
1385		return NULL;
1386	}
1387	ip6_rt_copy_init(pcpu_rt, res);
1388	pcpu_rt->rt6i_flags |= RTF_PCPU;
1389
1390	if (f6i->nh)
1391		pcpu_rt->sernum = rt_genid_ipv6(dev_net(dev));
1392
1393	return pcpu_rt;
1394}
1395
1396static bool rt6_is_valid(const struct rt6_info *rt6)
1397{
1398	return rt6->sernum == rt_genid_ipv6(dev_net(rt6->dst.dev));
1399}
1400
1401/* It should be called with rcu_read_lock() acquired */
1402static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
1403{
1404	struct rt6_info *pcpu_rt;
1405
1406	pcpu_rt = this_cpu_read(*res->nh->rt6i_pcpu);
1407
1408	if (pcpu_rt && pcpu_rt->sernum && !rt6_is_valid(pcpu_rt)) {
1409		struct rt6_info *prev, **p;
1410
1411		p = this_cpu_ptr(res->nh->rt6i_pcpu);
1412		prev = xchg(p, NULL);
1413		if (prev) {
1414			dst_dev_put(&prev->dst);
1415			dst_release(&prev->dst);
1416		}
1417
1418		pcpu_rt = NULL;
1419	}
1420
1421	return pcpu_rt;
1422}
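/* Note (illustrative): a per-cpu clone records the namespace rt_genid
 * it was created under, but only when the route uses a nexthop object
 * (see ip6_rt_pcpu_alloc() above).  Bumping the genid then invalidates
 * such clones lazily: the next lookup on each CPU sees the stale
 * sernum here, drops the clone and rebuilds it.
 */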
1423
1424static struct rt6_info *rt6_make_pcpu_route(struct net *net,
1425					    const struct fib6_result *res)
1426{
1427	struct rt6_info *pcpu_rt, *prev, **p;
1428
1429	pcpu_rt = ip6_rt_pcpu_alloc(res);
1430	if (!pcpu_rt)
1431		return NULL;
1432
1433	p = this_cpu_ptr(res->nh->rt6i_pcpu);
1434	prev = cmpxchg(p, NULL, pcpu_rt);
1435	BUG_ON(prev);
1436
1437	if (res->f6i->fib6_destroying) {
1438		struct fib6_info *from;
1439
1440		from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL);
1441		fib6_info_release(from);
1442	}
1443
1444	return pcpu_rt;
1445}
1446
1447/* exception hash table implementation
1448 */
1449static DEFINE_SPINLOCK(rt6_exception_lock);
1450
1451/* Remove rt6_ex from hash table and free the memory
1452 * Caller must hold rt6_exception_lock
1453 */
1454static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1455				 struct rt6_exception *rt6_ex)
1456{
1457	struct fib6_info *from;
1458	struct net *net;
1459
1460	if (!bucket || !rt6_ex)
1461		return;
1462
1463	net = dev_net(rt6_ex->rt6i->dst.dev);
1464	net->ipv6.rt6_stats->fib_rt_cache--;
1465
1466	/* completely purge the exception to allow releasing the held resources:
1467	 * some [sk] cache may keep the dst around for an unlimited time
1468	 */
1469	from = xchg((__force struct fib6_info **)&rt6_ex->rt6i->from, NULL);
1470	fib6_info_release(from);
1471	dst_dev_put(&rt6_ex->rt6i->dst);
1472
1473	hlist_del_rcu(&rt6_ex->hlist);
1474	dst_release(&rt6_ex->rt6i->dst);
1475	kfree_rcu(rt6_ex, rcu);
1476	WARN_ON_ONCE(!bucket->depth);
1477	bucket->depth--;
1478}
1479
1480/* Remove oldest rt6_ex in bucket and free the memory
1481 * Caller must hold rt6_exception_lock
1482 */
1483static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1484{
1485	struct rt6_exception *rt6_ex, *oldest = NULL;
1486
1487	if (!bucket)
1488		return;
1489
1490	hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1491		if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1492			oldest = rt6_ex;
1493	}
1494	rt6_remove_exception(bucket, oldest);
1495}
1496
1497static u32 rt6_exception_hash(const struct in6_addr *dst,
1498			      const struct in6_addr *src)
1499{
1500	static siphash_aligned_key_t rt6_exception_key;
1501	struct {
1502		struct in6_addr dst;
1503		struct in6_addr src;
1504	} __aligned(SIPHASH_ALIGNMENT) combined = {
1505		.dst = *dst,
1506	};
1507	u64 val;
1508
1509	net_get_random_once(&rt6_exception_key, sizeof(rt6_exception_key));
1510
1511#ifdef CONFIG_IPV6_SUBTREES
1512	if (src)
1513		combined.src = *src;
1514#endif
1515	val = siphash(&combined, sizeof(combined), &rt6_exception_key);
1516
1517	return hash_64(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1518}
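/* Illustration: the table holds FIB6_EXCEPTION_BUCKET_SIZE buckets, so
 * the siphash of (dst, src) under a boot-time random key is folded by
 * hash_64() into a bucket index; the helpers below then reach the chain
 * with plain pointer arithmetic, e.g. bucket += rt6_exception_hash(...).
 * The random key makes bucket placement unpredictable to remote
 * senders, which (together with the randomized depth limit in
 * rt6_insert_exception()) blunts hash-flooding attacks.
 */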
1519
1520/* Helper function to find the cached rt in the hash table
1521 * and update bucket pointer to point to the bucket for this
1522 * (daddr, saddr) pair
1523 * Caller must hold rt6_exception_lock
1524 */
1525static struct rt6_exception *
1526__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1527			      const struct in6_addr *daddr,
1528			      const struct in6_addr *saddr)
1529{
1530	struct rt6_exception *rt6_ex;
1531	u32 hval;
1532
1533	if (!(*bucket) || !daddr)
1534		return NULL;
1535
1536	hval = rt6_exception_hash(daddr, saddr);
1537	*bucket += hval;
1538
1539	hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1540		struct rt6_info *rt6 = rt6_ex->rt6i;
1541		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1542
1543#ifdef CONFIG_IPV6_SUBTREES
1544		if (matched && saddr)
1545			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1546#endif
1547		if (matched)
1548			return rt6_ex;
1549	}
1550	return NULL;
1551}
1552
1553/* Helper function to find the cached rt in the hash table
1554 * and update bucket pointer to point to the bucket for this
1555 * (daddr, saddr) pair
1556 * Caller must hold rcu_read_lock()
1557 */
1558static struct rt6_exception *
1559__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1560			 const struct in6_addr *daddr,
1561			 const struct in6_addr *saddr)
1562{
1563	struct rt6_exception *rt6_ex;
1564	u32 hval;
1565
1566	WARN_ON_ONCE(!rcu_read_lock_held());
1567
1568	if (!(*bucket) || !daddr)
1569		return NULL;
1570
1571	hval = rt6_exception_hash(daddr, saddr);
1572	*bucket += hval;
1573
1574	hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1575		struct rt6_info *rt6 = rt6_ex->rt6i;
1576		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1577
1578#ifdef CONFIG_IPV6_SUBTREES
1579		if (matched && saddr)
1580			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1581#endif
1582		if (matched)
1583			return rt6_ex;
1584	}
1585	return NULL;
1586}
1587
1588static unsigned int fib6_mtu(const struct fib6_result *res)
1589{
1590	const struct fib6_nh *nh = res->nh;
1591	unsigned int mtu;
1592
1593	if (res->f6i->fib6_pmtu) {
1594		mtu = res->f6i->fib6_pmtu;
1595	} else {
1596		struct net_device *dev = nh->fib_nh_dev;
1597		struct inet6_dev *idev;
1598
1599		rcu_read_lock();
1600		idev = __in6_dev_get(dev);
1601		mtu = READ_ONCE(idev->cnf.mtu6);
1602		rcu_read_unlock();
1603	}
1604
1605	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1606
1607	return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
1608}
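/* Worked example (illustrative): with no route PMTU set, a device mtu6
 * of 1500 and a hypothetical lightweight tunnel needing 8 bytes of
 * encapsulation headroom, fib6_mtu() returns
 * min(1500, IP6_MAX_MTU) - 8 = 1492.
 */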
1609
1610#define FIB6_EXCEPTION_BUCKET_FLUSHED  0x1UL
1611
1612/* used when the flushed bit is not relevant, only access to the bucket is
1613 * (i.e., all bucket users except rt6_insert_exception);
1614 *
1615 * called under rcu lock; sometimes called with rt6_exception_lock held
1616 */
1617static
1618struct rt6_exception_bucket *fib6_nh_get_excptn_bucket(const struct fib6_nh *nh,
1619						       spinlock_t *lock)
1620{
1621	struct rt6_exception_bucket *bucket;
1622
1623	if (lock)
1624		bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
1625						   lockdep_is_held(lock));
1626	else
1627		bucket = rcu_dereference(nh->rt6i_exception_bucket);
1628
1629	/* remove bucket flushed bit if set */
1630	if (bucket) {
1631		unsigned long p = (unsigned long)bucket;
1632
1633		p &= ~FIB6_EXCEPTION_BUCKET_FLUSHED;
1634		bucket = (struct rt6_exception_bucket *)p;
1635	}
1636
1637	return bucket;
1638}
1639
1640static bool fib6_nh_excptn_bucket_flushed(struct rt6_exception_bucket *bucket)
1641{
1642	unsigned long p = (unsigned long)bucket;
1643
1644	return !!(p & FIB6_EXCEPTION_BUCKET_FLUSHED);
1645}
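/* Illustration of the tagging above: the bucket array comes from
 * kcalloc() and is therefore at least word-aligned, leaving bit 0 of
 * the pointer free to carry the "flushed" flag.  Readers mask it off
 * before dereferencing; rt6_insert_exception() checks it so a bucket
 * torn down by fib6_nh_flush_exceptions() is not silently recreated.
 * A standalone sketch of the encode/decode:
 */
#if 0	/* example only, never compiled */
#define EXCPTN_TAG	0x1UL
static void *tag_ptr(void *p)	{ return (void *)((unsigned long)p | EXCPTN_TAG); }
static void *untag_ptr(void *p)	{ return (void *)((unsigned long)p & ~EXCPTN_TAG); }
static bool is_tagged(void *p)	{ return (unsigned long)p & EXCPTN_TAG; }
#endif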
1646
1647/* called with rt6_exception_lock held */
1648static void fib6_nh_excptn_bucket_set_flushed(struct fib6_nh *nh,
1649					      spinlock_t *lock)
1650{
1651	struct rt6_exception_bucket *bucket;
1652	unsigned long p;
1653
1654	bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
1655					   lockdep_is_held(lock));
1656
1657	p = (unsigned long)bucket;
1658	p |= FIB6_EXCEPTION_BUCKET_FLUSHED;
1659	bucket = (struct rt6_exception_bucket *)p;
1660	rcu_assign_pointer(nh->rt6i_exception_bucket, bucket);
1661}
1662
1663static int rt6_insert_exception(struct rt6_info *nrt,
1664				const struct fib6_result *res)
1665{
1666	struct net *net = dev_net(nrt->dst.dev);
1667	struct rt6_exception_bucket *bucket;
1668	struct fib6_info *f6i = res->f6i;
1669	struct in6_addr *src_key = NULL;
1670	struct rt6_exception *rt6_ex;
1671	struct fib6_nh *nh = res->nh;
1672	int max_depth;
1673	int err = 0;
1674
1675	spin_lock_bh(&rt6_exception_lock);
1676
1677	bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
1678					  lockdep_is_held(&rt6_exception_lock));
1679	if (!bucket) {
1680		bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1681				 GFP_ATOMIC);
1682		if (!bucket) {
1683			err = -ENOMEM;
1684			goto out;
1685		}
1686		rcu_assign_pointer(nh->rt6i_exception_bucket, bucket);
1687	} else if (fib6_nh_excptn_bucket_flushed(bucket)) {
1688		err = -EINVAL;
1689		goto out;
1690	}
1691
1692#ifdef CONFIG_IPV6_SUBTREES
1693	/* fib6_src.plen != 0 indicates f6i is in subtree
1694	 * and exception table is indexed by a hash of
1695	 * both fib6_dst and fib6_src.
1696	 * Otherwise, the exception table is indexed by
1697	 * a hash of only fib6_dst.
1698	 */
1699	if (f6i->fib6_src.plen)
1700		src_key = &nrt->rt6i_src.addr;
1701#endif
1702	/* rt6_mtu_change() might lower mtu on f6i.
1703	 * Only insert this exception route if its mtu
1704	 * is less than f6i's mtu value.
1705	 */
1706	if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(res)) {
1707		err = -EINVAL;
1708		goto out;
1709	}
1710
1711	rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1712					       src_key);
1713	if (rt6_ex)
1714		rt6_remove_exception(bucket, rt6_ex);
1715
1716	rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1717	if (!rt6_ex) {
1718		err = -ENOMEM;
1719		goto out;
1720	}
1721	rt6_ex->rt6i = nrt;
1722	rt6_ex->stamp = jiffies;
1723	hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1724	bucket->depth++;
1725	net->ipv6.rt6_stats->fib_rt_cache++;
1726
1727	/* Randomize max depth to avoid some side-channel attacks. */
1728	max_depth = FIB6_MAX_DEPTH + get_random_u32_below(FIB6_MAX_DEPTH);
1729	while (bucket->depth > max_depth)
1730		rt6_exception_remove_oldest(bucket);
1731
1732out:
1733	spin_unlock_bh(&rt6_exception_lock);
1734
1735	/* Update fn->fn_sernum to invalidate all cached dst */
1736	if (!err) {
1737		spin_lock_bh(&f6i->fib6_table->tb6_lock);
1738		fib6_update_sernum(net, f6i);
1739		spin_unlock_bh(&f6i->fib6_table->tb6_lock);
1740		fib6_force_start_gc(net);
1741	}
1742
1743	return err;
1744}
1745
1746static void fib6_nh_flush_exceptions(struct fib6_nh *nh, struct fib6_info *from)
1747{
1748	struct rt6_exception_bucket *bucket;
1749	struct rt6_exception *rt6_ex;
1750	struct hlist_node *tmp;
1751	int i;
1752
1753	spin_lock_bh(&rt6_exception_lock);
1754
1755	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
1756	if (!bucket)
1757		goto out;
1758
1759	/* Prevent rt6_insert_exception() from recreating the bucket list */
1760	if (!from)
1761		fib6_nh_excptn_bucket_set_flushed(nh, &rt6_exception_lock);
1762
1763	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1764		hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist) {
1765			if (!from ||
1766			    rcu_access_pointer(rt6_ex->rt6i->from) == from)
1767				rt6_remove_exception(bucket, rt6_ex);
1768		}
1769		WARN_ON_ONCE(!from && bucket->depth);
1770		bucket++;
1771	}
1772out:
1773	spin_unlock_bh(&rt6_exception_lock);
1774}
1775
1776static int rt6_nh_flush_exceptions(struct fib6_nh *nh, void *arg)
1777{
1778	struct fib6_info *f6i = arg;
1779
1780	fib6_nh_flush_exceptions(nh, f6i);
1781
1782	return 0;
1783}
1784
1785void rt6_flush_exceptions(struct fib6_info *f6i)
1786{
1787	if (f6i->nh)
1788		nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_flush_exceptions,
1789					 f6i);
1790	else
1791		fib6_nh_flush_exceptions(f6i->fib6_nh, f6i);
1792}
1793
1794/* Find the cached rt in the exception table of the passed-in result
1795 * Caller has to hold rcu_read_lock()
1796 */
1797static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
1798					   const struct in6_addr *daddr,
1799					   const struct in6_addr *saddr)
1800{
1801	const struct in6_addr *src_key = NULL;
1802	struct rt6_exception_bucket *bucket;
1803	struct rt6_exception *rt6_ex;
1804	struct rt6_info *ret = NULL;
1805
1806#ifdef CONFIG_IPV6_SUBTREES
1807	/* fib6_src.plen != 0 indicates f6i is in subtree
1808	 * and exception table is indexed by a hash of
1809	 * both fib6_dst and fib6_src.
1810	 * However, the src addr used to create the hash
1811	 * might not be exactly the passed in saddr which
1812	 * is a /128 addr from the flow.
1813	 * So we need to use f6i->fib6_src to redo lookup
1814	 * if the passed in saddr does not find anything.
1815	 * (See the logic in ip6_rt_cache_alloc() on how
1816	 * rt->rt6i_src is updated.)
1817	 */
1818	if (res->f6i->fib6_src.plen)
1819		src_key = saddr;
1820find_ex:
1821#endif
1822	bucket = fib6_nh_get_excptn_bucket(res->nh, NULL);
1823	rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1824
1825	if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1826		ret = rt6_ex->rt6i;
1827
1828#ifdef CONFIG_IPV6_SUBTREES
1829	/* Use fib6_src as src_key and redo lookup */
1830	if (!ret && src_key && src_key != &res->f6i->fib6_src.addr) {
1831		src_key = &res->f6i->fib6_src.addr;
1832		goto find_ex;
1833	}
1834#endif
1835
1836	return ret;
1837}
1838
1839/* Remove the passed-in cached rt from the hash table that contains it */
1840static int fib6_nh_remove_exception(const struct fib6_nh *nh, int plen,
1841				    const struct rt6_info *rt)
1842{
1843	const struct in6_addr *src_key = NULL;
1844	struct rt6_exception_bucket *bucket;
1845	struct rt6_exception *rt6_ex;
1846	int err;
1847
1848	if (!rcu_access_pointer(nh->rt6i_exception_bucket))
1849		return -ENOENT;
1850
1851	spin_lock_bh(&rt6_exception_lock);
1852	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
1853
1854#ifdef CONFIG_IPV6_SUBTREES
1855	/* rt6i_src.plen != 0 indicates 'from' is in subtree
1856	 * and exception table is indexed by a hash of
1857	 * both rt6i_dst and rt6i_src.
1858	 * Otherwise, the exception table is indexed by
1859	 * a hash of only rt6i_dst.
1860	 */
1861	if (plen)
1862		src_key = &rt->rt6i_src.addr;
1863#endif
1864	rt6_ex = __rt6_find_exception_spinlock(&bucket,
1865					       &rt->rt6i_dst.addr,
1866					       src_key);
1867	if (rt6_ex) {
1868		rt6_remove_exception(bucket, rt6_ex);
1869		err = 0;
1870	} else {
1871		err = -ENOENT;
1872	}
1873
1874	spin_unlock_bh(&rt6_exception_lock);
1875	return err;
1876}
1877
1878struct fib6_nh_excptn_arg {
1879	struct rt6_info	*rt;
1880	int		plen;
1881};
1882
1883static int rt6_nh_remove_exception_rt(struct fib6_nh *nh, void *_arg)
1884{
1885	struct fib6_nh_excptn_arg *arg = _arg;
1886	int err;
1887
1888	err = fib6_nh_remove_exception(nh, arg->plen, arg->rt);
1889	if (err == 0)
1890		return 1;
1891
1892	return 0;
1893}
1894
1895static int rt6_remove_exception_rt(struct rt6_info *rt)
1896{
1897	struct fib6_info *from;
1898
1899	from = rcu_dereference(rt->from);
1900	if (!from || !(rt->rt6i_flags & RTF_CACHE))
1901		return -EINVAL;
1902
1903	if (from->nh) {
1904		struct fib6_nh_excptn_arg arg = {
1905			.rt = rt,
1906			.plen = from->fib6_src.plen
1907		};
1908		int rc;
1909
1910		/* rc = 1 means an entry was found */
1911		rc = nexthop_for_each_fib6_nh(from->nh,
1912					      rt6_nh_remove_exception_rt,
1913					      &arg);
1914		return rc ? 0 : -ENOENT;
1915	}
1916
1917	return fib6_nh_remove_exception(from->fib6_nh,
1918					from->fib6_src.plen, rt);
1919}
1920
1921/* Find the rt6_ex which contains the passed-in rt cache and
1922 * refresh its stamp
1923 */
1924static void fib6_nh_update_exception(const struct fib6_nh *nh, int plen,
1925				     const struct rt6_info *rt)
1926{
1927	const struct in6_addr *src_key = NULL;
1928	struct rt6_exception_bucket *bucket;
1929	struct rt6_exception *rt6_ex;
1930
1931	bucket = fib6_nh_get_excptn_bucket(nh, NULL);
1932#ifdef CONFIG_IPV6_SUBTREES
1933	/* rt6i_src.plen != 0 indicates 'from' is in subtree
1934	 * and exception table is indexed by a hash of
1935	 * both rt6i_dst and rt6i_src.
1936	 * Otherwise, the exception table is indexed by
1937	 * a hash of only rt6i_dst.
1938	 */
1939	if (plen)
1940		src_key = &rt->rt6i_src.addr;
1941#endif
1942	rt6_ex = __rt6_find_exception_rcu(&bucket, &rt->rt6i_dst.addr, src_key);
1943	if (rt6_ex)
1944		rt6_ex->stamp = jiffies;
1945}
1946
1947struct fib6_nh_match_arg {
1948	const struct net_device *dev;
1949	const struct in6_addr	*gw;
1950	struct fib6_nh		*match;
1951};
1952
1953/* determine if fib6_nh has given device and gateway */
1954static int fib6_nh_find_match(struct fib6_nh *nh, void *_arg)
1955{
1956	struct fib6_nh_match_arg *arg = _arg;
1957
1958	if (arg->dev != nh->fib_nh_dev ||
1959	    (arg->gw && !nh->fib_nh_gw_family) ||
1960	    (!arg->gw && nh->fib_nh_gw_family) ||
1961	    (arg->gw && !ipv6_addr_equal(arg->gw, &nh->fib_nh_gw6)))
1962		return 0;
1963
1964	arg->match = nh;
1965
1966	/* found a match, break the loop */
1967	return 1;
1968}
1969
1970static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1971{
1972	struct fib6_info *from;
1973	struct fib6_nh *fib6_nh;
1974
1975	rcu_read_lock();
1976
1977	from = rcu_dereference(rt->from);
1978	if (!from || !(rt->rt6i_flags & RTF_CACHE))
1979		goto unlock;
1980
1981	if (from->nh) {
1982		struct fib6_nh_match_arg arg = {
1983			.dev = rt->dst.dev,
1984			.gw = &rt->rt6i_gateway,
1985		};
1986
1987		nexthop_for_each_fib6_nh(from->nh, fib6_nh_find_match, &arg);
1988
1989		if (!arg.match)
1990			goto unlock;
1991		fib6_nh = arg.match;
1992	} else {
1993		fib6_nh = from->fib6_nh;
1994	}
1995	fib6_nh_update_exception(fib6_nh, from->fib6_src.plen, rt);
1996unlock:
1997	rcu_read_unlock();
1998}
1999
2000static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
2001					 struct rt6_info *rt, int mtu)
2002{
2003	/* If the new MTU is lower than the route PMTU, this new MTU will be the
2004	 * lowest MTU in the path: always allow updating the route PMTU to
2005	 * reflect PMTU decreases.
2006	 *
2007	 * If the new MTU is higher, and the route PMTU is equal to the local
2008	 * MTU, this means the old MTU is the lowest in the path, so allow
2009	 * updating it: if other nodes now have lower MTUs, PMTU discovery will
2010	 * handle this.
2011	 */
2012
2013	if (dst_mtu(&rt->dst) >= mtu)
2014		return true;
2015
2016	if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
2017		return true;
2018
2019	return false;
2020}
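/* Worked example (illustrative): cached PMTU 1400, idev->cnf.mtu6 1500.
 * Lowering the link MTU to 1300 is allowed (1400 >= 1300: the path can
 * only have narrowed), but raising it to 1600 is not (1400 < 1600 and
 * 1400 != 1500: some other hop is the bottleneck, so the cached PMTU
 * stands until PMTU discovery says otherwise).
 */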
2021
2022static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
2023				       const struct fib6_nh *nh, int mtu)
2024{
2025	struct rt6_exception_bucket *bucket;
2026	struct rt6_exception *rt6_ex;
2027	int i;
2028
2029	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
2030	if (!bucket)
2031		return;
2032
2033	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
2034		hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
2035			struct rt6_info *entry = rt6_ex->rt6i;
2036
2037			/* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
2038			 * route), the metrics of its rt->from have already
2039			 * been updated.
2040			 */
2041			if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
2042			    rt6_mtu_change_route_allowed(idev, entry, mtu))
2043				dst_metric_set(&entry->dst, RTAX_MTU, mtu);
2044		}
2045		bucket++;
2046	}
2047}
2048
2049#define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
2050
2051static void fib6_nh_exceptions_clean_tohost(const struct fib6_nh *nh,
2052					    const struct in6_addr *gateway)
2053{
2054	struct rt6_exception_bucket *bucket;
2055	struct rt6_exception *rt6_ex;
2056	struct hlist_node *tmp;
2057	int i;
2058
2059	if (!rcu_access_pointer(nh->rt6i_exception_bucket))
2060		return;
2061
2062	spin_lock_bh(&rt6_exception_lock);
2063	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
2064	if (bucket) {
2065		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
2066			hlist_for_each_entry_safe(rt6_ex, tmp,
2067						  &bucket->chain, hlist) {
2068				struct rt6_info *entry = rt6_ex->rt6i;
2069
2070				if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
2071				    RTF_CACHE_GATEWAY &&
2072				    ipv6_addr_equal(gateway,
2073						    &entry->rt6i_gateway)) {
2074					rt6_remove_exception(bucket, rt6_ex);
2075				}
2076			}
2077			bucket++;
2078		}
2079	}
2080
2081	spin_unlock_bh(&rt6_exception_lock);
2082}
2083
2084static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
2085				      struct rt6_exception *rt6_ex,
2086				      struct fib6_gc_args *gc_args,
2087				      unsigned long now)
2088{
2089	struct rt6_info *rt = rt6_ex->rt6i;
2090
2091	/* we are pruning and obsoleting aged-out and non-gateway exceptions
2092	 * even if others still hold references to them, so that on the next
2093	 * dst_check() such references can be dropped.
2094	 * EXPIRES exceptions - e.g. pmtu-generated ones - are pruned when
2095	 * expired, independently of their aging, as per RFC 8201 section 4
2096	 */
2097	if (!(rt->rt6i_flags & RTF_EXPIRES)) {
2098		if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
2099			pr_debug("aging clone %p\n", rt);
2100			rt6_remove_exception(bucket, rt6_ex);
2101			return;
2102		}
2103	} else if (time_after(jiffies, rt->dst.expires)) {
2104		pr_debug("purging expired route %p\n", rt);
2105		rt6_remove_exception(bucket, rt6_ex);
2106		return;
2107	}
2108
2109	if (rt->rt6i_flags & RTF_GATEWAY) {
2110		struct neighbour *neigh;
2111
2112		neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
2113
2114		if (!(neigh && (neigh->flags & NTF_ROUTER))) {
2115			pr_debug("purging route %p via non-router but gateway\n",
2116				 rt);
2117			rt6_remove_exception(bucket, rt6_ex);
2118			return;
2119		}
2120	}
2121
2122	gc_args->more++;
2123}
2124
2125static void fib6_nh_age_exceptions(const struct fib6_nh *nh,
2126				   struct fib6_gc_args *gc_args,
2127				   unsigned long now)
2128{
2129	struct rt6_exception_bucket *bucket;
2130	struct rt6_exception *rt6_ex;
2131	struct hlist_node *tmp;
2132	int i;
2133
2134	if (!rcu_access_pointer(nh->rt6i_exception_bucket))
2135		return;
2136
2137	rcu_read_lock_bh();
2138	spin_lock(&rt6_exception_lock);
2139	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
2140	if (bucket) {
2141		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
2142			hlist_for_each_entry_safe(rt6_ex, tmp,
2143						  &bucket->chain, hlist) {
2144				rt6_age_examine_exception(bucket, rt6_ex,
2145							  gc_args, now);
2146			}
2147			bucket++;
2148		}
2149	}
2150	spin_unlock(&rt6_exception_lock);
2151	rcu_read_unlock_bh();
2152}
2153
2154struct fib6_nh_age_excptn_arg {
2155	struct fib6_gc_args	*gc_args;
2156	unsigned long		now;
2157};
2158
2159static int rt6_nh_age_exceptions(struct fib6_nh *nh, void *_arg)
2160{
2161	struct fib6_nh_age_excptn_arg *arg = _arg;
2162
2163	fib6_nh_age_exceptions(nh, arg->gc_args, arg->now);
2164	return 0;
2165}
2166
2167void rt6_age_exceptions(struct fib6_info *f6i,
2168			struct fib6_gc_args *gc_args,
2169			unsigned long now)
2170{
2171	if (f6i->nh) {
2172		struct fib6_nh_age_excptn_arg arg = {
2173			.gc_args = gc_args,
2174			.now = now
2175		};
2176
2177		nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_age_exceptions,
2178					 &arg);
2179	} else {
2180		fib6_nh_age_exceptions(f6i->fib6_nh, gc_args, now);
2181	}
2182}
2183
2184/* must be called with rcu lock held */
2185int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif,
2186		      struct flowi6 *fl6, struct fib6_result *res, int strict)
2187{
2188	struct fib6_node *fn, *saved_fn;
2189
2190	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2191	saved_fn = fn;
2192
2193redo_rt6_select:
2194	rt6_select(net, fn, oif, res, strict);
2195	if (res->f6i == net->ipv6.fib6_null_entry) {
2196		fn = fib6_backtrack(fn, &fl6->saddr);
2197		if (fn)
2198			goto redo_rt6_select;
2199		else if (strict & RT6_LOOKUP_F_REACHABLE) {
2200			/* also consider unreachable route */
2201			strict &= ~RT6_LOOKUP_F_REACHABLE;
2202			fn = saved_fn;
2203			goto redo_rt6_select;
2204		}
2205	}
2206
2207	trace_fib6_table_lookup(net, res, table, fl6);
2208
2209	return 0;
2210}
2211
2212struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
2213			       int oif, struct flowi6 *fl6,
2214			       const struct sk_buff *skb, int flags)
2215{
2216	struct fib6_result res = {};
2217	struct rt6_info *rt = NULL;
2218	int strict = 0;
2219
2220	WARN_ON_ONCE((flags & RT6_LOOKUP_F_DST_NOREF) &&
2221		     !rcu_read_lock_held());
2222
2223	strict |= flags & RT6_LOOKUP_F_IFACE;
2224	strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
2225	if (READ_ONCE(net->ipv6.devconf_all->forwarding) == 0)
2226		strict |= RT6_LOOKUP_F_REACHABLE;
2227
2228	rcu_read_lock();
2229
2230	fib6_table_lookup(net, table, oif, fl6, &res, strict);
2231	if (res.f6i == net->ipv6.fib6_null_entry)
2232		goto out;
2233
2234	fib6_select_path(net, &res, fl6, oif, false, skb, strict);
2235
2236	/* Search through exception table */
2237	rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
2238	if (rt) {
2239		goto out;
2240	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
2241			    !res.nh->fib_nh_gw_family)) {
2242		/* Create a RTF_CACHE clone which will not be
2243		 * owned by the fib6 tree.  It is for the special case where
2244		 * the daddr in the skb during the neighbor look-up is different
2245		 * from the fl6->daddr used to look-up route here.
2246		 */
2247		rt = ip6_rt_cache_alloc(&res, &fl6->daddr, NULL);
2248
2249		if (rt) {
2250			/* 1 refcnt is taken during ip6_rt_cache_alloc().
2251			 * As rt6_uncached_list_add() does not consume refcnt,
2252			 * this refcnt is always returned to the caller even
2253			 * if caller sets RT6_LOOKUP_F_DST_NOREF flag.
2254			 */
2255			rt6_uncached_list_add(rt);
2256			rcu_read_unlock();
2257
2258			return rt;
2259		}
2260	} else {
2261		/* Get a percpu copy */
2262		local_bh_disable();
2263		rt = rt6_get_pcpu_route(&res);
2264
2265		if (!rt)
2266			rt = rt6_make_pcpu_route(net, &res);
2267
2268		local_bh_enable();
2269	}
2270out:
2271	if (!rt)
2272		rt = net->ipv6.ip6_null_entry;
2273	if (!(flags & RT6_LOOKUP_F_DST_NOREF))
2274		ip6_hold_safe(net, &rt);
2275	rcu_read_unlock();
2276
2277	return rt;
2278}
2279EXPORT_SYMBOL_GPL(ip6_pol_route);
2280
2281INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_input(struct net *net,
2282					    struct fib6_table *table,
2283					    struct flowi6 *fl6,
2284					    const struct sk_buff *skb,
2285					    int flags)
2286{
2287	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
2288}
2289
2290struct dst_entry *ip6_route_input_lookup(struct net *net,
2291					 struct net_device *dev,
2292					 struct flowi6 *fl6,
2293					 const struct sk_buff *skb,
2294					 int flags)
2295{
2296	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
2297		flags |= RT6_LOOKUP_F_IFACE;
2298
2299	return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
2300}
2301EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
2302
2303static void ip6_multipath_l3_keys(const struct sk_buff *skb,
2304				  struct flow_keys *keys,
2305				  struct flow_keys *flkeys)
2306{
2307	const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
2308	const struct ipv6hdr *key_iph = outer_iph;
2309	struct flow_keys *_flkeys = flkeys;
2310	const struct ipv6hdr *inner_iph;
2311	const struct icmp6hdr *icmph;
2312	struct ipv6hdr _inner_iph;
2313	struct icmp6hdr _icmph;
2314
2315	if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
2316		goto out;
2317
2318	icmph = skb_header_pointer(skb, skb_transport_offset(skb),
2319				   sizeof(_icmph), &_icmph);
2320	if (!icmph)
2321		goto out;
2322
2323	if (!icmpv6_is_err(icmph->icmp6_type))
2324		goto out;
2325
2326	inner_iph = skb_header_pointer(skb,
2327				       skb_transport_offset(skb) + sizeof(*icmph),
2328				       sizeof(_inner_iph), &_inner_iph);
2329	if (!inner_iph)
2330		goto out;
2331
2332	key_iph = inner_iph;
2333	_flkeys = NULL;
2334out:
2335	if (_flkeys) {
2336		keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
2337		keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
2338		keys->tags.flow_label = _flkeys->tags.flow_label;
2339		keys->basic.ip_proto = _flkeys->basic.ip_proto;
2340	} else {
2341		keys->addrs.v6addrs.src = key_iph->saddr;
2342		keys->addrs.v6addrs.dst = key_iph->daddr;
2343		keys->tags.flow_label = ip6_flowlabel(key_iph);
2344		keys->basic.ip_proto = key_iph->nexthdr;
2345	}
2346}
2347
2348static u32 rt6_multipath_custom_hash_outer(const struct net *net,
2349					   const struct sk_buff *skb,
2350					   bool *p_has_inner)
2351{
2352	u32 hash_fields = ip6_multipath_hash_fields(net);
2353	struct flow_keys keys, hash_keys;
2354
2355	if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK))
2356		return 0;
2357
2358	memset(&hash_keys, 0, sizeof(hash_keys));
2359	skb_flow_dissect_flow_keys(skb, &keys, FLOW_DISSECTOR_F_STOP_AT_ENCAP);
2360
2361	hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2362	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP)
2363		hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src;
2364	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP)
2365		hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst;
2366	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
2367		hash_keys.basic.ip_proto = keys.basic.ip_proto;
2368	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL)
2369		hash_keys.tags.flow_label = keys.tags.flow_label;
2370	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
2371		hash_keys.ports.src = keys.ports.src;
2372	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
2373		hash_keys.ports.dst = keys.ports.dst;
2374
2375	*p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION);
2376	return flow_hash_from_keys(&hash_keys);
2377}
2378
2379static u32 rt6_multipath_custom_hash_inner(const struct net *net,
2380					   const struct sk_buff *skb,
2381					   bool has_inner)
2382{
2383	u32 hash_fields = ip6_multipath_hash_fields(net);
2384	struct flow_keys keys, hash_keys;
2385
2386	/* We assume the packet carries an encapsulation, but if none was
2387	 * encountered during dissection of the outer flow, then there is no
2388	 * point in calling the flow dissector again.
2389	 */
2390	if (!has_inner)
2391		return 0;
2392
2393	if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_MASK))
2394		return 0;
2395
2396	memset(&hash_keys, 0, sizeof(hash_keys));
2397	skb_flow_dissect_flow_keys(skb, &keys, 0);
2398
2399	if (!(keys.control.flags & FLOW_DIS_ENCAPSULATION))
2400		return 0;
2401
2402	if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
2403		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
2404		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP)
2405			hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
2406		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP)
2407			hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
2408	} else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
2409		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2410		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP)
2411			hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src;
2412		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP)
2413			hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst;
2414		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL)
2415			hash_keys.tags.flow_label = keys.tags.flow_label;
2416	}
2417
2418	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO)
2419		hash_keys.basic.ip_proto = keys.basic.ip_proto;
2420	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT)
2421		hash_keys.ports.src = keys.ports.src;
2422	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT)
2423		hash_keys.ports.dst = keys.ports.dst;
2424
2425	return flow_hash_from_keys(&hash_keys);
2426}
2427
2428static u32 rt6_multipath_custom_hash_skb(const struct net *net,
2429					 const struct sk_buff *skb)
2430{
2431	u32 mhash, mhash_inner;
2432	bool has_inner = true;
2433
2434	mhash = rt6_multipath_custom_hash_outer(net, skb, &has_inner);
2435	mhash_inner = rt6_multipath_custom_hash_inner(net, skb, has_inner);
2436
2437	return jhash_2words(mhash, mhash_inner, 0);
2438}
2439
2440static u32 rt6_multipath_custom_hash_fl6(const struct net *net,
2441					 const struct flowi6 *fl6)
2442{
2443	u32 hash_fields = ip6_multipath_hash_fields(net);
2444	struct flow_keys hash_keys;
2445
2446	if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK))
2447		return 0;
2448
2449	memset(&hash_keys, 0, sizeof(hash_keys));
2450	hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2451	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP)
2452		hash_keys.addrs.v6addrs.src = fl6->saddr;
2453	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP)
2454		hash_keys.addrs.v6addrs.dst = fl6->daddr;
2455	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
2456		hash_keys.basic.ip_proto = fl6->flowi6_proto;
2457	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL)
2458		hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
2459	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
2460		hash_keys.ports.src = fl6->fl6_sport;
2461	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
2462		hash_keys.ports.dst = fl6->fl6_dport;
2463
2464	return flow_hash_from_keys(&hash_keys);
2465}
2466
2467/* if skb is set it will be used and fl6 can be NULL */
2468u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
2469		       const struct sk_buff *skb, struct flow_keys *flkeys)
2470{
2471	struct flow_keys hash_keys;
2472	u32 mhash = 0;
2473
2474	switch (ip6_multipath_hash_policy(net)) {
2475	case 0:
2476		memset(&hash_keys, 0, sizeof(hash_keys));
2477		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2478		if (skb) {
2479			ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
2480		} else {
2481			hash_keys.addrs.v6addrs.src = fl6->saddr;
2482			hash_keys.addrs.v6addrs.dst = fl6->daddr;
2483			hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
2484			hash_keys.basic.ip_proto = fl6->flowi6_proto;
2485		}
2486		mhash = flow_hash_from_keys(&hash_keys);
2487		break;
2488	case 1:
2489		if (skb) {
2490			unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
2491			struct flow_keys keys;
2492
2493			/* short-circuit if we already have L4 hash present */
2494			if (skb->l4_hash)
2495				return skb_get_hash_raw(skb) >> 1;
2496
2497			memset(&hash_keys, 0, sizeof(hash_keys));
2498
2499			if (!flkeys) {
2500				skb_flow_dissect_flow_keys(skb, &keys, flag);
2501				flkeys = &keys;
2502			}
2503			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2504			hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2505			hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2506			hash_keys.ports.src = flkeys->ports.src;
2507			hash_keys.ports.dst = flkeys->ports.dst;
2508			hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2509		} else {
2510			memset(&hash_keys, 0, sizeof(hash_keys));
2511			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2512			hash_keys.addrs.v6addrs.src = fl6->saddr;
2513			hash_keys.addrs.v6addrs.dst = fl6->daddr;
2514			hash_keys.ports.src = fl6->fl6_sport;
2515			hash_keys.ports.dst = fl6->fl6_dport;
2516			hash_keys.basic.ip_proto = fl6->flowi6_proto;
2517		}
2518		mhash = flow_hash_from_keys(&hash_keys);
2519		break;
2520	case 2:
2521		memset(&hash_keys, 0, sizeof(hash_keys));
2522		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2523		if (skb) {
2524			struct flow_keys keys;
2525
2526			if (!flkeys) {
2527				skb_flow_dissect_flow_keys(skb, &keys, 0);
2528				flkeys = &keys;
2529			}
2530
2531			/* Inner can be v4 or v6 */
2532			if (flkeys->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
2533				hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
2534				hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
2535				hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
2536			} else if (flkeys->control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
2537				hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2538				hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2539				hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2540				hash_keys.tags.flow_label = flkeys->tags.flow_label;
2541				hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2542			} else {
2543				/* Same as case 0 */
2544				hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2545				ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
2546			}
2547		} else {
2548			/* Same as case 0 */
2549			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2550			hash_keys.addrs.v6addrs.src = fl6->saddr;
2551			hash_keys.addrs.v6addrs.dst = fl6->daddr;
2552			hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
2553			hash_keys.basic.ip_proto = fl6->flowi6_proto;
2554		}
2555		mhash = flow_hash_from_keys(&hash_keys);
2556		break;
2557	case 3:
2558		if (skb)
2559			mhash = rt6_multipath_custom_hash_skb(net, skb);
2560		else
2561			mhash = rt6_multipath_custom_hash_fl6(net, fl6);
2562		break;
2563	}
2564
2565	return mhash >> 1;
2566}
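/* Note (illustrative): the policy cases above track the
 * net.ipv6.fib_multipath_hash_policy sysctl: 0 hashes L3 fields
 * (falling back to the inner packet of ICMPv6 errors), 1 adds L4
 * ports, 2 prefers the inner packet of an encapsulated flow, and 3
 * uses the custom field set from net.ipv6.fib_multipath_hash_fields.
 * The final ">> 1" halves the range so the value stays non-negative
 * if ever carried in a signed int.
 */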
2567
2568/* Called with rcu held */
2569void ip6_route_input(struct sk_buff *skb)
2570{
2571	const struct ipv6hdr *iph = ipv6_hdr(skb);
2572	struct net *net = dev_net(skb->dev);
2573	int flags = RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_DST_NOREF;
2574	struct ip_tunnel_info *tun_info;
2575	struct flowi6 fl6 = {
2576		.flowi6_iif = skb->dev->ifindex,
2577		.daddr = iph->daddr,
2578		.saddr = iph->saddr,
2579		.flowlabel = ip6_flowinfo(iph),
2580		.flowi6_mark = skb->mark,
2581		.flowi6_proto = iph->nexthdr,
2582	};
2583	struct flow_keys *flkeys = NULL, _flkeys;
2584
2585	tun_info = skb_tunnel_info(skb);
2586	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
2587		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
2588
2589	if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2590		flkeys = &_flkeys;
2591
2592	if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
2593		fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
2594	skb_dst_drop(skb);
2595	skb_dst_set_noref(skb, ip6_route_input_lookup(net, skb->dev,
2596						      &fl6, skb, flags));
2597}
2598
2599INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_output(struct net *net,
2600					     struct fib6_table *table,
2601					     struct flowi6 *fl6,
2602					     const struct sk_buff *skb,
2603					     int flags)
2604{
2605	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
2606}
2607
2608static struct dst_entry *ip6_route_output_flags_noref(struct net *net,
2609						      const struct sock *sk,
2610						      struct flowi6 *fl6,
2611						      int flags)
2612{
2613	bool any_src;
2614
2615	if (ipv6_addr_type(&fl6->daddr) &
2616	    (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
2617		struct dst_entry *dst;
2618
2619		/* This function does not take refcnt on the dst */
2620		dst = l3mdev_link_scope_lookup(net, fl6);
2621		if (dst)
2622			return dst;
2623	}
2624
2625	fl6->flowi6_iif = LOOPBACK_IFINDEX;
2626
2627	flags |= RT6_LOOKUP_F_DST_NOREF;
2628	any_src = ipv6_addr_any(&fl6->saddr);
2629	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
2630	    (fl6->flowi6_oif && any_src))
2631		flags |= RT6_LOOKUP_F_IFACE;
2632
2633	if (!any_src)
2634		flags |= RT6_LOOKUP_F_HAS_SADDR;
2635	else if (sk)
2636		flags |= rt6_srcprefs2flags(READ_ONCE(inet6_sk(sk)->srcprefs));
2637
2638	return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
2639}
2640
2641struct dst_entry *ip6_route_output_flags(struct net *net,
2642					 const struct sock *sk,
2643					 struct flowi6 *fl6,
2644					 int flags)
2645{
2646	struct dst_entry *dst;
2647	struct rt6_info *rt6;
2648
2649	rcu_read_lock();
2650	dst = ip6_route_output_flags_noref(net, sk, fl6, flags);
2651	rt6 = dst_rt6_info(dst);
2652	/* For dst cached in uncached_list, refcnt is already taken. */
2653	if (list_empty(&rt6->dst.rt_uncached) && !dst_hold_safe(dst)) {
2654		dst = &net->ipv6.ip6_null_entry->dst;
2655		dst_hold(dst);
2656	}
2657	rcu_read_unlock();
2658
2659	return dst;
2660}
2661EXPORT_SYMBOL_GPL(ip6_route_output_flags);
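
/* Editorial caller sketch (illustrative): ip6_route_output() is the
 * usual wrapper around this function. It always returns a dst_entry;
 * failures are reported through dst->error rather than a NULL return,
 * and the reference must always be dropped:
 *
 *	struct dst_entry *dst = ip6_route_output(net, sk, &fl6);
 *	int err = dst->error;
 *
 *	if (err) {
 *		dst_release(dst);
 *		return err;
 *	}
 *	(use dst, then dst_release(dst))
 */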
2662
2663struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
2664{
2665	struct rt6_info *rt, *ort = dst_rt6_info(dst_orig);
2666	struct net_device *loopback_dev = net->loopback_dev;
2667	struct dst_entry *new = NULL;
2668
2669	rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev,
2670		       DST_OBSOLETE_DEAD, 0);
2671	if (rt) {
2672		rt6_info_init(rt);
2673		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
2674
2675		new = &rt->dst;
2676		new->__use = 1;
2677		new->input = dst_discard;
2678		new->output = dst_discard_out;
2679
2680		dst_copy_metrics(new, &ort->dst);
2681
2682		rt->rt6i_idev = in6_dev_get(loopback_dev);
2683		rt->rt6i_gateway = ort->rt6i_gateway;
2684		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
2685
2686		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2687#ifdef CONFIG_IPV6_SUBTREES
2688		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2689#endif
2690	}
2691
2692	dst_release(dst_orig);
2693	return new ? new : ERR_PTR(-ENOMEM);
2694}
2695
2696/*
2697 *	Destination cache support functions
2698 */
2699
2700static bool fib6_check(struct fib6_info *f6i, u32 cookie)
2701{
2702	u32 rt_cookie = 0;
2703
2704	if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
2705		return false;
2706
2707	if (fib6_check_expired(f6i))
2708		return false;
2709
2710	return true;
2711}
2712
2713static struct dst_entry *rt6_check(struct rt6_info *rt,
2714				   struct fib6_info *from,
2715				   u32 cookie)
2716{
2717	u32 rt_cookie = 0;
2718
2719	if (!from || !fib6_get_cookie_safe(from, &rt_cookie) ||
2720	    rt_cookie != cookie)
2721		return NULL;
2722
2723	if (rt6_check_expired(rt))
2724		return NULL;
2725
2726	return &rt->dst;
2727}
2728
2729static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
2730					    struct fib6_info *from,
2731					    u32 cookie)
2732{
2733	if (!__rt6_check_expired(rt) &&
2734	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
2735	    fib6_check(from, cookie))
2736		return &rt->dst;
2737	else
2738		return NULL;
2739}
2740
2741INDIRECT_CALLABLE_SCOPE struct dst_entry *ip6_dst_check(struct dst_entry *dst,
2742							u32 cookie)
2743{
2744	struct dst_entry *dst_ret;
2745	struct fib6_info *from;
2746	struct rt6_info *rt;
2747
2748	rt = dst_rt6_info(dst);
2749
2750	if (rt->sernum)
2751		return rt6_is_valid(rt) ? dst : NULL;
2752
2753	rcu_read_lock();
2754
2755	/* All IPv6 dsts are created with ->obsolete set to
2756	 * DST_OBSOLETE_FORCE_CHK, which always forces validation
2757	 * calls down into this function.
2758	 */
2759
2760	from = rcu_dereference(rt->from);
2761
2762	if (from && (rt->rt6i_flags & RTF_PCPU ||
2763	    unlikely(!list_empty(&rt->dst.rt_uncached))))
2764		dst_ret = rt6_dst_from_check(rt, from, cookie);
2765	else
2766		dst_ret = rt6_check(rt, from, cookie);
2767
2768	rcu_read_unlock();
2769
2770	return dst_ret;
2771}
2772EXPORT_INDIRECT_CALLABLE(ip6_dst_check);
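
/* Editorial sketch (illustrative): users of a cached dst revalidate it
 * through dst->ops->check() with the fib6 serial-number cookie before
 * reuse, as ip6_sk_update_pmtu() does later in this file:
 *
 *	struct dst_entry *dst = __sk_dst_get(sk);
 *
 *	if (dst && dst->obsolete &&
 *	    !dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
 *		(dst is stale; a fresh route lookup is needed)
 */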
2773
2774static void ip6_negative_advice(struct sock *sk,
2775				struct dst_entry *dst)
2776{
2777	struct rt6_info *rt = dst_rt6_info(dst);
2778
2779	if (rt->rt6i_flags & RTF_CACHE) {
2780		rcu_read_lock();
2781		if (rt6_check_expired(rt)) {
2782			/* counteract the dst_release() in sk_dst_reset() */
2783			dst_hold(dst);
2784			sk_dst_reset(sk);
2785
2786			rt6_remove_exception_rt(rt);
2787		}
2788		rcu_read_unlock();
2789		return;
2790	}
2791	sk_dst_reset(sk);
2792}
2793
2794static void ip6_link_failure(struct sk_buff *skb)
2795{
2796	struct rt6_info *rt;
2797
2798	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
2799
2800	rt = dst_rt6_info(skb_dst(skb));
2801	if (rt) {
2802		rcu_read_lock();
2803		if (rt->rt6i_flags & RTF_CACHE) {
2804			rt6_remove_exception_rt(rt);
2805		} else {
2806			struct fib6_info *from;
2807			struct fib6_node *fn;
2808
2809			from = rcu_dereference(rt->from);
2810			if (from) {
2811				fn = rcu_dereference(from->fib6_node);
2812				if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2813					WRITE_ONCE(fn->fn_sernum, -1);
2814			}
2815		}
2816		rcu_read_unlock();
2817	}
2818}
2819
2820static void rt6_update_expires(struct rt6_info *rt0, int timeout)
2821{
2822	if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
2823		struct fib6_info *from;
2824
2825		rcu_read_lock();
2826		from = rcu_dereference(rt0->from);
2827		if (from)
2828			rt0->dst.expires = from->expires;
2829		rcu_read_unlock();
2830	}
2831
2832	dst_set_expires(&rt0->dst, timeout);
2833	rt0->rt6i_flags |= RTF_EXPIRES;
2834}
2835
2836static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2837{
2838	struct net *net = dev_net(rt->dst.dev);
2839
2840	dst_metric_set(&rt->dst, RTAX_MTU, mtu);
2841	rt->rt6i_flags |= RTF_MODIFIED;
2842	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2843}
2844
2845static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2846{
2847	return !(rt->rt6i_flags & RTF_CACHE) &&
2848		(rt->rt6i_flags & RTF_PCPU || rcu_access_pointer(rt->from));
2849}
2850
2851static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2852				 const struct ipv6hdr *iph, u32 mtu,
2853				 bool confirm_neigh)
2854{
2855	const struct in6_addr *daddr, *saddr;
2856	struct rt6_info *rt6 = dst_rt6_info(dst);
2857
2858	/* Note: do *NOT* check dst_metric_locked(dst, RTAX_MTU)
2859	 * IPv6 pmtu discovery isn't optional, so 'mtu lock' cannot disable it.
2860	 * [see also comment in rt6_mtu_change_route()]
2861	 */
2862
2863	if (iph) {
2864		daddr = &iph->daddr;
2865		saddr = &iph->saddr;
2866	} else if (sk) {
2867		daddr = &sk->sk_v6_daddr;
2868		saddr = &inet6_sk(sk)->saddr;
2869	} else {
2870		daddr = NULL;
2871		saddr = NULL;
2872	}
2873
2874	if (confirm_neigh)
2875		dst_confirm_neigh(dst, daddr);
2876
2877	if (mtu < IPV6_MIN_MTU)
2878		return;
2879	if (mtu >= dst_mtu(dst))
2880		return;
2881
2882	if (!rt6_cache_allowed_for_pmtu(rt6)) {
2883		rt6_do_update_pmtu(rt6, mtu);
2884		/* update rt6_ex->stamp for cache */
2885		if (rt6->rt6i_flags & RTF_CACHE)
2886			rt6_update_exception_stamp_rt(rt6);
2887	} else if (daddr) {
2888		struct fib6_result res = {};
2889		struct rt6_info *nrt6;
2890
2891		rcu_read_lock();
2892		res.f6i = rcu_dereference(rt6->from);
2893		if (!res.f6i)
2894			goto out_unlock;
2895
2896		res.fib6_flags = res.f6i->fib6_flags;
2897		res.fib6_type = res.f6i->fib6_type;
2898
2899		if (res.f6i->nh) {
2900			struct fib6_nh_match_arg arg = {
2901				.dev = dst->dev,
2902				.gw = &rt6->rt6i_gateway,
2903			};
2904
2905			nexthop_for_each_fib6_nh(res.f6i->nh,
2906						 fib6_nh_find_match, &arg);
2907
2908			/* fib6_info uses a nexthop that has no fib6_nh
2909			 * matching the dst->dev + gw. Should be impossible.
2910			 */
2911			if (!arg.match)
2912				goto out_unlock;
2913
2914			res.nh = arg.match;
2915		} else {
2916			res.nh = res.f6i->fib6_nh;
2917		}
2918
2919		nrt6 = ip6_rt_cache_alloc(&res, daddr, saddr);
2920		if (nrt6) {
2921			rt6_do_update_pmtu(nrt6, mtu);
2922			if (rt6_insert_exception(nrt6, &res))
2923				dst_release_immediate(&nrt6->dst);
2924		}
2925out_unlock:
2926		rcu_read_unlock();
2927	}
2928}
2929
2930static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2931			       struct sk_buff *skb, u32 mtu,
2932			       bool confirm_neigh)
2933{
2934	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu,
2935			     confirm_neigh);
2936}
2937
2938void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
2939		     int oif, u32 mark, kuid_t uid)
2940{
2941	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2942	struct dst_entry *dst;
2943	struct flowi6 fl6 = {
2944		.flowi6_oif = oif,
2945		.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark),
2946		.daddr = iph->daddr,
2947		.saddr = iph->saddr,
2948		.flowlabel = ip6_flowinfo(iph),
2949		.flowi6_uid = uid,
2950	};
2951
2952	dst = ip6_route_output(net, NULL, &fl6);
2953	if (!dst->error)
2954		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu), true);
2955	dst_release(dst);
2956}
2957EXPORT_SYMBOL_GPL(ip6_update_pmtu);
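
/* Editorial caller sketch (illustrative): tunnel and ICMP error handlers
 * typically feed the MTU from an ICMPv6 Packet Too Big message into this
 * helper; skb->data must point at the quoted inner IPv6 header:
 *
 *	ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
 *
 * where "info" is the network-order MTU taken from the ICMPv6 message.
 */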
2958
2959void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2960{
2961	int oif = sk->sk_bound_dev_if;
2962	struct dst_entry *dst;
2963
2964	if (!oif && skb->dev)
2965		oif = l3mdev_master_ifindex(skb->dev);
2966
2967	ip6_update_pmtu(skb, sock_net(sk), mtu, oif, READ_ONCE(sk->sk_mark),
2968			sk->sk_uid);
2969
2970	dst = __sk_dst_get(sk);
2971	if (!dst || !dst->obsolete ||
2972	    dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2973		return;
2974
2975	bh_lock_sock(sk);
2976	if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2977		ip6_datagram_dst_update(sk, false);
2978	bh_unlock_sock(sk);
2979}
2980EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2981
2982void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2983			   const struct flowi6 *fl6)
2984{
2985#ifdef CONFIG_IPV6_SUBTREES
2986	struct ipv6_pinfo *np = inet6_sk(sk);
2987#endif
2988
2989	ip6_dst_store(sk, dst,
2990		      ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2991		      &sk->sk_v6_daddr : NULL,
2992#ifdef CONFIG_IPV6_SUBTREES
2993		      ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2994		      &np->saddr :
2995#endif
2996		      NULL);
2997}
2998
2999static bool ip6_redirect_nh_match(const struct fib6_result *res,
3000				  struct flowi6 *fl6,
3001				  const struct in6_addr *gw,
3002				  struct rt6_info **ret)
3003{
3004	const struct fib6_nh *nh = res->nh;
3005
3006	if (nh->fib_nh_flags & RTNH_F_DEAD || !nh->fib_nh_gw_family ||
3007	    fl6->flowi6_oif != nh->fib_nh_dev->ifindex)
3008		return false;
3009
3010	/* rt_cache's gateway might be different from its 'parent'
3011	 * in the case of an ip redirect.
3012	 * So we keep searching in the exception table if the gateway
3013	 * is different.
3014	 */
3015	if (!ipv6_addr_equal(gw, &nh->fib_nh_gw6)) {
3016		struct rt6_info *rt_cache;
3017
3018		rt_cache = rt6_find_cached_rt(res, &fl6->daddr, &fl6->saddr);
3019		if (rt_cache &&
3020		    ipv6_addr_equal(gw, &rt_cache->rt6i_gateway)) {
3021			*ret = rt_cache;
3022			return true;
3023		}
3024		return false;
3025	}
3026	return true;
3027}
3028
3029struct fib6_nh_rd_arg {
3030	struct fib6_result	*res;
3031	struct flowi6		*fl6;
3032	const struct in6_addr	*gw;
3033	struct rt6_info		**ret;
3034};
3035
3036static int fib6_nh_redirect_match(struct fib6_nh *nh, void *_arg)
3037{
3038	struct fib6_nh_rd_arg *arg = _arg;
3039
3040	arg->res->nh = nh;
3041	return ip6_redirect_nh_match(arg->res, arg->fl6, arg->gw, arg->ret);
3042}
3043
3044/* Handle redirects */
3045struct ip6rd_flowi {
3046	struct flowi6 fl6;
3047	struct in6_addr gateway;
3048};
3049
3050INDIRECT_CALLABLE_SCOPE struct rt6_info *__ip6_route_redirect(struct net *net,
3051					     struct fib6_table *table,
3052					     struct flowi6 *fl6,
3053					     const struct sk_buff *skb,
3054					     int flags)
3055{
3056	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
3057	struct rt6_info *ret = NULL;
3058	struct fib6_result res = {};
3059	struct fib6_nh_rd_arg arg = {
3060		.res = &res,
3061		.fl6 = fl6,
3062		.gw  = &rdfl->gateway,
3063		.ret = &ret
3064	};
3065	struct fib6_info *rt;
3066	struct fib6_node *fn;
3067
3068	/* Get the "current" route for this destination and
3069	 * check if the redirect has come from an appropriate router.
3070	 *
3071	 * RFC 4861 specifies that redirects should only be
3072	 * accepted if they come from the nexthop to the target.
3073	 * Due to the way the routes are chosen, this notion
3074	 * is a bit fuzzy and one might need to check all possible
3075	 * routes.
3076	 */
3077
3078	rcu_read_lock();
3079	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
3080restart:
3081	for_each_fib6_node_rt_rcu(fn) {
3082		res.f6i = rt;
3083		if (fib6_check_expired(rt))
3084			continue;
3085		if (rt->fib6_flags & RTF_REJECT)
3086			break;
3087		if (unlikely(rt->nh)) {
3088			if (nexthop_is_blackhole(rt->nh))
3089				continue;
3090			/* on match, res->nh is filled in and potentially ret */
3091			if (nexthop_for_each_fib6_nh(rt->nh,
3092						     fib6_nh_redirect_match,
3093						     &arg))
3094				goto out;
3095		} else {
3096			res.nh = rt->fib6_nh;
3097			if (ip6_redirect_nh_match(&res, fl6, &rdfl->gateway,
3098						  &ret))
3099				goto out;
3100		}
3101	}
3102
3103	if (!rt)
3104		rt = net->ipv6.fib6_null_entry;
3105	else if (rt->fib6_flags & RTF_REJECT) {
3106		ret = net->ipv6.ip6_null_entry;
3107		goto out;
3108	}
3109
3110	if (rt == net->ipv6.fib6_null_entry) {
3111		fn = fib6_backtrack(fn, &fl6->saddr);
3112		if (fn)
3113			goto restart;
3114	}
3115
3116	res.f6i = rt;
3117	res.nh = rt->fib6_nh;
3118out:
3119	if (ret) {
3120		ip6_hold_safe(net, &ret);
3121	} else {
3122		res.fib6_flags = res.f6i->fib6_flags;
3123		res.fib6_type = res.f6i->fib6_type;
3124		ret = ip6_create_rt_rcu(&res);
3125	}
3126
3127	rcu_read_unlock();
3128
3129	trace_fib6_table_lookup(net, &res, table, fl6);
3130	return ret;
3131}
3132
3133static struct dst_entry *ip6_route_redirect(struct net *net,
3134					    const struct flowi6 *fl6,
3135					    const struct sk_buff *skb,
3136					    const struct in6_addr *gateway)
3137{
3138	int flags = RT6_LOOKUP_F_HAS_SADDR;
3139	struct ip6rd_flowi rdfl;
3140
3141	rdfl.fl6 = *fl6;
3142	rdfl.gateway = *gateway;
3143
3144	return fib6_rule_lookup(net, &rdfl.fl6, skb,
3145				flags, __ip6_route_redirect);
3146}
3147
3148void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
3149		  kuid_t uid)
3150{
3151	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
3152	struct dst_entry *dst;
3153	struct flowi6 fl6 = {
3154		.flowi6_iif = LOOPBACK_IFINDEX,
3155		.flowi6_oif = oif,
3156		.flowi6_mark = mark,
3157		.daddr = iph->daddr,
3158		.saddr = iph->saddr,
3159		.flowlabel = ip6_flowinfo(iph),
3160		.flowi6_uid = uid,
3161	};
3162
3163	dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
3164	rt6_do_redirect(dst, NULL, skb);
3165	dst_release(dst);
3166}
3167EXPORT_SYMBOL_GPL(ip6_redirect);
3168
3169void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)
3170{
3171	const struct ipv6hdr *iph = ipv6_hdr(skb);
3172	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
3173	struct dst_entry *dst;
3174	struct flowi6 fl6 = {
3175		.flowi6_iif = LOOPBACK_IFINDEX,
3176		.flowi6_oif = oif,
3177		.daddr = msg->dest,
3178		.saddr = iph->daddr,
3179		.flowi6_uid = sock_net_uid(net, NULL),
3180	};
3181
3182	dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
3183	rt6_do_redirect(dst, NULL, skb);
3184	dst_release(dst);
3185}
3186
3187void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
3188{
3189	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if,
3190		     READ_ONCE(sk->sk_mark), sk->sk_uid);
3191}
3192EXPORT_SYMBOL_GPL(ip6_sk_redirect);
3193
3194static unsigned int ip6_default_advmss(const struct dst_entry *dst)
3195{
3196	struct net_device *dev = dst->dev;
3197	unsigned int mtu = dst_mtu(dst);
3198	struct net *net = dev_net(dev);
3199
3200	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
3201
3202	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
3203		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
3204
3205	/*
3206	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
3207	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
3208	 * IPV6_MAXPLEN is also valid and means: "any MSS,
3209	 * rely only on pmtu discovery"
3210	 */
3211	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
3212		mtu = IPV6_MAXPLEN;
3213	return mtu;
3214}
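
/* Editorial worked example (illustrative): on a standard Ethernet
 * device dst_mtu() is 1500, so the advertised MSS becomes
 *
 *	1500 - sizeof(struct ipv6hdr) - sizeof(struct tcphdr)
 *	     = 1500 - 40 - 20 = 1440
 *
 * which is above the default ip6_rt_min_advmss and below
 * IPV6_MAXPLEN - sizeof(struct tcphdr), so it is returned unchanged.
 */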
3215
3216INDIRECT_CALLABLE_SCOPE unsigned int ip6_mtu(const struct dst_entry *dst)
3217{
3218	return ip6_dst_mtu_maybe_forward(dst, false);
3219}
3220EXPORT_INDIRECT_CALLABLE(ip6_mtu);
3221
3222/* MTU selection:
3223 * 1. mtu on route is locked - use it
3224 * 2. mtu from nexthop exception
3225 * 3. mtu from egress device
3226 *
3227 * based on ip6_dst_mtu_forward and exception logic of
3228 * rt6_find_cached_rt; called with rcu_read_lock
3229 */
3230u32 ip6_mtu_from_fib6(const struct fib6_result *res,
3231		      const struct in6_addr *daddr,
3232		      const struct in6_addr *saddr)
3233{
3234	const struct fib6_nh *nh = res->nh;
3235	struct fib6_info *f6i = res->f6i;
3236	struct inet6_dev *idev;
3237	struct rt6_info *rt;
3238	u32 mtu = 0;
3239
3240	if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
3241		mtu = f6i->fib6_pmtu;
3242		if (mtu)
3243			goto out;
3244	}
3245
3246	rt = rt6_find_cached_rt(res, daddr, saddr);
3247	if (unlikely(rt)) {
3248		mtu = dst_metric_raw(&rt->dst, RTAX_MTU);
3249	} else {
3250		struct net_device *dev = nh->fib_nh_dev;
3251
3252		mtu = IPV6_MIN_MTU;
3253		idev = __in6_dev_get(dev);
3254		if (idev)
3255			mtu = max_t(u32, mtu, READ_ONCE(idev->cnf.mtu6));
3256	}
3257
3258	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
3259out:
3260	return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
3261}
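
/* Editorial caller sketch (illustrative): once a lookup has filled a
 * fib6_result, the effective path MTU for a flow is read under RCU:
 *
 *	rcu_read_lock();
 *	mtu = ip6_mtu_from_fib6(&res, &fl6.daddr, &fl6.saddr);
 *	rcu_read_unlock();
 */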
3262
3263struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
3264				  struct flowi6 *fl6)
3265{
3266	struct dst_entry *dst;
3267	struct rt6_info *rt;
3268	struct inet6_dev *idev = in6_dev_get(dev);
3269	struct net *net = dev_net(dev);
3270
3271	if (unlikely(!idev))
3272		return ERR_PTR(-ENODEV);
3273
3274	rt = ip6_dst_alloc(net, dev, 0);
3275	if (unlikely(!rt)) {
3276		in6_dev_put(idev);
3277		dst = ERR_PTR(-ENOMEM);
3278		goto out;
3279	}
3280
3281	rt->dst.input = ip6_input;
3282	rt->dst.output  = ip6_output;
3283	rt->rt6i_gateway  = fl6->daddr;
3284	rt->rt6i_dst.addr = fl6->daddr;
3285	rt->rt6i_dst.plen = 128;
3286	rt->rt6i_idev     = idev;
3287	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
3288
3289	/* Add this dst into uncached_list so that rt6_disable_ip() can
3290	 * do proper release of the net_device
3291	 */
3292	rt6_uncached_list_add(rt);
3293
3294	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
3295
3296out:
3297	return dst;
3298}
3299
3300static void ip6_dst_gc(struct dst_ops *ops)
3301{
3302	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
3303	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
3304	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
3305	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
3306	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
3307	unsigned int val;
3308	int entries;
3309
3310	if (time_after(rt_last_gc + rt_min_interval, jiffies))
3311		goto out;
3312
3313	fib6_run_gc(atomic_inc_return(&net->ipv6.ip6_rt_gc_expire), net, true);
3314	entries = dst_entries_get_slow(ops);
3315	if (entries < ops->gc_thresh)
3316		atomic_set(&net->ipv6.ip6_rt_gc_expire, rt_gc_timeout >> 1);
3317out:
3318	val = atomic_read(&net->ipv6.ip6_rt_gc_expire);
3319	atomic_set(&net->ipv6.ip6_rt_gc_expire, val - (val >> rt_elasticity));
3320}
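
/* Editorial note (illustrative arithmetic): each invocation bumps the
 * accumulated ip6_rt_gc_expire value, and the decay at "out" shrinks it
 * by val >> rt_elasticity. With the default elasticity of 9 that is
 * val / 512, roughly 0.2% per call, so GC pressure builds quickly but
 * relaxes only gradually between bursts.
 */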
3321
3322static int ip6_nh_lookup_table(struct net *net, struct fib6_config *cfg,
3323			       const struct in6_addr *gw_addr, u32 tbid,
3324			       int flags, struct fib6_result *res)
3325{
3326	struct flowi6 fl6 = {
3327		.flowi6_oif = cfg->fc_ifindex,
3328		.daddr = *gw_addr,
3329		.saddr = cfg->fc_prefsrc,
3330	};
3331	struct fib6_table *table;
3332	int err;
3333
3334	table = fib6_get_table(net, tbid);
3335	if (!table)
3336		return -EINVAL;
3337
3338	if (!ipv6_addr_any(&cfg->fc_prefsrc))
3339		flags |= RT6_LOOKUP_F_HAS_SADDR;
3340
3341	flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
3342
3343	err = fib6_table_lookup(net, table, cfg->fc_ifindex, &fl6, res, flags);
3344	if (!err && res->f6i != net->ipv6.fib6_null_entry)
3345		fib6_select_path(net, res, &fl6, cfg->fc_ifindex,
3346				 cfg->fc_ifindex != 0, NULL, flags);
3347
3348	return err;
3349}
3350
3351static int ip6_route_check_nh_onlink(struct net *net,
3352				     struct fib6_config *cfg,
3353				     const struct net_device *dev,
3354				     struct netlink_ext_ack *extack)
3355{
3356	u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
3357	const struct in6_addr *gw_addr = &cfg->fc_gateway;
3358	struct fib6_result res = {};
3359	int err;
3360
3361	err = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0, &res);
3362	if (!err && !(res.fib6_flags & RTF_REJECT) &&
3363	    /* ignore match if it is the default route */
3364	    !ipv6_addr_any(&res.f6i->fib6_dst.addr) &&
3365	    (res.fib6_type != RTN_UNICAST || dev != res.nh->fib_nh_dev)) {
3366		NL_SET_ERR_MSG(extack,
3367			       "Nexthop has invalid gateway or device mismatch");
3368		err = -EINVAL;
3369	}
3370
3371	return err;
3372}
3373
3374static int ip6_route_check_nh(struct net *net,
3375			      struct fib6_config *cfg,
3376			      struct net_device **_dev,
3377			      netdevice_tracker *dev_tracker,
3378			      struct inet6_dev **idev)
3379{
3380	const struct in6_addr *gw_addr = &cfg->fc_gateway;
3381	struct net_device *dev = _dev ? *_dev : NULL;
3382	int flags = RT6_LOOKUP_F_IFACE;
3383	struct fib6_result res = {};
3384	int err = -EHOSTUNREACH;
3385
3386	if (cfg->fc_table) {
3387		err = ip6_nh_lookup_table(net, cfg, gw_addr,
3388					  cfg->fc_table, flags, &res);
3389		/* The route to gw_addr must not itself require a gateway or
3390		 * resolve to a reject route. If a device is given, it must match the result.
3391		 */
3392		if (err || res.fib6_flags & RTF_REJECT ||
3393		    res.nh->fib_nh_gw_family ||
3394		    (dev && dev != res.nh->fib_nh_dev))
3395			err = -EHOSTUNREACH;
3396	}
3397
3398	if (err < 0) {
3399		struct flowi6 fl6 = {
3400			.flowi6_oif = cfg->fc_ifindex,
3401			.daddr = *gw_addr,
3402		};
3403
3404		err = fib6_lookup(net, cfg->fc_ifindex, &fl6, &res, flags);
3405		if (err || res.fib6_flags & RTF_REJECT ||
3406		    res.nh->fib_nh_gw_family)
3407			err = -EHOSTUNREACH;
3408
3409		if (err)
3410			return err;
3411
3412		fib6_select_path(net, &res, &fl6, cfg->fc_ifindex,
3413				 cfg->fc_ifindex != 0, NULL, flags);
3414	}
3415
3416	err = 0;
3417	if (dev) {
3418		if (dev != res.nh->fib_nh_dev)
3419			err = -EHOSTUNREACH;
3420	} else {
3421		*_dev = dev = res.nh->fib_nh_dev;
3422		netdev_hold(dev, dev_tracker, GFP_ATOMIC);
3423		*idev = in6_dev_get(dev);
3424	}
3425
3426	return err;
3427}
3428
3429static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
3430			   struct net_device **_dev,
3431			   netdevice_tracker *dev_tracker,
3432			   struct inet6_dev **idev,
3433			   struct netlink_ext_ack *extack)
3434{
3435	const struct in6_addr *gw_addr = &cfg->fc_gateway;
3436	int gwa_type = ipv6_addr_type(gw_addr);
3437	bool skip_dev = !(gwa_type & IPV6_ADDR_LINKLOCAL);
3438	const struct net_device *dev = *_dev;
3439	bool need_addr_check = !dev;
3440	int err = -EINVAL;
3441
3442	/* If gw_addr is local we will fail to detect this in case the
3443	 * address is still TENTATIVE (DAD in progress): rt6_lookup()
3444	 * will return the already-added prefix route via the interface
3445	 * the prefix route was assigned to, which might be non-loopback.
3446	 */
3447	if (dev &&
3448	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
3449		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
3450		goto out;
3451	}
3452
3453	if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
3454		/* IPv6 strictly prohibits using non-link-local
3455		 * addresses as nexthop addresses.
3456		 * Otherwise, the router will not be able to send redirects.
3457		 * It is very good, but in some (rare!) circumstances
3458		 * (SIT, PtP, NBMA NOARP links) it is handy to allow
3459		 * some exceptions. --ANK
3460		 * We allow IPv4-mapped nexthops to support RFC4798-type
3461		 * addressing
3462		 */
3463		if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
3464			NL_SET_ERR_MSG(extack, "Invalid gateway address");
3465			goto out;
3466		}
3467
3468		rcu_read_lock();
3469
3470		if (cfg->fc_flags & RTNH_F_ONLINK)
3471			err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
3472		else
3473			err = ip6_route_check_nh(net, cfg, _dev, dev_tracker,
3474						 idev);
3475
3476		rcu_read_unlock();
3477
3478		if (err)
3479			goto out;
3480	}
3481
3482	/* reload in case device was changed */
3483	dev = *_dev;
3484
3485	err = -EINVAL;
3486	if (!dev) {
3487		NL_SET_ERR_MSG(extack, "Egress device not specified");
3488		goto out;
3489	} else if (dev->flags & IFF_LOOPBACK) {
3490		NL_SET_ERR_MSG(extack,
3491			       "Egress device can not be loopback device for this route");
3492		goto out;
3493	}
3494
3495	/* if we did not check gw_addr above, do so now that the
3496	 * egress device has been resolved.
3497	 */
3498	if (need_addr_check &&
3499	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
3500		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
3501		goto out;
3502	}
3503
3504	err = 0;
3505out:
3506	return err;
3507}
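
/* Editorial sketch (illustrative): the checks above are what make
 *
 *	ip -6 route add 2001:db8::/64 via fe80::1 dev eth0
 *
 * succeed (link-local unicast gateway, non-loopback egress device),
 * while "via" a local or multicast address fails with -EINVAL and an
 * extack message.
 */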
3508
3509static bool fib6_is_reject(u32 flags, struct net_device *dev, int addr_type)
3510{
3511	if ((flags & RTF_REJECT) ||
3512	    (dev && (dev->flags & IFF_LOOPBACK) &&
3513	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
3514	     !(flags & (RTF_ANYCAST | RTF_LOCAL))))
3515		return true;
3516
3517	return false;
3518}
3519
3520int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
3521		 struct fib6_config *cfg, gfp_t gfp_flags,
3522		 struct netlink_ext_ack *extack)
3523{
3524	netdevice_tracker *dev_tracker = &fib6_nh->fib_nh_dev_tracker;
3525	struct net_device *dev = NULL;
3526	struct inet6_dev *idev = NULL;
3527	int addr_type;
3528	int err;
3529
3530	fib6_nh->fib_nh_family = AF_INET6;
3531#ifdef CONFIG_IPV6_ROUTER_PREF
3532	fib6_nh->last_probe = jiffies;
3533#endif
3534	if (cfg->fc_is_fdb) {
3535		fib6_nh->fib_nh_gw6 = cfg->fc_gateway;
3536		fib6_nh->fib_nh_gw_family = AF_INET6;
3537		return 0;
3538	}
3539
3540	err = -ENODEV;
3541	if (cfg->fc_ifindex) {
3542		dev = netdev_get_by_index(net, cfg->fc_ifindex,
3543					  dev_tracker, gfp_flags);
3544		if (!dev)
3545			goto out;
3546		idev = in6_dev_get(dev);
3547		if (!idev)
3548			goto out;
3549	}
3550
3551	if (cfg->fc_flags & RTNH_F_ONLINK) {
3552		if (!dev) {
3553			NL_SET_ERR_MSG(extack,
3554				       "Nexthop device required for onlink");
3555			goto out;
3556		}
3557
3558		if (!(dev->flags & IFF_UP)) {
3559			NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3560			err = -ENETDOWN;
3561			goto out;
3562		}
3563
3564		fib6_nh->fib_nh_flags |= RTNH_F_ONLINK;
3565	}
3566
3567	fib6_nh->fib_nh_weight = 1;
3568
3569	/* We cannot add true routes via loopback here;
3570	 * they would result in kernel looping. Promote them to reject routes.
3571	 */
3572	addr_type = ipv6_addr_type(&cfg->fc_dst);
3573	if (fib6_is_reject(cfg->fc_flags, dev, addr_type)) {
3574		/* hold loopback dev/idev if we haven't done so. */
3575		if (dev != net->loopback_dev) {
3576			if (dev) {
3577				netdev_put(dev, dev_tracker);
3578				in6_dev_put(idev);
3579			}
3580			dev = net->loopback_dev;
3581			netdev_hold(dev, dev_tracker, gfp_flags);
3582			idev = in6_dev_get(dev);
3583			if (!idev) {
3584				err = -ENODEV;
3585				goto out;
3586			}
3587		}
3588		goto pcpu_alloc;
3589	}
3590
3591	if (cfg->fc_flags & RTF_GATEWAY) {
3592		err = ip6_validate_gw(net, cfg, &dev, dev_tracker,
3593				      &idev, extack);
3594		if (err)
3595			goto out;
3596
3597		fib6_nh->fib_nh_gw6 = cfg->fc_gateway;
3598		fib6_nh->fib_nh_gw_family = AF_INET6;
3599	}
3600
3601	err = -ENODEV;
3602	if (!dev)
3603		goto out;
3604
3605	if (idev->cnf.disable_ipv6) {
3606		NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
3607		err = -EACCES;
3608		goto out;
3609	}
3610
3611	if (!(dev->flags & IFF_UP) && !cfg->fc_ignore_dev_down) {
3612		NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3613		err = -ENETDOWN;
3614		goto out;
3615	}
3616
3617	if (!(cfg->fc_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
3618	    !netif_carrier_ok(dev))
3619		fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
3620
3621	err = fib_nh_common_init(net, &fib6_nh->nh_common, cfg->fc_encap,
3622				 cfg->fc_encap_type, cfg, gfp_flags, extack);
3623	if (err)
3624		goto out;
3625
3626pcpu_alloc:
3627	fib6_nh->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags);
3628	if (!fib6_nh->rt6i_pcpu) {
3629		err = -ENOMEM;
3630		goto out;
3631	}
3632
3633	fib6_nh->fib_nh_dev = dev;
3634	fib6_nh->fib_nh_oif = dev->ifindex;
3635	err = 0;
3636out:
3637	if (idev)
3638		in6_dev_put(idev);
3639
3640	if (err) {
3641		lwtstate_put(fib6_nh->fib_nh_lws);
3642		fib6_nh->fib_nh_lws = NULL;
3643		netdev_put(dev, dev_tracker);
3644	}
3645
3646	return err;
3647}
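
/* Editorial usage sketch (illustrative): callers hand fib6_nh_init() a
 * populated fib6_config. On success the nexthop holds device, idev and
 * lwtunnel references, which are dropped via fib6_nh_release() when the
 * route is torn down:
 *
 *	err = fib6_nh_init(net, rt->fib6_nh, &cfg, GFP_KERNEL, extack);
 *	if (err)
 *		(bail out; no release needed on failure)
 *	...
 *	fib6_nh_release(rt->fib6_nh);
 */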
3648
3649void fib6_nh_release(struct fib6_nh *fib6_nh)
3650{
3651	struct rt6_exception_bucket *bucket;
3652
3653	rcu_read_lock();
3654
3655	fib6_nh_flush_exceptions(fib6_nh, NULL);
3656	bucket = fib6_nh_get_excptn_bucket(fib6_nh, NULL);
3657	if (bucket) {
3658		rcu_assign_pointer(fib6_nh->rt6i_exception_bucket, NULL);
3659		kfree(bucket);
3660	}
3661
3662	rcu_read_unlock();
3663
3664	fib6_nh_release_dsts(fib6_nh);
3665	free_percpu(fib6_nh->rt6i_pcpu);
3666
3667	fib_nh_common_release(&fib6_nh->nh_common);
3668}
3669
3670void fib6_nh_release_dsts(struct fib6_nh *fib6_nh)
3671{
3672	int cpu;
3673
3674	if (!fib6_nh->rt6i_pcpu)
3675		return;
3676
3677	for_each_possible_cpu(cpu) {
3678		struct rt6_info *pcpu_rt, **ppcpu_rt;
3679
3680		ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
3681		pcpu_rt = xchg(ppcpu_rt, NULL);
3682		if (pcpu_rt) {
3683			dst_dev_put(&pcpu_rt->dst);
3684			dst_release(&pcpu_rt->dst);
3685		}
3686	}
3687}
3688
3689static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
3690					      gfp_t gfp_flags,
3691					      struct netlink_ext_ack *extack)
3692{
3693	struct net *net = cfg->fc_nlinfo.nl_net;
3694	struct fib6_info *rt = NULL;
3695	struct nexthop *nh = NULL;
3696	struct fib6_table *table;
3697	struct fib6_nh *fib6_nh;
3698	int err = -EINVAL;
3699	int addr_type;
3700
3701	/* RTF_PCPU is an internal flag; can not be set by userspace */
3702	if (cfg->fc_flags & RTF_PCPU) {
3703		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
3704		goto out;
3705	}
3706
3707	/* RTF_CACHE is an internal flag; can not be set by userspace */
3708	if (cfg->fc_flags & RTF_CACHE) {
3709		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
3710		goto out;
3711	}
3712
3713	if (cfg->fc_type > RTN_MAX) {
3714		NL_SET_ERR_MSG(extack, "Invalid route type");
3715		goto out;
3716	}
3717
3718	if (cfg->fc_dst_len > 128) {
3719		NL_SET_ERR_MSG(extack, "Invalid prefix length");
3720		goto out;
3721	}
3722	if (cfg->fc_src_len > 128) {
3723		NL_SET_ERR_MSG(extack, "Invalid source address length");
3724		goto out;
3725	}
3726#ifndef CONFIG_IPV6_SUBTREES
3727	if (cfg->fc_src_len) {
3728		NL_SET_ERR_MSG(extack,
3729			       "Specifying source address requires IPV6_SUBTREES to be enabled");
3730		goto out;
3731	}
3732#endif
3733	if (cfg->fc_nh_id) {
3734		nh = nexthop_find_by_id(net, cfg->fc_nh_id);
3735		if (!nh) {
3736			NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
3737			goto out;
3738		}
3739		err = fib6_check_nexthop(nh, cfg, extack);
3740		if (err)
3741			goto out;
3742	}
3743
3744	err = -ENOBUFS;
3745	if (cfg->fc_nlinfo.nlh &&
3746	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
3747		table = fib6_get_table(net, cfg->fc_table);
3748		if (!table) {
3749			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
3750			table = fib6_new_table(net, cfg->fc_table);
3751		}
3752	} else {
3753		table = fib6_new_table(net, cfg->fc_table);
3754	}
3755
3756	if (!table)
3757		goto out;
3758
3759	err = -ENOMEM;
3760	rt = fib6_info_alloc(gfp_flags, !nh);
3761	if (!rt)
3762		goto out;
3763
3764	rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len,
3765					       extack);
3766	if (IS_ERR(rt->fib6_metrics)) {
3767		err = PTR_ERR(rt->fib6_metrics);
3768		/* Do not leave garbage there. */
3769		rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
3770		goto out_free;
3771	}
3772
3773	if (cfg->fc_flags & RTF_ADDRCONF)
3774		rt->dst_nocount = true;
3775
3776	if (cfg->fc_flags & RTF_EXPIRES)
3777		fib6_set_expires(rt, jiffies +
3778				clock_t_to_jiffies(cfg->fc_expires));
3779
3780	if (cfg->fc_protocol == RTPROT_UNSPEC)
3781		cfg->fc_protocol = RTPROT_BOOT;
3782	rt->fib6_protocol = cfg->fc_protocol;
3783
3784	rt->fib6_table = table;
3785	rt->fib6_metric = cfg->fc_metric;
3786	rt->fib6_type = cfg->fc_type ? : RTN_UNICAST;
3787	rt->fib6_flags = cfg->fc_flags & ~RTF_GATEWAY;
3788
3789	ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
3790	rt->fib6_dst.plen = cfg->fc_dst_len;
3791
3792#ifdef CONFIG_IPV6_SUBTREES
3793	ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
3794	rt->fib6_src.plen = cfg->fc_src_len;
3795#endif
3796	if (nh) {
3797		if (rt->fib6_src.plen) {
3798			NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing");
3799			goto out_free;
3800		}
3801		if (!nexthop_get(nh)) {
3802			NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
3803			goto out_free;
3804		}
3805		rt->nh = nh;
3806		fib6_nh = nexthop_fib6_nh(rt->nh);
3807	} else {
3808		err = fib6_nh_init(net, rt->fib6_nh, cfg, gfp_flags, extack);
3809		if (err)
3810			goto out;
3811
3812		fib6_nh = rt->fib6_nh;
3813
3814		/* We cannot add true routes via loopback here; they would
3815		 * result in kernel looping. Promote them to reject routes.
3816		 */
3817		addr_type = ipv6_addr_type(&cfg->fc_dst);
3818		if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh->fib_nh_dev,
3819				   addr_type))
3820			rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP;
3821	}
3822
3823	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
3824		struct net_device *dev = fib6_nh->fib_nh_dev;
3825
3826		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
3827			NL_SET_ERR_MSG(extack, "Invalid source address");
3828			err = -EINVAL;
3829			goto out;
3830		}
3831		rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
3832		rt->fib6_prefsrc.plen = 128;
3833	} else
3834		rt->fib6_prefsrc.plen = 0;
3835
3836	return rt;
3837out:
3838	fib6_info_release(rt);
3839	return ERR_PTR(err);
3840out_free:
3841	ip_fib_metrics_put(rt->fib6_metrics);
3842	kfree(rt);
3843	return ERR_PTR(err);
3844}
3845
3846int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
3847		  struct netlink_ext_ack *extack)
3848{
3849	struct fib6_info *rt;
3850	int err;
3851
3852	rt = ip6_route_info_create(cfg, gfp_flags, extack);
3853	if (IS_ERR(rt))
3854		return PTR_ERR(rt);
3855
3856	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
3857	fib6_info_release(rt);
3858
3859	return err;
3860}
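
/* Editorial usage sketch (illustrative): rt6_add_route_info() and
 * rt6_add_dflt_router() below are in-tree callers. The minimal pattern
 * is to populate a fib6_config and let ip6_route_add() create and
 * insert the fib6_info in one step:
 *
 *	struct fib6_config cfg = {
 *		.fc_table	= RT6_TABLE_MAIN,
 *		.fc_metric	= IP6_RT_PRIO_USER,
 *		.fc_ifindex	= dev->ifindex,
 *		.fc_dst_len	= 64,
 *		.fc_flags	= RTF_UP,
 *		.fc_nlinfo.nl_net = net,
 *	};
 *
 *	cfg.fc_dst = prefix;
 *	err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
 */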
3861
3862static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
3863{
3864	struct net *net = info->nl_net;
3865	struct fib6_table *table;
3866	int err;
3867
3868	if (rt == net->ipv6.fib6_null_entry) {
3869		err = -ENOENT;
3870		goto out;
3871	}
3872
3873	table = rt->fib6_table;
3874	spin_lock_bh(&table->tb6_lock);
3875	err = fib6_del(rt, info);
3876	spin_unlock_bh(&table->tb6_lock);
3877
3878out:
3879	fib6_info_release(rt);
3880	return err;
3881}
3882
3883int ip6_del_rt(struct net *net, struct fib6_info *rt, bool skip_notify)
3884{
3885	struct nl_info info = {
3886		.nl_net = net,
3887		.skip_notify = skip_notify
3888	};
3889
3890	return __ip6_del_rt(rt, &info);
3891}
3892
3893static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
3894{
3895	struct nl_info *info = &cfg->fc_nlinfo;
3896	struct net *net = info->nl_net;
3897	struct sk_buff *skb = NULL;
3898	struct fib6_table *table;
3899	int err = -ENOENT;
3900
3901	if (rt == net->ipv6.fib6_null_entry)
3902		goto out_put;
3903	table = rt->fib6_table;
3904	spin_lock_bh(&table->tb6_lock);
3905
3906	if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
3907		struct fib6_info *sibling, *next_sibling;
3908		struct fib6_node *fn;
3909
3910		/* prefer to send a single notification with all hops */
3911		skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3912		if (skb) {
3913			u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3914
3915			if (rt6_fill_node(net, skb, rt, NULL,
3916					  NULL, NULL, 0, RTM_DELROUTE,
3917					  info->portid, seq, 0) < 0) {
3918				kfree_skb(skb);
3919				skb = NULL;
3920			} else
3921				info->skip_notify = 1;
3922		}
3923
3924		/* 'rt' points to the first sibling route. If it is not the
3925		 * leaf, then we do not need to send a notification. Otherwise,
3926		 * we need to check if the last sibling has a next route or not
3927		 * and emit a replace or delete notification, respectively.
3928		 */
3929		info->skip_notify_kernel = 1;
3930		fn = rcu_dereference_protected(rt->fib6_node,
3931					    lockdep_is_held(&table->tb6_lock));
3932		if (rcu_access_pointer(fn->leaf) == rt) {
3933			struct fib6_info *last_sibling, *replace_rt;
3934
3935			last_sibling = list_last_entry(&rt->fib6_siblings,
3936						       struct fib6_info,
3937						       fib6_siblings);
3938			replace_rt = rcu_dereference_protected(
3939					    last_sibling->fib6_next,
3940					    lockdep_is_held(&table->tb6_lock));
3941			if (replace_rt)
3942				call_fib6_entry_notifiers_replace(net,
3943								  replace_rt);
3944			else
3945				call_fib6_multipath_entry_notifiers(net,
3946						       FIB_EVENT_ENTRY_DEL,
3947						       rt, rt->fib6_nsiblings,
3948						       NULL);
3949		}
3950		list_for_each_entry_safe(sibling, next_sibling,
3951					 &rt->fib6_siblings,
3952					 fib6_siblings) {
3953			err = fib6_del(sibling, info);
3954			if (err)
3955				goto out_unlock;
3956		}
3957	}
3958
3959	err = fib6_del(rt, info);
3960out_unlock:
3961	spin_unlock_bh(&table->tb6_lock);
3962out_put:
3963	fib6_info_release(rt);
3964
3965	if (skb) {
3966		rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3967			    info->nlh, gfp_any());
3968	}
3969	return err;
3970}
3971
3972static int __ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
3973{
3974	int rc = -ESRCH;
3975
3976	if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
3977		goto out;
3978
3979	if (cfg->fc_flags & RTF_GATEWAY &&
3980	    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
3981		goto out;
3982
3983	rc = rt6_remove_exception_rt(rt);
3984out:
3985	return rc;
3986}
3987
3988static int ip6_del_cached_rt(struct fib6_config *cfg, struct fib6_info *rt,
3989			     struct fib6_nh *nh)
3990{
3991	struct fib6_result res = {
3992		.f6i = rt,
3993		.nh = nh,
3994	};
3995	struct rt6_info *rt_cache;
3996
3997	rt_cache = rt6_find_cached_rt(&res, &cfg->fc_dst, &cfg->fc_src);
3998	if (rt_cache)
3999		return __ip6_del_cached_rt(rt_cache, cfg);
4000
4001	return 0;
4002}
4003
4004struct fib6_nh_del_cached_rt_arg {
4005	struct fib6_config *cfg;
4006	struct fib6_info *f6i;
4007};
4008
4009static int fib6_nh_del_cached_rt(struct fib6_nh *nh, void *_arg)
4010{
4011	struct fib6_nh_del_cached_rt_arg *arg = _arg;
4012	int rc;
4013
4014	rc = ip6_del_cached_rt(arg->cfg, arg->f6i, nh);
4015	return rc != -ESRCH ? rc : 0;
4016}
4017
4018static int ip6_del_cached_rt_nh(struct fib6_config *cfg, struct fib6_info *f6i)
4019{
4020	struct fib6_nh_del_cached_rt_arg arg = {
4021		.cfg = cfg,
4022		.f6i = f6i
4023	};
4024
4025	return nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_del_cached_rt, &arg);
4026}
4027
4028static int ip6_route_del(struct fib6_config *cfg,
4029			 struct netlink_ext_ack *extack)
4030{
4031	struct fib6_table *table;
4032	struct fib6_info *rt;
4033	struct fib6_node *fn;
4034	int err = -ESRCH;
4035
4036	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
4037	if (!table) {
4038		NL_SET_ERR_MSG(extack, "FIB table does not exist");
4039		return err;
4040	}
4041
4042	rcu_read_lock();
4043
4044	fn = fib6_locate(&table->tb6_root,
4045			 &cfg->fc_dst, cfg->fc_dst_len,
4046			 &cfg->fc_src, cfg->fc_src_len,
4047			 !(cfg->fc_flags & RTF_CACHE));
4048
4049	if (fn) {
4050		for_each_fib6_node_rt_rcu(fn) {
4051			struct fib6_nh *nh;
4052
4053			if (rt->nh && cfg->fc_nh_id &&
4054			    rt->nh->id != cfg->fc_nh_id)
4055				continue;
4056
4057			if (cfg->fc_flags & RTF_CACHE) {
4058				int rc = 0;
4059
4060				if (rt->nh) {
4061					rc = ip6_del_cached_rt_nh(cfg, rt);
4062				} else if (cfg->fc_nh_id) {
4063					continue;
4064				} else {
4065					nh = rt->fib6_nh;
4066					rc = ip6_del_cached_rt(cfg, rt, nh);
4067				}
4068				if (rc != -ESRCH) {
4069					rcu_read_unlock();
4070					return rc;
4071				}
4072				continue;
4073			}
4074
4075			if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
4076				continue;
4077			if (cfg->fc_protocol &&
4078			    cfg->fc_protocol != rt->fib6_protocol)
4079				continue;
4080
4081			if (rt->nh) {
4082				if (!fib6_info_hold_safe(rt))
4083					continue;
4084				rcu_read_unlock();
4085
4086				return __ip6_del_rt(rt, &cfg->fc_nlinfo);
4087			}
4088			if (cfg->fc_nh_id)
4089				continue;
4090
4091			nh = rt->fib6_nh;
4092			if (cfg->fc_ifindex &&
4093			    (!nh->fib_nh_dev ||
4094			     nh->fib_nh_dev->ifindex != cfg->fc_ifindex))
4095				continue;
4096			if (cfg->fc_flags & RTF_GATEWAY &&
4097			    !ipv6_addr_equal(&cfg->fc_gateway, &nh->fib_nh_gw6))
4098				continue;
4099			if (!fib6_info_hold_safe(rt))
4100				continue;
4101			rcu_read_unlock();
4102
4103			/* if a gateway was specified, only delete the one hop */
4104			if (cfg->fc_flags & RTF_GATEWAY)
4105				return __ip6_del_rt(rt, &cfg->fc_nlinfo);
4106
4107			return __ip6_del_rt_siblings(rt, cfg);
4108		}
4109	}
4110	rcu_read_unlock();
4111
4112	return err;
4113}
4114
4115static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
4116{
4117	struct netevent_redirect netevent;
4118	struct rt6_info *rt, *nrt = NULL;
4119	struct fib6_result res = {};
4120	struct ndisc_options ndopts;
4121	struct inet6_dev *in6_dev;
4122	struct neighbour *neigh;
4123	struct rd_msg *msg;
4124	int optlen, on_link;
4125	u8 *lladdr;
4126
4127	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
4128	optlen -= sizeof(*msg);
4129
4130	if (optlen < 0) {
4131		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
4132		return;
4133	}
4134
4135	msg = (struct rd_msg *)icmp6_hdr(skb);
4136
4137	if (ipv6_addr_is_multicast(&msg->dest)) {
4138		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
4139		return;
4140	}
4141
4142	on_link = 0;
4143	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
4144		on_link = 1;
4145	} else if (ipv6_addr_type(&msg->target) !=
4146		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
4147		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
4148		return;
4149	}
4150
4151	in6_dev = __in6_dev_get(skb->dev);
4152	if (!in6_dev)
4153		return;
4154	if (READ_ONCE(in6_dev->cnf.forwarding) ||
4155	    !READ_ONCE(in6_dev->cnf.accept_redirects))
4156		return;
4157
4158	/* RFC2461 8.1:
4159	 *	The IP source address of the Redirect MUST be the same as the current
4160	 *	first-hop router for the specified ICMP Destination Address.
4161	 */
4162
4163	if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
4164		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
4165		return;
4166	}
4167
4168	lladdr = NULL;
4169	if (ndopts.nd_opts_tgt_lladdr) {
4170		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
4171					     skb->dev);
4172		if (!lladdr) {
4173			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
4174			return;
4175		}
4176	}
4177
4178	rt = dst_rt6_info(dst);
4179	if (rt->rt6i_flags & RTF_REJECT) {
4180		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
4181		return;
4182	}
4183
4184	/* Redirect received -> path was valid.
4185	 * Look, redirects are sent only in response to data packets,
4186	 * so this nexthop is apparently reachable. --ANK
4187	 */
4188	dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
4189
4190	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
4191	if (!neigh)
4192		return;
4193
4194	/*
4195	 *	We have finally decided to accept it.
4196	 */
4197
4198	ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
4199		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
4200		     NEIGH_UPDATE_F_OVERRIDE|
4201		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
4202				     NEIGH_UPDATE_F_ISROUTER)),
4203		     NDISC_REDIRECT, &ndopts);
4204
4205	rcu_read_lock();
4206	res.f6i = rcu_dereference(rt->from);
4207	if (!res.f6i)
4208		goto out;
4209
4210	if (res.f6i->nh) {
4211		struct fib6_nh_match_arg arg = {
4212			.dev = dst->dev,
4213			.gw = &rt->rt6i_gateway,
4214		};
4215
4216		nexthop_for_each_fib6_nh(res.f6i->nh,
4217					 fib6_nh_find_match, &arg);
4218
4219		/* fib6_info uses a nexthop that has no fib6_nh
4220		 * matching the dst->dev. Should be impossible.
4221		 */
4222		if (!arg.match)
4223			goto out;
4224		res.nh = arg.match;
4225	} else {
4226		res.nh = res.f6i->fib6_nh;
4227	}
4228
4229	res.fib6_flags = res.f6i->fib6_flags;
4230	res.fib6_type = res.f6i->fib6_type;
4231	nrt = ip6_rt_cache_alloc(&res, &msg->dest, NULL);
4232	if (!nrt)
4233		goto out;
4234
4235	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
4236	if (on_link)
4237		nrt->rt6i_flags &= ~RTF_GATEWAY;
4238
4239	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
4240
4241	/* rt6_insert_exception() will take care of duplicated exceptions */
4242	if (rt6_insert_exception(nrt, &res)) {
4243		dst_release_immediate(&nrt->dst);
4244		goto out;
4245	}
4246
4247	netevent.old = &rt->dst;
4248	netevent.new = &nrt->dst;
4249	netevent.daddr = &msg->dest;
4250	netevent.neigh = neigh;
4251	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
4252
4253out:
4254	rcu_read_unlock();
4255	neigh_release(neigh);
4256}
4257
4258#ifdef CONFIG_IPV6_ROUTE_INFO
4259static struct fib6_info *rt6_get_route_info(struct net *net,
4260					   const struct in6_addr *prefix, int prefixlen,
4261					   const struct in6_addr *gwaddr,
4262					   struct net_device *dev)
4263{
4264	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
4265	int ifindex = dev->ifindex;
4266	struct fib6_node *fn;
4267	struct fib6_info *rt = NULL;
4268	struct fib6_table *table;
4269
4270	table = fib6_get_table(net, tb_id);
4271	if (!table)
4272		return NULL;
4273
4274	rcu_read_lock();
4275	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
4276	if (!fn)
4277		goto out;
4278
4279	for_each_fib6_node_rt_rcu(fn) {
4280		/* these routes do not use nexthops */
4281		if (rt->nh)
4282			continue;
4283		if (rt->fib6_nh->fib_nh_dev->ifindex != ifindex)
4284			continue;
4285		if (!(rt->fib6_flags & RTF_ROUTEINFO) ||
4286		    !rt->fib6_nh->fib_nh_gw_family)
4287			continue;
4288		if (!ipv6_addr_equal(&rt->fib6_nh->fib_nh_gw6, gwaddr))
4289			continue;
4290		if (!fib6_info_hold_safe(rt))
4291			continue;
4292		break;
4293	}
4294out:
4295	rcu_read_unlock();
4296	return rt;
4297}
4298
4299static struct fib6_info *rt6_add_route_info(struct net *net,
4300					   const struct in6_addr *prefix, int prefixlen,
4301					   const struct in6_addr *gwaddr,
4302					   struct net_device *dev,
4303					   unsigned int pref)
4304{
4305	struct fib6_config cfg = {
4306		.fc_metric	= IP6_RT_PRIO_USER,
4307		.fc_ifindex	= dev->ifindex,
4308		.fc_dst_len	= prefixlen,
4309		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
4310				  RTF_UP | RTF_PREF(pref),
4311		.fc_protocol = RTPROT_RA,
4312		.fc_type = RTN_UNICAST,
4313		.fc_nlinfo.portid = 0,
4314		.fc_nlinfo.nlh = NULL,
4315		.fc_nlinfo.nl_net = net,
4316	};
4317
4318	cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
4319	cfg.fc_dst = *prefix;
4320	cfg.fc_gateway = *gwaddr;
4321
4322	/* We should treat it as a default route if prefix length is 0. */
4323	if (!prefixlen)
4324		cfg.fc_flags |= RTF_DEFAULT;
4325
4326	ip6_route_add(&cfg, GFP_ATOMIC, NULL);
4327
4328	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
4329}
4330#endif
4331
4332struct fib6_info *rt6_get_dflt_router(struct net *net,
4333				     const struct in6_addr *addr,
4334				     struct net_device *dev)
4335{
4336	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
4337	struct fib6_info *rt;
4338	struct fib6_table *table;
4339
4340	table = fib6_get_table(net, tb_id);
4341	if (!table)
4342		return NULL;
4343
4344	rcu_read_lock();
4345	for_each_fib6_node_rt_rcu(&table->tb6_root) {
4346		struct fib6_nh *nh;
4347
4348		/* RA routes do not use nexthops */
4349		if (rt->nh)
4350			continue;
4351
4352		nh = rt->fib6_nh;
4353		if (dev == nh->fib_nh_dev &&
4354		    ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
4355		    ipv6_addr_equal(&nh->fib_nh_gw6, addr))
4356			break;
4357	}
4358	if (rt && !fib6_info_hold_safe(rt))
4359		rt = NULL;
4360	rcu_read_unlock();
4361	return rt;
4362}
4363
4364struct fib6_info *rt6_add_dflt_router(struct net *net,
4365				     const struct in6_addr *gwaddr,
4366				     struct net_device *dev,
4367				     unsigned int pref,
4368				     u32 defrtr_usr_metric,
4369				     int lifetime)
4370{
4371	struct fib6_config cfg = {
4372		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
4373		.fc_metric	= defrtr_usr_metric,
4374		.fc_ifindex	= dev->ifindex,
4375		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
4376				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
4377		.fc_protocol = RTPROT_RA,
4378		.fc_type = RTN_UNICAST,
4379		.fc_nlinfo.portid = 0,
4380		.fc_nlinfo.nlh = NULL,
4381		.fc_nlinfo.nl_net = net,
4382		.fc_expires = jiffies_to_clock_t(lifetime * HZ),
4383	};
4384
4385	cfg.fc_gateway = *gwaddr;
4386
4387	if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
4388		struct fib6_table *table;
4389
4390		table = fib6_get_table(dev_net(dev), cfg.fc_table);
4391		if (table)
4392			table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
4393	}
4394
4395	return rt6_get_dflt_router(net, gwaddr, dev);
4396}
4397
4398static void __rt6_purge_dflt_routers(struct net *net,
4399				     struct fib6_table *table)
4400{
4401	struct fib6_info *rt;
4402
4403restart:
4404	rcu_read_lock();
4405	for_each_fib6_node_rt_rcu(&table->tb6_root) {
4406		struct net_device *dev = fib6_info_nh_dev(rt);
4407		struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
4408
4409		if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
4410		    (!idev || idev->cnf.accept_ra != 2) &&
4411		    fib6_info_hold_safe(rt)) {
4412			rcu_read_unlock();
4413			ip6_del_rt(net, rt, false);
4414			goto restart;
4415		}
4416	}
4417	rcu_read_unlock();
4418
4419	table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
4420}
4421
4422void rt6_purge_dflt_routers(struct net *net)
4423{
4424	struct fib6_table *table;
4425	struct hlist_head *head;
4426	unsigned int h;
4427
4428	rcu_read_lock();
4429
4430	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
4431		head = &net->ipv6.fib_table_hash[h];
4432		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
4433			if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
4434				__rt6_purge_dflt_routers(net, table);
4435		}
4436	}
4437
4438	rcu_read_unlock();
4439}
4440
4441static void rtmsg_to_fib6_config(struct net *net,
4442				 struct in6_rtmsg *rtmsg,
4443				 struct fib6_config *cfg)
4444{
4445	*cfg = (struct fib6_config){
4446		.fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
4447			 : RT6_TABLE_MAIN,
4448		.fc_ifindex = rtmsg->rtmsg_ifindex,
4449		.fc_metric = rtmsg->rtmsg_metric ? : IP6_RT_PRIO_USER,
4450		.fc_expires = rtmsg->rtmsg_info,
4451		.fc_dst_len = rtmsg->rtmsg_dst_len,
4452		.fc_src_len = rtmsg->rtmsg_src_len,
4453		.fc_flags = rtmsg->rtmsg_flags,
4454		.fc_type = rtmsg->rtmsg_type,
4455
4456		.fc_nlinfo.nl_net = net,
4457
4458		.fc_dst = rtmsg->rtmsg_dst,
4459		.fc_src = rtmsg->rtmsg_src,
4460		.fc_gateway = rtmsg->rtmsg_gateway,
4461	};
4462}
4463
4464int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg)
4465{
4466	struct fib6_config cfg;
4467	int err;
4468
4469	if (cmd != SIOCADDRT && cmd != SIOCDELRT)
4470		return -EINVAL;
4471	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
4472		return -EPERM;
4473
4474	rtmsg_to_fib6_config(net, rtmsg, &cfg);
4475
4476	rtnl_lock();
4477	switch (cmd) {
4478	case SIOCADDRT:
4479		err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
4480		break;
4481	case SIOCDELRT:
4482		err = ip6_route_del(&cfg, NULL);
4483		break;
4484	}
4485	rtnl_unlock();
4486	return err;
4487}
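
/* Editorial userspace sketch (illustrative): this ioctl path is what the
 * legacy "route -A inet6" tool uses. A minimal caller fills a
 * struct in6_rtmsg and issues SIOCADDRT on any AF_INET6 socket
 * (CAP_NET_ADMIN required, as checked above):
 *
 *	struct in6_rtmsg rt = {
 *		.rtmsg_dst_len	= 64,
 *		.rtmsg_metric	= 1,
 *		.rtmsg_flags	= RTF_UP,
 *		.rtmsg_ifindex	= if_nametoindex("eth0"),
 *	};
 *
 *	inet_pton(AF_INET6, "2001:db8::", &rt.rtmsg_dst);
 *	ioctl(fd, SIOCADDRT, &rt);
 */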
4488
4489/*
4490 *	Drop the packet on the floor
4491 */
4492
4493static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
4494{
4495	struct dst_entry *dst = skb_dst(skb);
4496	struct net *net = dev_net(dst->dev);
4497	struct inet6_dev *idev;
4498	SKB_DR(reason);
4499	int type;
4500
4501	if (netif_is_l3_master(skb->dev) ||
4502	    dst->dev == net->loopback_dev)
4503		idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
4504	else
4505		idev = ip6_dst_idev(dst);
4506
4507	switch (ipstats_mib_noroutes) {
4508	case IPSTATS_MIB_INNOROUTES:
4509		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
4510		if (type == IPV6_ADDR_ANY) {
4511			SKB_DR_SET(reason, IP_INADDRERRORS);
4512			IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
4513			break;
4514		}
4515		SKB_DR_SET(reason, IP_INNOROUTES);
4516		fallthrough;
4517	case IPSTATS_MIB_OUTNOROUTES:
4518		SKB_DR_OR(reason, IP_OUTNOROUTES);
4519		IP6_INC_STATS(net, idev, ipstats_mib_noroutes);
4520		break;
4521	}
4522
4523	/* Start over by dropping the dst for l3mdev case */
4524	if (netif_is_l3_master(skb->dev))
4525		skb_dst_drop(skb);
4526
4527	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
4528	kfree_skb_reason(skb, reason);
4529	return 0;
4530}
4531
4532static int ip6_pkt_discard(struct sk_buff *skb)
4533{
4534	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
4535}
4536
4537static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
4538{
4539	skb->dev = skb_dst(skb)->dev;
4540	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
4541}
4542
4543static int ip6_pkt_prohibit(struct sk_buff *skb)
4544{
4545	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
4546}
4547
4548static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
4549{
4550	skb->dev = skb_dst(skb)->dev;
4551	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
4552}
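
/* The four handlers above back reject routes: "discard" answers with
 * ICMPv6 no-route, "prohibit" with administratively prohibited, each
 * in an input and an output flavour.  For example (hedged; iproute2
 * userspace):
 *
 *	ip -6 route add prohibit 2001:db8:bad::/48
 *
 * installs a route whose packets are handled by ip6_pkt_prohibit{,_out}.
 */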
4553
4554/*
4555 *	Allocate a dst for local (unicast / anycast) address.
4556 */
4557
4558struct fib6_info *addrconf_f6i_alloc(struct net *net,
4559				     struct inet6_dev *idev,
4560				     const struct in6_addr *addr,
4561				     bool anycast, gfp_t gfp_flags,
4562				     struct netlink_ext_ack *extack)
4563{
4564	struct fib6_config cfg = {
4565		.fc_table = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL,
4566		.fc_ifindex = idev->dev->ifindex,
4567		.fc_flags = RTF_UP | RTF_NONEXTHOP,
4568		.fc_dst = *addr,
4569		.fc_dst_len = 128,
4570		.fc_protocol = RTPROT_KERNEL,
4571		.fc_nlinfo.nl_net = net,
4572		.fc_ignore_dev_down = true,
4573	};
4574	struct fib6_info *f6i;
4575
4576	if (anycast) {
4577		cfg.fc_type = RTN_ANYCAST;
4578		cfg.fc_flags |= RTF_ANYCAST;
4579	} else {
4580		cfg.fc_type = RTN_LOCAL;
4581		cfg.fc_flags |= RTF_LOCAL;
4582	}
4583
4584	f6i = ip6_route_info_create(&cfg, gfp_flags, extack);
4585	if (!IS_ERR(f6i)) {
4586		f6i->dst_nocount = true;
4587
4588		if (!anycast &&
4589		    (READ_ONCE(net->ipv6.devconf_all->disable_policy) ||
4590		     READ_ONCE(idev->cnf.disable_policy)))
4591			f6i->dst_nopolicy = true;
4592	}
4593
4594	return f6i;
4595}
4596
4597/* remove deleted ip from prefsrc entries */
4598struct arg_dev_net_ip {
4599	struct net *net;
4600	struct in6_addr *addr;
4601};
4602
4603static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
4604{
4605	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
4606	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
4607
4608	if (!rt->nh &&
4609	    rt != net->ipv6.fib6_null_entry &&
4610	    ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr) &&
4611	    !ipv6_chk_addr(net, addr, rt->fib6_nh->fib_nh_dev, 0)) {
4612		spin_lock_bh(&rt6_exception_lock);
4613		/* remove prefsrc entry */
4614		rt->fib6_prefsrc.plen = 0;
4615		spin_unlock_bh(&rt6_exception_lock);
4616	}
4617	return 0;
4618}
4619
4620void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
4621{
4622	struct net *net = dev_net(ifp->idev->dev);
4623	struct arg_dev_net_ip adni = {
4624		.net = net,
4625		.addr = &ifp->addr,
4626	};
4627	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
4628}
4629
4630#define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT)
4631
4632/* Remove routers and update dst entries when a gateway turns into a host. */
4633static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
4634{
4635	struct in6_addr *gateway = (struct in6_addr *)arg;
4636	struct fib6_nh *nh;
4637
4638	/* RA routes do not use nexthops */
4639	if (rt->nh)
4640		return 0;
4641
4642	nh = rt->fib6_nh;
4643	if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
4644	    nh->fib_nh_gw_family && ipv6_addr_equal(gateway, &nh->fib_nh_gw6))
4645		return -1;
4646
4647	/* Further clean up cached routes in exception table.
4648	 * This is needed because cached route may have a different
4649	 * gateway than its 'parent' in the case of an ip redirect.
4650	 */
4651	fib6_nh_exceptions_clean_tohost(nh, gateway);
4652
4653	return 0;
4654}
4655
4656void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
4657{
4658	fib6_clean_all(net, fib6_clean_tohost, gateway);
4659}
4660
4661struct arg_netdev_event {
4662	const struct net_device *dev;
4663	union {
4664		unsigned char nh_flags;
4665		unsigned long event;
4666	};
4667};
4668
4669static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
4670{
4671	struct fib6_info *iter;
4672	struct fib6_node *fn;
4673
4674	fn = rcu_dereference_protected(rt->fib6_node,
4675			lockdep_is_held(&rt->fib6_table->tb6_lock));
4676	iter = rcu_dereference_protected(fn->leaf,
4677			lockdep_is_held(&rt->fib6_table->tb6_lock));
4678	while (iter) {
4679		if (iter->fib6_metric == rt->fib6_metric &&
4680		    rt6_qualify_for_ecmp(iter))
4681			return iter;
4682		iter = rcu_dereference_protected(iter->fib6_next,
4683				lockdep_is_held(&rt->fib6_table->tb6_lock));
4684	}
4685
4686	return NULL;
4687}
4688
4689/* only called for fib entries with builtin fib6_nh */
4690static bool rt6_is_dead(const struct fib6_info *rt)
4691{
4692	if (rt->fib6_nh->fib_nh_flags & RTNH_F_DEAD ||
4693	    (rt->fib6_nh->fib_nh_flags & RTNH_F_LINKDOWN &&
4694	     ip6_ignore_linkdown(rt->fib6_nh->fib_nh_dev)))
4695		return true;
4696
4697	return false;
4698}
4699
4700static int rt6_multipath_total_weight(const struct fib6_info *rt)
4701{
4702	struct fib6_info *iter;
4703	int total = 0;
4704
4705	if (!rt6_is_dead(rt))
4706		total += rt->fib6_nh->fib_nh_weight;
4707
4708	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
4709		if (!rt6_is_dead(iter))
4710			total += iter->fib6_nh->fib_nh_weight;
4711	}
4712
4713	return total;
4714}
4715
4716static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
4717{
4718	int upper_bound = -1;
4719
4720	if (!rt6_is_dead(rt)) {
4721		*weight += rt->fib6_nh->fib_nh_weight;
4722		upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
4723						    total) - 1;
4724	}
4725	atomic_set(&rt->fib6_nh->fib_nh_upper_bound, upper_bound);
4726}
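
/* Worked example (illustrative only): with two live nexthops of
 * weights 1 and 2, total == 3 and the cumulative weights are 1 and 3,
 * so the upper bounds in the 31-bit hash space become
 *
 *	round(1 * 2^31 / 3) - 1 == 715827882
 *	round(3 * 2^31 / 3) - 1 == 2147483647
 *
 * i.e. roughly one third of flows hash to the first nexthop and two
 * thirds to the second; a dead nexthop keeps the -1 sentinel and is
 * never selected.
 */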
4727
4728static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
4729{
4730	struct fib6_info *iter;
4731	int weight = 0;
4732
4733	rt6_upper_bound_set(rt, &weight, total);
4734
4735	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4736		rt6_upper_bound_set(iter, &weight, total);
4737}
4738
4739void rt6_multipath_rebalance(struct fib6_info *rt)
4740{
4741	struct fib6_info *first;
4742	int total;
4743
4744	/* If the entire multipath route was marked for flushing,
4745	 * there is no need to rebalance upon the removal of every
4746	 * sibling route.
4747	 */
4748	if (!rt->fib6_nsiblings || rt->should_flush)
4749		return;
4750
4751	/* During lookup routes are evaluated in order, so we need to
4752	 * make sure upper bounds are assigned from the first sibling
4753	 * onwards.
4754	 */
4755	first = rt6_multipath_first_sibling(rt);
4756	if (WARN_ON_ONCE(!first))
4757		return;
4758
4759	total = rt6_multipath_total_weight(first);
4760	rt6_multipath_upper_bound_set(first, total);
4761}
4762
4763static int fib6_ifup(struct fib6_info *rt, void *p_arg)
4764{
4765	const struct arg_netdev_event *arg = p_arg;
4766	struct net *net = dev_net(arg->dev);
4767
4768	if (rt != net->ipv6.fib6_null_entry && !rt->nh &&
4769	    rt->fib6_nh->fib_nh_dev == arg->dev) {
4770		rt->fib6_nh->fib_nh_flags &= ~arg->nh_flags;
4771		fib6_update_sernum_upto_root(net, rt);
4772		rt6_multipath_rebalance(rt);
4773	}
4774
4775	return 0;
4776}
4777
4778void rt6_sync_up(struct net_device *dev, unsigned char nh_flags)
4779{
4780	struct arg_netdev_event arg = {
4781		.dev = dev,
4782		{
4783			.nh_flags = nh_flags,
4784		},
4785	};
4786
4787	if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
4788		arg.nh_flags |= RTNH_F_LINKDOWN;
4789
4790	fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
4791}
4792
4793/* only called for fib entries with inline fib6_nh */
4794static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
4795				   const struct net_device *dev)
4796{
4797	struct fib6_info *iter;
4798
4799	if (rt->fib6_nh->fib_nh_dev == dev)
4800		return true;
4801	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4802		if (iter->fib6_nh->fib_nh_dev == dev)
4803			return true;
4804
4805	return false;
4806}
4807
4808static void rt6_multipath_flush(struct fib6_info *rt)
4809{
4810	struct fib6_info *iter;
4811
4812	rt->should_flush = 1;
4813	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4814		iter->should_flush = 1;
4815}
4816
4817static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
4818					     const struct net_device *down_dev)
4819{
4820	struct fib6_info *iter;
4821	unsigned int dead = 0;
4822
4823	if (rt->fib6_nh->fib_nh_dev == down_dev ||
4824	    rt->fib6_nh->fib_nh_flags & RTNH_F_DEAD)
4825		dead++;
4826	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4827		if (iter->fib6_nh->fib_nh_dev == down_dev ||
4828		    iter->fib6_nh->fib_nh_flags & RTNH_F_DEAD)
4829			dead++;
4830
4831	return dead;
4832}
4833
4834static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
4835				       const struct net_device *dev,
4836				       unsigned char nh_flags)
4837{
4838	struct fib6_info *iter;
4839
4840	if (rt->fib6_nh->fib_nh_dev == dev)
4841		rt->fib6_nh->fib_nh_flags |= nh_flags;
4842	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4843		if (iter->fib6_nh->fib_nh_dev == dev)
4844			iter->fib6_nh->fib_nh_flags |= nh_flags;
4845}
4846
4847/* called with write lock held for table with rt */
4848static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
4849{
4850	const struct arg_netdev_event *arg = p_arg;
4851	const struct net_device *dev = arg->dev;
4852	struct net *net = dev_net(dev);
4853
4854	if (rt == net->ipv6.fib6_null_entry || rt->nh)
4855		return 0;
4856
4857	switch (arg->event) {
4858	case NETDEV_UNREGISTER:
4859		return rt->fib6_nh->fib_nh_dev == dev ? -1 : 0;
4860	case NETDEV_DOWN:
4861		if (rt->should_flush)
4862			return -1;
4863		if (!rt->fib6_nsiblings)
4864			return rt->fib6_nh->fib_nh_dev == dev ? -1 : 0;
4865		if (rt6_multipath_uses_dev(rt, dev)) {
4866			unsigned int count;
4867
4868			count = rt6_multipath_dead_count(rt, dev);
4869			if (rt->fib6_nsiblings + 1 == count) {
4870				rt6_multipath_flush(rt);
4871				return -1;
4872			}
4873			rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
4874						   RTNH_F_LINKDOWN);
4875			fib6_update_sernum(net, rt);
4876			rt6_multipath_rebalance(rt);
4877		}
4878		return -2;
4879	case NETDEV_CHANGE:
4880		if (rt->fib6_nh->fib_nh_dev != dev ||
4881		    rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
4882			break;
4883		rt->fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
4884		rt6_multipath_rebalance(rt);
4885		break;
4886	}
4887
4888	return 0;
4889}
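
/* Hedged note (not in the original source): as read against the fib6
 * walker, a negative return requests removal; -1 deletes this entry
 * alone, while the -2 above appears to tell the walker that the whole
 * sibling group has been flushed, so it may skip past the remaining
 * multipath siblings.
 */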
4890
4891void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
4892{
4893	struct arg_netdev_event arg = {
4894		.dev = dev,
4895		{
4896			.event = event,
4897		},
4898	};
4899	struct net *net = dev_net(dev);
4900
4901	if (net->ipv6.sysctl.skip_notify_on_dev_down)
4902		fib6_clean_all_skip_notify(net, fib6_ifdown, &arg);
4903	else
4904		fib6_clean_all(net, fib6_ifdown, &arg);
4905}
4906
4907void rt6_disable_ip(struct net_device *dev, unsigned long event)
4908{
4909	rt6_sync_down_dev(dev, event);
4910	rt6_uncached_list_flush_dev(dev);
4911	neigh_ifdown(&nd_tbl, dev);
4912}
4913
4914struct rt6_mtu_change_arg {
4915	struct net_device *dev;
4916	unsigned int mtu;
4917	struct fib6_info *f6i;
4918};
4919
4920static int fib6_nh_mtu_change(struct fib6_nh *nh, void *_arg)
4921{
4922	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *)_arg;
4923	struct fib6_info *f6i = arg->f6i;
4924
4925	/* After an administrative MTU increase there is no way to discover
4926	 * an IPv6 PMTU increase, so the PMTU must be updated here.
4927	 * Since RFC 1981 does not cover administrative MTU increases,
4928	 * updating the PMTU on increase is a MUST (e.g. jumbo frames).
4929	 */
4930	if (nh->fib_nh_dev == arg->dev) {
4931		struct inet6_dev *idev = __in6_dev_get(arg->dev);
4932		u32 mtu = f6i->fib6_pmtu;
4933
4934		if (mtu >= arg->mtu ||
4935		    (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4936			fib6_metric_set(f6i, RTAX_MTU, arg->mtu);
4937
4938		spin_lock_bh(&rt6_exception_lock);
4939		rt6_exceptions_update_pmtu(idev, nh, arg->mtu);
4940		spin_unlock_bh(&rt6_exception_lock);
4941	}
4942
4943	return 0;
4944}
4945
4946static int rt6_mtu_change_route(struct fib6_info *f6i, void *p_arg)
4947{
4948	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
4949	struct inet6_dev *idev;
4950
4951	/* In IPv6, PMTU discovery is not optional,
4952	 * so the RTAX_MTU lock cannot disable it.
4953	 * We still use this lock to block changes
4954	 * caused by addrconf/ndisc.
4955	 */
4956
4957	idev = __in6_dev_get(arg->dev);
4958	if (!idev)
4959		return 0;
4960
4961	if (fib6_metric_locked(f6i, RTAX_MTU))
4962		return 0;
4963
4964	arg->f6i = f6i;
4965	if (f6i->nh) {
4966		/* fib6_nh_mtu_change only returns 0, so this is safe */
4967		return nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_mtu_change,
4968						arg);
4969	}
4970
4971	return fib6_nh_mtu_change(f6i->fib6_nh, arg);
4972}
4973
4974void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
4975{
4976	struct rt6_mtu_change_arg arg = {
4977		.dev = dev,
4978		.mtu = mtu,
4979	};
4980
4981	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
4982}
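
/* Hedged usage note: rt6_mtu_change() is expected to be driven from
 * the addrconf netdev notifier on NETDEV_CHANGEMTU, sweeping every
 * FIB entry so stored MTU metrics track the new link MTU.
 */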
4983
4984static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
4985	[RTA_UNSPEC]		= { .strict_start_type = RTA_DPORT + 1 },
4986	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
4987	[RTA_PREFSRC]		= { .len = sizeof(struct in6_addr) },
4988	[RTA_OIF]               = { .type = NLA_U32 },
4989	[RTA_IIF]		= { .type = NLA_U32 },
4990	[RTA_PRIORITY]          = { .type = NLA_U32 },
4991	[RTA_METRICS]           = { .type = NLA_NESTED },
4992	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
4993	[RTA_PREF]              = { .type = NLA_U8 },
4994	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
4995	[RTA_ENCAP]		= { .type = NLA_NESTED },
4996	[RTA_EXPIRES]		= { .type = NLA_U32 },
4997	[RTA_UID]		= { .type = NLA_U32 },
4998	[RTA_MARK]		= { .type = NLA_U32 },
4999	[RTA_TABLE]		= { .type = NLA_U32 },
5000	[RTA_IP_PROTO]		= { .type = NLA_U8 },
5001	[RTA_SPORT]		= { .type = NLA_U16 },
5002	[RTA_DPORT]		= { .type = NLA_U16 },
5003	[RTA_NH_ID]		= { .type = NLA_U32 },
5004};
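
/* Note (illustrative, not part of the original source): the
 * strict_start_type in the RTA_UNSPEC slot makes every attribute newer
 * than RTA_DPORT (e.g. RTA_NH_ID) subject to strict validation, even
 * when the enclosing message is parsed with nlmsg_parse_deprecated().
 */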
5005
5006static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
5007			      struct fib6_config *cfg,
5008			      struct netlink_ext_ack *extack)
5009{
5010	struct rtmsg *rtm;
5011	struct nlattr *tb[RTA_MAX+1];
5012	unsigned int pref;
5013	int err;
5014
5015	err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
5016				     rtm_ipv6_policy, extack);
5017	if (err < 0)
5018		goto errout;
5019
5020	err = -EINVAL;
5021	rtm = nlmsg_data(nlh);
5022
5023	if (rtm->rtm_tos) {
5024		NL_SET_ERR_MSG(extack,
5025			       "Invalid dsfield (tos): option not available for IPv6");
5026		goto errout;
5027	}
5028
5029	*cfg = (struct fib6_config){
5030		.fc_table = rtm->rtm_table,
5031		.fc_dst_len = rtm->rtm_dst_len,
5032		.fc_src_len = rtm->rtm_src_len,
5033		.fc_flags = RTF_UP,
5034		.fc_protocol = rtm->rtm_protocol,
5035		.fc_type = rtm->rtm_type,
5036
5037		.fc_nlinfo.portid = NETLINK_CB(skb).portid,
5038		.fc_nlinfo.nlh = nlh,
5039		.fc_nlinfo.nl_net = sock_net(skb->sk),
5040	};
5041
5042	if (rtm->rtm_type == RTN_UNREACHABLE ||
5043	    rtm->rtm_type == RTN_BLACKHOLE ||
5044	    rtm->rtm_type == RTN_PROHIBIT ||
5045	    rtm->rtm_type == RTN_THROW)
5046		cfg->fc_flags |= RTF_REJECT;
5047
5048	if (rtm->rtm_type == RTN_LOCAL)
5049		cfg->fc_flags |= RTF_LOCAL;
5050
5051	if (rtm->rtm_flags & RTM_F_CLONED)
5052		cfg->fc_flags |= RTF_CACHE;
5053
5054	cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
5055
5056	if (tb[RTA_NH_ID]) {
5057		if (tb[RTA_GATEWAY]   || tb[RTA_OIF] ||
5058		    tb[RTA_MULTIPATH] || tb[RTA_ENCAP]) {
5059			NL_SET_ERR_MSG(extack,
5060				       "Nexthop specification and nexthop id are mutually exclusive");
5061			goto errout;
5062		}
5063		cfg->fc_nh_id = nla_get_u32(tb[RTA_NH_ID]);
5064	}
5065
5066	if (tb[RTA_GATEWAY]) {
5067		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
5068		cfg->fc_flags |= RTF_GATEWAY;
5069	}
5070	if (tb[RTA_VIA]) {
5071		NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute");
5072		goto errout;
5073	}
5074
5075	if (tb[RTA_DST]) {
5076		int plen = (rtm->rtm_dst_len + 7) >> 3;
5077
5078		if (nla_len(tb[RTA_DST]) < plen)
5079			goto errout;
5080
5081		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
5082	}
5083
5084	if (tb[RTA_SRC]) {
5085		int plen = (rtm->rtm_src_len + 7) >> 3;
5086
5087		if (nla_len(tb[RTA_SRC]) < plen)
5088			goto errout;
5089
5090		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
5091	}
5092
5093	if (tb[RTA_PREFSRC])
5094		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
5095
5096	if (tb[RTA_OIF])
5097		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
5098
5099	if (tb[RTA_PRIORITY])
5100		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
5101
5102	if (tb[RTA_METRICS]) {
5103		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
5104		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
5105	}
5106
5107	if (tb[RTA_TABLE])
5108		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
5109
5110	if (tb[RTA_MULTIPATH]) {
5111		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
5112		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
5113
5114		err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
5115						     cfg->fc_mp_len, extack);
5116		if (err < 0)
5117			goto errout;
5118	}
5119
5120	if (tb[RTA_PREF]) {
5121		pref = nla_get_u8(tb[RTA_PREF]);
5122		if (pref != ICMPV6_ROUTER_PREF_LOW &&
5123		    pref != ICMPV6_ROUTER_PREF_HIGH)
5124			pref = ICMPV6_ROUTER_PREF_MEDIUM;
5125		cfg->fc_flags |= RTF_PREF(pref);
5126	}
5127
5128	if (tb[RTA_ENCAP])
5129		cfg->fc_encap = tb[RTA_ENCAP];
5130
5131	if (tb[RTA_ENCAP_TYPE]) {
5132		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
5133
5134		err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
5135		if (err < 0)
5136			goto errout;
5137	}
5138
5139	if (tb[RTA_EXPIRES]) {
5140		unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
5141
5142		if (addrconf_finite_timeout(timeout)) {
5143			cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
5144			cfg->fc_flags |= RTF_EXPIRES;
5145		}
5146	}
5147
5148	err = 0;
5149errout:
5150	return err;
5151}
5152
5153struct rt6_nh {
5154	struct fib6_info *fib6_info;
5155	struct fib6_config r_cfg;
5156	struct list_head next;
5157};
5158
5159static int ip6_route_info_append(struct net *net,
5160				 struct list_head *rt6_nh_list,
5161				 struct fib6_info *rt,
5162				 struct fib6_config *r_cfg)
5163{
5164	struct rt6_nh *nh;
5165	int err = -EEXIST;
5166
5167	list_for_each_entry(nh, rt6_nh_list, next) {
5168		/* check if fib6_info already exists */
5169		if (rt6_duplicate_nexthop(nh->fib6_info, rt))
5170			return err;
5171	}
5172
5173	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
5174	if (!nh)
5175		return -ENOMEM;
5176	nh->fib6_info = rt;
5177	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
5178	list_add_tail(&nh->next, rt6_nh_list);
5179
5180	return 0;
5181}
5182
5183static void ip6_route_mpath_notify(struct fib6_info *rt,
5184				   struct fib6_info *rt_last,
5185				   struct nl_info *info,
5186				   __u16 nlflags)
5187{
5188	/* if this is an APPEND route, then rt points to the first route
5189	 * inserted and rt_last points to last route inserted. Userspace
5190	 * wants a consistent dump of the route which starts at the first
5191	 * nexthop. Since sibling routes are always added at the end of
5192	 * the list, find the first sibling of the last route appended
5193	 */
5194	if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
5195		rt = list_first_entry(&rt_last->fib6_siblings,
5196				      struct fib6_info,
5197				      fib6_siblings);
5198	}
5199
5200	if (rt)
5201		inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
5202}
5203
5204static bool ip6_route_mpath_should_notify(const struct fib6_info *rt)
5205{
5206	bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
5207	bool should_notify = false;
5208	struct fib6_info *leaf;
5209	struct fib6_node *fn;
5210
5211	rcu_read_lock();
5212	fn = rcu_dereference(rt->fib6_node);
5213	if (!fn)
5214		goto out;
5215
5216	leaf = rcu_dereference(fn->leaf);
5217	if (!leaf)
5218		goto out;
5219
5220	if (rt == leaf ||
5221	    (rt_can_ecmp && rt->fib6_metric == leaf->fib6_metric &&
5222	     rt6_qualify_for_ecmp(leaf)))
5223		should_notify = true;
5224out:
5225	rcu_read_unlock();
5226
5227	return should_notify;
5228}
5229
5230static int fib6_gw_from_attr(struct in6_addr *gw, struct nlattr *nla,
5231			     struct netlink_ext_ack *extack)
5232{
5233	if (nla_len(nla) < sizeof(*gw)) {
5234		NL_SET_ERR_MSG(extack, "Invalid IPv6 address in RTA_GATEWAY");
5235		return -EINVAL;
5236	}
5237
5238	*gw = nla_get_in6_addr(nla);
5239
5240	return 0;
5241}
5242
5243static int ip6_route_multipath_add(struct fib6_config *cfg,
5244				   struct netlink_ext_ack *extack)
5245{
5246	struct fib6_info *rt_notif = NULL, *rt_last = NULL;
5247	struct nl_info *info = &cfg->fc_nlinfo;
5248	struct fib6_config r_cfg;
5249	struct rtnexthop *rtnh;
5250	struct fib6_info *rt;
5251	struct rt6_nh *err_nh;
5252	struct rt6_nh *nh, *nh_safe;
5253	__u16 nlflags;
5254	int remaining;
5255	int attrlen;
5256	int err = 1;
5257	int nhn = 0;
5258	int replace = (cfg->fc_nlinfo.nlh &&
5259		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
5260	LIST_HEAD(rt6_nh_list);
5261
5262	nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
5263	if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
5264		nlflags |= NLM_F_APPEND;
5265
5266	remaining = cfg->fc_mp_len;
5267	rtnh = (struct rtnexthop *)cfg->fc_mp;
5268
5269	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
5270	 * fib6_info structs per nexthop
5271	 */
5272	while (rtnh_ok(rtnh, remaining)) {
5273		memcpy(&r_cfg, cfg, sizeof(*cfg));
5274		if (rtnh->rtnh_ifindex)
5275			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
5276
5277		attrlen = rtnh_attrlen(rtnh);
5278		if (attrlen > 0) {
5279			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
5280
5281			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
5282			if (nla) {
5283				err = fib6_gw_from_attr(&r_cfg.fc_gateway, nla,
5284							extack);
5285				if (err)
5286					goto cleanup;
5287
5288				r_cfg.fc_flags |= RTF_GATEWAY;
5289			}
5290			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
5291
5292			/* RTA_ENCAP_TYPE length checked in
5293			 * lwtunnel_valid_encap_type_attr
5294			 */
5295			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
5296			if (nla)
5297				r_cfg.fc_encap_type = nla_get_u16(nla);
5298		}
5299
5300		r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
5301		rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
5302		if (IS_ERR(rt)) {
5303			err = PTR_ERR(rt);
5304			rt = NULL;
5305			goto cleanup;
5306		}
5307		if (!rt6_qualify_for_ecmp(rt)) {
5308			err = -EINVAL;
5309			NL_SET_ERR_MSG(extack,
5310				       "Device only routes can not be added for IPv6 using the multipath API.");
5311			fib6_info_release(rt);
5312			goto cleanup;
5313		}
5314
5315		rt->fib6_nh->fib_nh_weight = rtnh->rtnh_hops + 1;
5316
5317		err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
5318					    rt, &r_cfg);
5319		if (err) {
5320			fib6_info_release(rt);
5321			goto cleanup;
5322		}
5323
5324		rtnh = rtnh_next(rtnh, &remaining);
5325	}
5326
5327	if (list_empty(&rt6_nh_list)) {
5328		NL_SET_ERR_MSG(extack,
5329			       "Invalid nexthop configuration - no valid nexthops");
5330		return -EINVAL;
5331	}
5332
5333	/* for add and replace send one notification with all nexthops.
5334	 * Skip the notification in fib6_add_rt2node and send one with
5335	 * the full route when done
5336	 */
5337	info->skip_notify = 1;
5338
5339	/* For add and replace, send one notification with all nexthops. For
5340	 * append, send one notification with all appended nexthops.
5341	 */
5342	info->skip_notify_kernel = 1;
5343
5344	err_nh = NULL;
5345	list_for_each_entry(nh, &rt6_nh_list, next) {
5346		err = __ip6_ins_rt(nh->fib6_info, info, extack);
5347
5348		if (err) {
5349			if (replace && nhn)
5350				NL_SET_ERR_MSG_MOD(extack,
5351						   "multipath route replace failed (check consistency of installed routes)");
5352			err_nh = nh;
5353			goto add_errout;
5354		}
5355		/* save reference to last route successfully inserted */
5356		rt_last = nh->fib6_info;
5357
5358		/* save reference to first route for notification */
5359		if (!rt_notif)
5360			rt_notif = nh->fib6_info;
5361
5362		/* Because each route is added like a single route, we remove
5363		 * these flags after the first nexthop: if there is a
5364		 * collision, we have already failed to add the first nexthop
5365		 * (fib6_add_rt2node() has rejected it); when replacing, the
5366		 * old nexthops have been replaced by the first new one, and
5367		 * the rest should be appended to it.
5368		 */
5369		if (cfg->fc_nlinfo.nlh) {
5370			cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
5371							     NLM_F_REPLACE);
5372			cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_CREATE;
5373		}
5374		nhn++;
5375	}
5376
5377	/* An in-kernel notification should only be sent in case the new
5378	 * multipath route is added as the first route in the node, or if
5379	 * it was appended to it. We pass 'rt_notif' since it is the first
5380	 * sibling and might allow us to skip some checks in the replace case.
5381	 */
5382	if (ip6_route_mpath_should_notify(rt_notif)) {
5383		enum fib_event_type fib_event;
5384
5385		if (rt_notif->fib6_nsiblings != nhn - 1)
5386			fib_event = FIB_EVENT_ENTRY_APPEND;
5387		else
5388			fib_event = FIB_EVENT_ENTRY_REPLACE;
5389
5390		err = call_fib6_multipath_entry_notifiers(info->nl_net,
5391							  fib_event, rt_notif,
5392							  nhn - 1, extack);
5393		if (err) {
5394			/* Delete all the siblings that were just added */
5395			err_nh = NULL;
5396			goto add_errout;
5397		}
5398	}
5399
5400	/* success ... tell user about new route */
5401	ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
5402	goto cleanup;
5403
5404add_errout:
5405	/* send notification for routes that were added so that
5406	 * the delete notifications sent by ip6_route_del are
5407	 * coherent
5408	 */
5409	if (rt_notif)
5410		ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
5411
5412	/* Delete routes that were already added */
5413	list_for_each_entry(nh, &rt6_nh_list, next) {
5414		if (err_nh == nh)
5415			break;
5416		ip6_route_del(&nh->r_cfg, extack);
5417	}
5418
5419cleanup:
5420	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
5421		fib6_info_release(nh->fib6_info);
5422		list_del(&nh->next);
5423		kfree(nh);
5424	}
5425
5426	return err;
5427}
5428
5429static int ip6_route_multipath_del(struct fib6_config *cfg,
5430				   struct netlink_ext_ack *extack)
5431{
5432	struct fib6_config r_cfg;
5433	struct rtnexthop *rtnh;
5434	int last_err = 0;
5435	int remaining;
5436	int attrlen;
5437	int err;
5438
5439	remaining = cfg->fc_mp_len;
5440	rtnh = (struct rtnexthop *)cfg->fc_mp;
5441
5442	/* Parse a Multipath Entry */
5443	while (rtnh_ok(rtnh, remaining)) {
5444		memcpy(&r_cfg, cfg, sizeof(*cfg));
5445		if (rtnh->rtnh_ifindex)
5446			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
5447
5448		attrlen = rtnh_attrlen(rtnh);
5449		if (attrlen > 0) {
5450			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
5451
5452			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
5453			if (nla) {
5454				err = fib6_gw_from_attr(&r_cfg.fc_gateway, nla,
5455							extack);
5456				if (err) {
5457					last_err = err;
5458					goto next_rtnh;
5459				}
5460
5461				r_cfg.fc_flags |= RTF_GATEWAY;
5462			}
5463		}
5464		err = ip6_route_del(&r_cfg, extack);
5465		if (err)
5466			last_err = err;
5467
5468next_rtnh:
5469		rtnh = rtnh_next(rtnh, &remaining);
5470	}
5471
5472	return last_err;
5473}
5474
5475static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
5476			      struct netlink_ext_ack *extack)
5477{
5478	struct fib6_config cfg;
5479	int err;
5480
5481	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
5482	if (err < 0)
5483		return err;
5484
5485	if (cfg.fc_nh_id &&
5486	    !nexthop_find_by_id(sock_net(skb->sk), cfg.fc_nh_id)) {
5487		NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
5488		return -EINVAL;
5489	}
5490
5491	if (cfg.fc_mp) {
5492		return ip6_route_multipath_del(&cfg, extack);
5493	} else {
5494		cfg.fc_delete_all_nh = 1;
5495		return ip6_route_del(&cfg, extack);
5496	}
5497}
5498
5499static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
5500			      struct netlink_ext_ack *extack)
5501{
5502	struct fib6_config cfg;
5503	int err;
5504
5505	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
5506	if (err < 0)
5507		return err;
5508
5509	if (cfg.fc_metric == 0)
5510		cfg.fc_metric = IP6_RT_PRIO_USER;
5511
5512	if (cfg.fc_mp)
5513		return ip6_route_multipath_add(&cfg, extack);
5514	else
5515		return ip6_route_add(&cfg, GFP_KERNEL, extack);
5516}
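
/* Usage sketch (hedged; iproute2 userspace): a request such as
 *
 *	ip -6 route add 2001:db8::/64 \
 *		nexthop via fe80::1 dev eth0 \
 *		nexthop via fe80::2 dev eth1
 *
 * carries RTA_MULTIPATH and therefore takes the
 * ip6_route_multipath_add() branch above, while a single-nexthop
 * request falls through to ip6_route_add().
 */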
5517
5518/* add the overhead of this fib6_nh to nexthop_len */
5519static int rt6_nh_nlmsg_size(struct fib6_nh *nh, void *arg)
5520{
5521	int *nexthop_len = arg;
5522
5523	*nexthop_len += nla_total_size(0)	 /* RTA_MULTIPATH */
5524		     + NLA_ALIGN(sizeof(struct rtnexthop))
5525		     + nla_total_size(16); /* RTA_GATEWAY */
5526
5527	if (nh->fib_nh_lws) {
5528		/* RTA_ENCAP */
5529		*nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
5530		/* RTA_ENCAP_TYPE (u16) */
5531		*nexthop_len += nla_total_size(2);
5532	}
5533
5534	return 0;
5535}
5536
5537static size_t rt6_nlmsg_size(struct fib6_info *f6i)
5538{
5539	int nexthop_len;
5540
5541	if (f6i->nh) {
5542		nexthop_len = nla_total_size(4); /* RTA_NH_ID */
5543		nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_nlmsg_size,
5544					 &nexthop_len);
5545	} else {
5546		struct fib6_info *sibling, *next_sibling;
5547		struct fib6_nh *nh = f6i->fib6_nh;
5548
5549		nexthop_len = 0;
5550		if (f6i->fib6_nsiblings) {
5551			rt6_nh_nlmsg_size(nh, &nexthop_len);
5552
5553			list_for_each_entry_safe(sibling, next_sibling,
5554						 &f6i->fib6_siblings, fib6_siblings) {
5555				rt6_nh_nlmsg_size(sibling->fib6_nh, &nexthop_len);
5556			}
5557		}
5558		nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
5559	}
5560
5561	return NLMSG_ALIGN(sizeof(struct rtmsg))
5562	       + nla_total_size(16) /* RTA_SRC */
5563	       + nla_total_size(16) /* RTA_DST */
5564	       + nla_total_size(16) /* RTA_GATEWAY */
5565	       + nla_total_size(16) /* RTA_PREFSRC */
5566	       + nla_total_size(4) /* RTA_TABLE */
5567	       + nla_total_size(4) /* RTA_IIF */
5568	       + nla_total_size(4) /* RTA_OIF */
5569	       + nla_total_size(4) /* RTA_PRIORITY */
5570	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
5571	       + nla_total_size(sizeof(struct rta_cacheinfo))
5572	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
5573	       + nla_total_size(1) /* RTA_PREF */
5574	       + nexthop_len;
5575}
5576
5577static int rt6_fill_node_nexthop(struct sk_buff *skb, struct nexthop *nh,
5578				 unsigned char *flags)
5579{
5580	if (nexthop_is_multipath(nh)) {
5581		struct nlattr *mp;
5582
5583		mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
5584		if (!mp)
5585			goto nla_put_failure;
5586
5587		if (nexthop_mpath_fill_node(skb, nh, AF_INET6))
5588			goto nla_put_failure;
5589
5590		nla_nest_end(skb, mp);
5591	} else {
5592		struct fib6_nh *fib6_nh;
5593
5594		fib6_nh = nexthop_fib6_nh(nh);
5595		if (fib_nexthop_info(skb, &fib6_nh->nh_common, AF_INET6,
5596				     flags, false) < 0)
5597			goto nla_put_failure;
5598	}
5599
5600	return 0;
5601
5602nla_put_failure:
5603	return -EMSGSIZE;
5604}
5605
5606static int rt6_fill_node(struct net *net, struct sk_buff *skb,
5607			 struct fib6_info *rt, struct dst_entry *dst,
5608			 struct in6_addr *dest, struct in6_addr *src,
5609			 int iif, int type, u32 portid, u32 seq,
5610			 unsigned int flags)
5611{
5612	struct rt6_info *rt6 = dst_rt6_info(dst);
5613	struct rt6key *rt6_dst, *rt6_src;
5614	u32 *pmetrics, table, rt6_flags;
5615	unsigned char nh_flags = 0;
5616	struct nlmsghdr *nlh;
5617	struct rtmsg *rtm;
5618	long expires = 0;
5619
5620	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
5621	if (!nlh)
5622		return -EMSGSIZE;
5623
5624	if (rt6) {
5625		rt6_dst = &rt6->rt6i_dst;
5626		rt6_src = &rt6->rt6i_src;
5627		rt6_flags = rt6->rt6i_flags;
5628	} else {
5629		rt6_dst = &rt->fib6_dst;
5630		rt6_src = &rt->fib6_src;
5631		rt6_flags = rt->fib6_flags;
5632	}
5633
5634	rtm = nlmsg_data(nlh);
5635	rtm->rtm_family = AF_INET6;
5636	rtm->rtm_dst_len = rt6_dst->plen;
5637	rtm->rtm_src_len = rt6_src->plen;
5638	rtm->rtm_tos = 0;
5639	if (rt->fib6_table)
5640		table = rt->fib6_table->tb6_id;
5641	else
5642		table = RT6_TABLE_UNSPEC;
5643	rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT;
5644	if (nla_put_u32(skb, RTA_TABLE, table))
5645		goto nla_put_failure;
5646
5647	rtm->rtm_type = rt->fib6_type;
5648	rtm->rtm_flags = 0;
5649	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
5650	rtm->rtm_protocol = rt->fib6_protocol;
5651
5652	if (rt6_flags & RTF_CACHE)
5653		rtm->rtm_flags |= RTM_F_CLONED;
5654
5655	if (dest) {
5656		if (nla_put_in6_addr(skb, RTA_DST, dest))
5657			goto nla_put_failure;
5658		rtm->rtm_dst_len = 128;
5659	} else if (rtm->rtm_dst_len)
5660		if (nla_put_in6_addr(skb, RTA_DST, &rt6_dst->addr))
5661			goto nla_put_failure;
5662#ifdef CONFIG_IPV6_SUBTREES
5663	if (src) {
5664		if (nla_put_in6_addr(skb, RTA_SRC, src))
5665			goto nla_put_failure;
5666		rtm->rtm_src_len = 128;
5667	} else if (rtm->rtm_src_len &&
5668		   nla_put_in6_addr(skb, RTA_SRC, &rt6_src->addr))
5669		goto nla_put_failure;
5670#endif
5671	if (iif) {
5672#ifdef CONFIG_IPV6_MROUTE
5673		if (ipv6_addr_is_multicast(&rt6_dst->addr)) {
5674			int err = ip6mr_get_route(net, skb, rtm, portid);
5675
5676			if (err == 0)
5677				return 0;
5678			if (err < 0)
5679				goto nla_put_failure;
5680		} else
5681#endif
5682			if (nla_put_u32(skb, RTA_IIF, iif))
5683				goto nla_put_failure;
5684	} else if (dest) {
5685		struct in6_addr saddr_buf;
5686		if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
5687		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
5688			goto nla_put_failure;
5689	}
5690
5691	if (rt->fib6_prefsrc.plen) {
5692		struct in6_addr saddr_buf;
5693		saddr_buf = rt->fib6_prefsrc.addr;
5694		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
5695			goto nla_put_failure;
5696	}
5697
5698	pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
5699	if (rtnetlink_put_metrics(skb, pmetrics) < 0)
5700		goto nla_put_failure;
5701
5702	if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
5703		goto nla_put_failure;
5704
5705	/* For multipath routes, walk the siblings list and add
5706	 * each as a nexthop within RTA_MULTIPATH.
5707	 */
5708	if (rt6) {
5709		if (rt6_flags & RTF_GATEWAY &&
5710		    nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway))
5711			goto nla_put_failure;
5712
5713		if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex))
5714			goto nla_put_failure;
5715
5716		if (dst->lwtstate &&
5717		    lwtunnel_fill_encap(skb, dst->lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0)
5718			goto nla_put_failure;
5719	} else if (rt->fib6_nsiblings) {
5720		struct fib6_info *sibling, *next_sibling;
5721		struct nlattr *mp;
5722
5723		mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
5724		if (!mp)
5725			goto nla_put_failure;
5726
5727		if (fib_add_nexthop(skb, &rt->fib6_nh->nh_common,
5728				    rt->fib6_nh->fib_nh_weight, AF_INET6,
5729				    0) < 0)
5730			goto nla_put_failure;
5731
5732		list_for_each_entry_safe(sibling, next_sibling,
5733					 &rt->fib6_siblings, fib6_siblings) {
5734			if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common,
5735					    sibling->fib6_nh->fib_nh_weight,
5736					    AF_INET6, 0) < 0)
5737				goto nla_put_failure;
5738		}
5739
5740		nla_nest_end(skb, mp);
5741	} else if (rt->nh) {
5742		if (nla_put_u32(skb, RTA_NH_ID, rt->nh->id))
5743			goto nla_put_failure;
5744
5745		if (nexthop_is_blackhole(rt->nh))
5746			rtm->rtm_type = RTN_BLACKHOLE;
5747
5748		if (READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode) &&
5749		    rt6_fill_node_nexthop(skb, rt->nh, &nh_flags) < 0)
5750			goto nla_put_failure;
5751
5752		rtm->rtm_flags |= nh_flags;
5753	} else {
5754		if (fib_nexthop_info(skb, &rt->fib6_nh->nh_common, AF_INET6,
5755				     &nh_flags, false) < 0)
5756			goto nla_put_failure;
5757
5758		rtm->rtm_flags |= nh_flags;
5759	}
5760
5761	if (rt6_flags & RTF_EXPIRES) {
5762		expires = dst ? dst->expires : rt->expires;
5763		expires -= jiffies;
5764	}
5765
5766	if (!dst) {
5767		if (READ_ONCE(rt->offload))
5768			rtm->rtm_flags |= RTM_F_OFFLOAD;
5769		if (READ_ONCE(rt->trap))
5770			rtm->rtm_flags |= RTM_F_TRAP;
5771		if (READ_ONCE(rt->offload_failed))
5772			rtm->rtm_flags |= RTM_F_OFFLOAD_FAILED;
5773	}
5774
5775	if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
5776		goto nla_put_failure;
5777
5778	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt6_flags)))
5779		goto nla_put_failure;
5780
5782	nlmsg_end(skb, nlh);
5783	return 0;
5784
5785nla_put_failure:
5786	nlmsg_cancel(skb, nlh);
5787	return -EMSGSIZE;
5788}
5789
5790static int fib6_info_nh_uses_dev(struct fib6_nh *nh, void *arg)
5791{
5792	const struct net_device *dev = arg;
5793
5794	if (nh->fib_nh_dev == dev)
5795		return 1;
5796
5797	return 0;
5798}
5799
5800static bool fib6_info_uses_dev(const struct fib6_info *f6i,
5801			       const struct net_device *dev)
5802{
5803	if (f6i->nh) {
5804		struct net_device *_dev = (struct net_device *)dev;
5805
5806		return !!nexthop_for_each_fib6_nh(f6i->nh,
5807						  fib6_info_nh_uses_dev,
5808						  _dev);
5809	}
5810
5811	if (f6i->fib6_nh->fib_nh_dev == dev)
5812		return true;
5813
5814	if (f6i->fib6_nsiblings) {
5815		struct fib6_info *sibling, *next_sibling;
5816
5817		list_for_each_entry_safe(sibling, next_sibling,
5818					 &f6i->fib6_siblings, fib6_siblings) {
5819			if (sibling->fib6_nh->fib_nh_dev == dev)
5820				return true;
5821		}
5822	}
5823
5824	return false;
5825}
5826
5827struct fib6_nh_exception_dump_walker {
5828	struct rt6_rtnl_dump_arg *dump;
5829	struct fib6_info *rt;
5830	unsigned int flags;
5831	unsigned int skip;
5832	unsigned int count;
5833};
5834
5835static int rt6_nh_dump_exceptions(struct fib6_nh *nh, void *arg)
5836{
5837	struct fib6_nh_exception_dump_walker *w = arg;
5838	struct rt6_rtnl_dump_arg *dump = w->dump;
5839	struct rt6_exception_bucket *bucket;
5840	struct rt6_exception *rt6_ex;
5841	int i, err;
5842
5843	bucket = fib6_nh_get_excptn_bucket(nh, NULL);
5844	if (!bucket)
5845		return 0;
5846
5847	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
5848		hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
5849			if (w->skip) {
5850				w->skip--;
5851				continue;
5852			}
5853
5854			/* Expiration of entries doesn't bump sernum, insertion
5855			 * does. Removal is triggered by insertion, so we can
5856			 * rely on the fact that if entries change between two
5857			 * partial dumps, this node is scanned again completely,
5858			 * see rt6_insert_exception() and fib6_dump_table().
5859			 *
5860			 * Count expired entries we go through as handled
5861			 * entries that we'll skip next time, in case of partial
5862			 * node dump. Otherwise, if entries expire meanwhile,
5863			 * we'll skip the wrong amount.
5864			 */
5865			if (rt6_check_expired(rt6_ex->rt6i)) {
5866				w->count++;
5867				continue;
5868			}
5869
5870			err = rt6_fill_node(dump->net, dump->skb, w->rt,
5871					    &rt6_ex->rt6i->dst, NULL, NULL, 0,
5872					    RTM_NEWROUTE,
5873					    NETLINK_CB(dump->cb->skb).portid,
5874					    dump->cb->nlh->nlmsg_seq, w->flags);
5875			if (err)
5876				return err;
5877
5878			w->count++;
5879		}
5880		bucket++;
5881	}
5882
5883	return 0;
5884}
5885
5886/* Return -1 if done with node, number of handled routes on partial dump */
5887int rt6_dump_route(struct fib6_info *rt, void *p_arg, unsigned int skip)
5888{
5889	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
5890	struct fib_dump_filter *filter = &arg->filter;
5891	unsigned int flags = NLM_F_MULTI;
5892	struct net *net = arg->net;
5893	int count = 0;
5894
5895	if (rt == net->ipv6.fib6_null_entry)
5896		return -1;
5897
5898	if ((filter->flags & RTM_F_PREFIX) &&
5899	    !(rt->fib6_flags & RTF_PREFIX_RT)) {
5900		/* success since this is not a prefix route */
5901		return -1;
5902	}
5903	if (filter->filter_set &&
5904	    ((filter->rt_type  && rt->fib6_type != filter->rt_type) ||
5905	     (filter->dev      && !fib6_info_uses_dev(rt, filter->dev)) ||
5906	     (filter->protocol && rt->fib6_protocol != filter->protocol))) {
5907		return -1;
5908	}
5909
5910	if (filter->filter_set ||
5911	    !filter->dump_routes || !filter->dump_exceptions) {
5912		flags |= NLM_F_DUMP_FILTERED;
5913	}
5914
5915	if (filter->dump_routes) {
5916		if (skip) {
5917			skip--;
5918		} else {
5919			if (rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL,
5920					  0, RTM_NEWROUTE,
5921					  NETLINK_CB(arg->cb->skb).portid,
5922					  arg->cb->nlh->nlmsg_seq, flags)) {
5923				return 0;
5924			}
5925			count++;
5926		}
5927	}
5928
5929	if (filter->dump_exceptions) {
5930		struct fib6_nh_exception_dump_walker w = { .dump = arg,
5931							   .rt = rt,
5932							   .flags = flags,
5933							   .skip = skip,
5934							   .count = 0 };
5935		int err;
5936
5937		rcu_read_lock();
5938		if (rt->nh) {
5939			err = nexthop_for_each_fib6_nh(rt->nh,
5940						       rt6_nh_dump_exceptions,
5941						       &w);
5942		} else {
5943			err = rt6_nh_dump_exceptions(rt->fib6_nh, &w);
5944		}
5945		rcu_read_unlock();
5946
5947		if (err)
5948			return count + w.count;
5949	}
5950
5951	return -1;
5952}
5953
5954static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
5955					const struct nlmsghdr *nlh,
5956					struct nlattr **tb,
5957					struct netlink_ext_ack *extack)
5958{
5959	struct rtmsg *rtm;
5960	int i, err;
5961
5962	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
5963		NL_SET_ERR_MSG_MOD(extack,
5964				   "Invalid header for get route request");
5965		return -EINVAL;
5966	}
5967
5968	if (!netlink_strict_get_check(skb))
5969		return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
5970					      rtm_ipv6_policy, extack);
5971
5972	rtm = nlmsg_data(nlh);
5973	if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
5974	    (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
5975	    rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope ||
5976	    rtm->rtm_type) {
5977		NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request");
5978		return -EINVAL;
5979	}
5980	if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) {
5981		NL_SET_ERR_MSG_MOD(extack,
5982				   "Invalid flags for get route request");
5983		return -EINVAL;
5984	}
5985
5986	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
5987					    rtm_ipv6_policy, extack);
5988	if (err)
5989		return err;
5990
5991	if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
5992	    (tb[RTA_DST] && !rtm->rtm_dst_len)) {
5993		NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
5994		return -EINVAL;
5995	}
5996
5997	for (i = 0; i <= RTA_MAX; i++) {
5998		if (!tb[i])
5999			continue;
6000
6001		switch (i) {
6002		case RTA_SRC:
6003		case RTA_DST:
6004		case RTA_IIF:
6005		case RTA_OIF:
6006		case RTA_MARK:
6007		case RTA_UID:
6008		case RTA_SPORT:
6009		case RTA_DPORT:
6010		case RTA_IP_PROTO:
6011			break;
6012		default:
6013			NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
6014			return -EINVAL;
6015		}
6016	}
6017
6018	return 0;
6019}
6020
6021static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
6022			      struct netlink_ext_ack *extack)
6023{
6024	struct net *net = sock_net(in_skb->sk);
6025	struct nlattr *tb[RTA_MAX+1];
6026	int err, iif = 0, oif = 0;
6027	struct fib6_info *from;
6028	struct dst_entry *dst;
6029	struct rt6_info *rt;
6030	struct sk_buff *skb;
6031	struct rtmsg *rtm;
6032	struct flowi6 fl6 = {};
6033	bool fibmatch;
6034
6035	err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
6036	if (err < 0)
6037		goto errout;
6038
6039	err = -EINVAL;
6040	rtm = nlmsg_data(nlh);
6041	fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
6042	fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
6043
6044	if (tb[RTA_SRC]) {
6045		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
6046			goto errout;
6047
6048		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
6049	}
6050
6051	if (tb[RTA_DST]) {
6052		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
6053			goto errout;
6054
6055		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
6056	}
6057
6058	if (tb[RTA_IIF])
6059		iif = nla_get_u32(tb[RTA_IIF]);
6060
6061	if (tb[RTA_OIF])
6062		oif = nla_get_u32(tb[RTA_OIF]);
6063
6064	if (tb[RTA_MARK])
6065		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
6066
6067	if (tb[RTA_UID])
6068		fl6.flowi6_uid = make_kuid(current_user_ns(),
6069					   nla_get_u32(tb[RTA_UID]));
6070	else
6071		fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
6072
6073	if (tb[RTA_SPORT])
6074		fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]);
6075
6076	if (tb[RTA_DPORT])
6077		fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]);
6078
6079	if (tb[RTA_IP_PROTO]) {
6080		err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
6081						  &fl6.flowi6_proto, AF_INET6,
6082						  extack);
6083		if (err)
6084			goto errout;
6085	}
6086
6087	if (iif) {
6088		struct net_device *dev;
6089		int flags = 0;
6090
6091		rcu_read_lock();
6092
6093		dev = dev_get_by_index_rcu(net, iif);
6094		if (!dev) {
6095			rcu_read_unlock();
6096			err = -ENODEV;
6097			goto errout;
6098		}
6099
6100		fl6.flowi6_iif = iif;
6101
6102		if (!ipv6_addr_any(&fl6.saddr))
6103			flags |= RT6_LOOKUP_F_HAS_SADDR;
6104
6105		dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
6106
6107		rcu_read_unlock();
6108	} else {
6109		fl6.flowi6_oif = oif;
6110
6111		dst = ip6_route_output(net, NULL, &fl6);
6112	}
6113
6115	rt = dst_rt6_info(dst);
6116	if (rt->dst.error) {
6117		err = rt->dst.error;
6118		ip6_rt_put(rt);
6119		goto errout;
6120	}
6121
6122	if (rt == net->ipv6.ip6_null_entry) {
6123		err = rt->dst.error;
6124		ip6_rt_put(rt);
6125		goto errout;
6126	}
6127
6128	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
6129	if (!skb) {
6130		ip6_rt_put(rt);
6131		err = -ENOBUFS;
6132		goto errout;
6133	}
6134
6135	skb_dst_set(skb, &rt->dst);
6136
6137	rcu_read_lock();
6138	from = rcu_dereference(rt->from);
6139	if (from) {
6140		if (fibmatch)
6141			err = rt6_fill_node(net, skb, from, NULL, NULL, NULL,
6142					    iif, RTM_NEWROUTE,
6143					    NETLINK_CB(in_skb).portid,
6144					    nlh->nlmsg_seq, 0);
6145		else
6146			err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
6147					    &fl6.saddr, iif, RTM_NEWROUTE,
6148					    NETLINK_CB(in_skb).portid,
6149					    nlh->nlmsg_seq, 0);
6150	} else {
6151		err = -ENETUNREACH;
6152	}
6153	rcu_read_unlock();
6154
6155	if (err < 0) {
6156		kfree_skb(skb);
6157		goto errout;
6158	}
6159
6160	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
6161errout:
6162	return err;
6163}
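
/* Hedged usage note: "ip -6 route get 2001:db8::1" issues the
 * RTM_GETROUTE request served here; appending "fibmatch" sets
 * RTM_F_FIB_MATCH, so the reply describes the matching FIB entry
 * instead of the fully resolved dst.
 */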
6164
6165void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
6166		     unsigned int nlm_flags)
6167{
6168	struct sk_buff *skb;
6169	struct net *net = info->nl_net;
6170	u32 seq;
6171	int err;
6172
6173	err = -ENOBUFS;
6174	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
6175
6176	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
6177	if (!skb)
6178		goto errout;
6179
6180	err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
6181			    event, info->portid, seq, nlm_flags);
6182	if (err < 0) {
6183		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
6184		WARN_ON(err == -EMSGSIZE);
6185		kfree_skb(skb);
6186		goto errout;
6187	}
6188	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
6189		    info->nlh, gfp_any());
6190	return;
6191errout:
6192	if (err < 0)
6193		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
6194}
6195
6196void fib6_rt_update(struct net *net, struct fib6_info *rt,
6197		    struct nl_info *info)
6198{
6199	u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
6200	struct sk_buff *skb;
6201	int err = -ENOBUFS;
6202
6203	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
6204	if (!skb)
6205		goto errout;
6206
6207	err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
6208			    RTM_NEWROUTE, info->portid, seq, NLM_F_REPLACE);
6209	if (err < 0) {
6210		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
6211		WARN_ON(err == -EMSGSIZE);
6212		kfree_skb(skb);
6213		goto errout;
6214	}
6215	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
6216		    info->nlh, gfp_any());
6217	return;
6218errout:
6219	if (err < 0)
6220		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
6221}
6222
6223void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i,
6224			    bool offload, bool trap, bool offload_failed)
6225{
6226	struct sk_buff *skb;
6227	int err;
6228
6229	if (READ_ONCE(f6i->offload) == offload &&
6230	    READ_ONCE(f6i->trap) == trap &&
6231	    READ_ONCE(f6i->offload_failed) == offload_failed)
6232		return;
6233
6234	WRITE_ONCE(f6i->offload, offload);
6235	WRITE_ONCE(f6i->trap, trap);
6236
6237	/* 2 means send notifications only if offload_failed was changed. */
6238	if (net->ipv6.sysctl.fib_notify_on_flag_change == 2 &&
6239	    READ_ONCE(f6i->offload_failed) == offload_failed)
6240		return;
6241
6242	WRITE_ONCE(f6i->offload_failed, offload_failed);
6243
6244	if (!rcu_access_pointer(f6i->fib6_node))
6245		/* The route was removed from the tree, do not send
6246		 * notification.
6247		 */
6248		return;
6249
6250	if (!net->ipv6.sysctl.fib_notify_on_flag_change)
6251		return;
6252
6253	skb = nlmsg_new(rt6_nlmsg_size(f6i), GFP_KERNEL);
6254	if (!skb) {
6255		err = -ENOBUFS;
6256		goto errout;
6257	}
6258
6259	err = rt6_fill_node(net, skb, f6i, NULL, NULL, NULL, 0, RTM_NEWROUTE, 0,
6260			    0, 0);
6261	if (err < 0) {
6262		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
6263		WARN_ON(err == -EMSGSIZE);
6264		kfree_skb(skb);
6265		goto errout;
6266	}
6267
6268	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_ROUTE, NULL, GFP_KERNEL);
6269	return;
6270
6271errout:
6272	rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
6273}
6274EXPORT_SYMBOL(fib6_info_hw_flags_set);
6275
6276static int ip6_route_dev_notify(struct notifier_block *this,
6277				unsigned long event, void *ptr)
6278{
6279	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
6280	struct net *net = dev_net(dev);
6281
6282	if (!(dev->flags & IFF_LOOPBACK))
6283		return NOTIFY_OK;
6284
6285	if (event == NETDEV_REGISTER) {
6286		net->ipv6.fib6_null_entry->fib6_nh->fib_nh_dev = dev;
6287		net->ipv6.ip6_null_entry->dst.dev = dev;
6288		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
6289#ifdef CONFIG_IPV6_MULTIPLE_TABLES
6290		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
6291		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
6292		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
6293		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
6294#endif
6295	} else if (event == NETDEV_UNREGISTER &&
6296		    dev->reg_state != NETREG_UNREGISTERED) {
6297		/* NETDEV_UNREGISTER can be fired multiple times by
6298		 * netdev_wait_allrefs(). Make sure we only call this once.
6299		 */
6300		in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
6301#ifdef CONFIG_IPV6_MULTIPLE_TABLES
6302		in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
6303		in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
6304#endif
6305	}
6306
6307	return NOTIFY_OK;
6308}
6309
6310/*
6311 *	/proc
6312 */
6313
6314#ifdef CONFIG_PROC_FS
6315static int rt6_stats_seq_show(struct seq_file *seq, void *v)
6316{
6317	struct net *net = (struct net *)seq->private;
6318	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
6319		   net->ipv6.rt6_stats->fib_nodes,
6320		   net->ipv6.rt6_stats->fib_route_nodes,
6321		   atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
6322		   net->ipv6.rt6_stats->fib_rt_entries,
6323		   net->ipv6.rt6_stats->fib_rt_cache,
6324		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
6325		   net->ipv6.rt6_stats->fib_discarded_routes);
6326
6327	return 0;
6328}
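
/* Illustrative note: this backs /proc/net/rt6_stats; the seven hex
 * fields are, in order, fib nodes, route nodes, allocated routes,
 * route entries, cached routes, dst-ops entries and discarded routes.
 */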
6329#endif	/* CONFIG_PROC_FS */
6330
6331#ifdef CONFIG_SYSCTL
6332
6333static int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
6334			      void *buffer, size_t *lenp, loff_t *ppos)
6335{
6336	struct net *net;
6337	int delay;
6338	int ret;
6339	if (!write)
6340		return -EINVAL;
6341
6342	net = (struct net *)ctl->extra1;
6343	delay = net->ipv6.sysctl.flush_delay;
6344	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
6345	if (ret)
6346		return ret;
6347
6348	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
6349	return 0;
6350}
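
/* Usage sketch (hedged): the template below wires this handler to the
 * write-only /proc/sys/net/ipv6/route/flush, so e.g.
 *
 *	echo 1 > /proc/sys/net/ipv6/route/flush
 *
 * kicks off an immediate fib6_run_gc() pass in the writer's netns.
 */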
6351
6352static struct ctl_table ipv6_route_table_template[] = {
6353	{
6354		.procname	=	"max_size",
6355		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
6356		.maxlen		=	sizeof(int),
6357		.mode		=	0644,
6358		.proc_handler	=	proc_dointvec,
6359	},
6360	{
6361		.procname	=	"gc_thresh",
6362		.data		=	&ip6_dst_ops_template.gc_thresh,
6363		.maxlen		=	sizeof(int),
6364		.mode		=	0644,
6365		.proc_handler	=	proc_dointvec,
6366	},
6367	{
6368		.procname	=	"flush",
6369		.data		=	&init_net.ipv6.sysctl.flush_delay,
6370		.maxlen		=	sizeof(int),
6371		.mode		=	0200,
6372		.proc_handler	=	ipv6_sysctl_rtcache_flush
6373	},
6374	{
6375		.procname	=	"gc_min_interval",
6376		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
6377		.maxlen		=	sizeof(int),
6378		.mode		=	0644,
6379		.proc_handler	=	proc_dointvec_jiffies,
6380	},
6381	{
6382		.procname	=	"gc_timeout",
6383		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
6384		.maxlen		=	sizeof(int),
6385		.mode		=	0644,
6386		.proc_handler	=	proc_dointvec_jiffies,
6387	},
6388	{
6389		.procname	=	"gc_interval",
6390		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
6391		.maxlen		=	sizeof(int),
6392		.mode		=	0644,
6393		.proc_handler	=	proc_dointvec_jiffies,
6394	},
6395	{
6396		.procname	=	"gc_elasticity",
6397		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
6398		.maxlen		=	sizeof(int),
6399		.mode		=	0644,
6400		.proc_handler	=	proc_dointvec,
6401	},
6402	{
6403		.procname	=	"mtu_expires",
6404		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
6405		.maxlen		=	sizeof(int),
6406		.mode		=	0644,
6407		.proc_handler	=	proc_dointvec_jiffies,
6408	},
6409	{
6410		.procname	=	"min_adv_mss",
6411		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
6412		.maxlen		=	sizeof(int),
6413		.mode		=	0644,
6414		.proc_handler	=	proc_dointvec,
6415	},
6416	{
6417		.procname	=	"gc_min_interval_ms",
6418		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
6419		.maxlen		=	sizeof(int),
6420		.mode		=	0644,
6421		.proc_handler	=	proc_dointvec_ms_jiffies,
6422	},
6423	{
6424		.procname	=	"skip_notify_on_dev_down",
6425		.data		=	&init_net.ipv6.sysctl.skip_notify_on_dev_down,
6426		.maxlen		=	sizeof(u8),
6427		.mode		=	0644,
6428		.proc_handler	=	proc_dou8vec_minmax,
6429		.extra1		=	SYSCTL_ZERO,
6430		.extra2		=	SYSCTL_ONE,
6431	},
6432	{ }
6433};
6434
6435struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
6436{
6437	struct ctl_table *table;
6438
6439	table = kmemdup(ipv6_route_table_template,
6440			sizeof(ipv6_route_table_template),
6441			GFP_KERNEL);
6442
6443	if (table) {
6444		table[0].data = &net->ipv6.sysctl.ip6_rt_max_size;
6445		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
6446		table[2].data = &net->ipv6.sysctl.flush_delay;
6447		table[2].extra1 = net;
6448		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
6449		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
6450		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
6451		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
6452		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
6453		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
6454		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
6455		table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
6456
6457		/* Don't export sysctls to unprivileged users */
6458		if (net->user_ns != &init_user_ns)
6459			table[1].procname = NULL;
6460	}
6461
6462	return table;
6463}
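
/* Hedged maintenance note: the numeric table[] indices above must stay
 * in step with the ordering of ipv6_route_table_template[]; inserting a
 * template entry without adjusting these offsets would silently rewire
 * the per-netns data pointers.
 */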
6464
6465size_t ipv6_route_sysctl_table_size(struct net *net)
6466{
6467	/* Don't export sysctls to unprivileged users */
6468	if (net->user_ns != &init_user_ns)
6469		return 1;
6470
6471	return ARRAY_SIZE(ipv6_route_table_template);
6472}
6473#endif
6474
6475static int __net_init ip6_route_net_init(struct net *net)
6476{
6477	int ret = -ENOMEM;
6478
6479	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
6480	       sizeof(net->ipv6.ip6_dst_ops));
6481
6482	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
6483		goto out_ip6_dst_ops;
6484
6485	net->ipv6.fib6_null_entry = fib6_info_alloc(GFP_KERNEL, true);
6486	if (!net->ipv6.fib6_null_entry)
6487		goto out_ip6_dst_entries;
6488	memcpy(net->ipv6.fib6_null_entry, &fib6_null_entry_template,
6489	       sizeof(*net->ipv6.fib6_null_entry));
6490
6491	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
6492					   sizeof(*net->ipv6.ip6_null_entry),
6493					   GFP_KERNEL);
6494	if (!net->ipv6.ip6_null_entry)
6495		goto out_fib6_null_entry;
	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
			 ip6_template_metrics, true);
	INIT_LIST_HEAD(&net->ipv6.ip6_null_entry->dst.rt_uncached);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	net->ipv6.fib6_has_custom_rules = false;
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto out_ip6_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
			 ip6_template_metrics, true);
	INIT_LIST_HEAD(&net->ipv6.ip6_prohibit_entry->dst.rt_uncached);

	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto out_ip6_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
			 ip6_template_metrics, true);
	INIT_LIST_HEAD(&net->ipv6.ip6_blk_hole_entry->dst.rt_uncached);
#ifdef CONFIG_IPV6_SUBTREES
	net->ipv6.fib6_routes_require_src = 0;
#endif
#endif

	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = INT_MAX;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
	net->ipv6.sysctl.skip_notify_on_dev_down = 0;

	atomic_set(&net->ipv6.ip6_rt_gc_expire, 30*HZ);

	ret = 0;
out:
	return ret;

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_ip6_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
out_ip6_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
out_fib6_null_entry:
	kfree(net->ipv6.fib6_null_entry);
out_ip6_dst_entries:
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
out_ip6_dst_ops:
	goto out;
}

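/* Inverse of ip6_route_net_init(): the special routes were allocated
 * with kmemdup()/fib6_info_alloc(), so plain kfree() is enough here.
 */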
static void __net_exit ip6_route_net_exit(struct net *net)
{
	kfree(net->ipv6.fib6_null_entry);
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}

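/* "Late" pernet ops: only the /proc/net diagnostic files, registered
 * from ip6_route_init() once the routing state proper is in place.
 */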
static int __net_init ip6_route_net_init_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	if (!proc_create_net("ipv6_route", 0, net->proc_net,
			     &ipv6_route_seq_ops,
			     sizeof(struct ipv6_route_iter)))
		return -ENOMEM;

	if (!proc_create_net_single("rt6_stats", 0444, net->proc_net,
				    rt6_stats_seq_show, NULL)) {
		remove_proc_entry("ipv6_route", net->proc_net);
		return -ENOMEM;
	}
#endif
	return 0;
}

static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ipv6_route", net->proc_net);
	remove_proc_entry("rt6_stats", net->proc_net);
#endif
}

static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};

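/* Per-namespace inetpeer tree for IPv6, holding long-lived
 * per-destination state such as ICMPv6 rate-limiting data.
 */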
static int __net_init ipv6_inetpeer_init(struct net *net)
{
	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);

	if (!bp)
		return -ENOMEM;
	inet_peer_base_init(bp);
	net->ipv6.peers = bp;
	return 0;
}

static void __net_exit ipv6_inetpeer_exit(struct net *net)
{
	struct inet_peer_base *bp = net->ipv6.peers;

	net->ipv6.peers = NULL;
	inetpeer_invalidate_tree(bp);
	kfree(bp);
}

static struct pernet_operations ipv6_inetpeer_ops = {
	.init	=	ipv6_inetpeer_init,
	.exit	=	ipv6_inetpeer_exit,
};

static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};

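/* Priority is ADDRCONF_NOTIFY_PRIORITY - 10, so this notifier runs
 * after addrconf has reacted to the same device event.
 */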
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = ADDRCONF_NOTIFY_PRIORITY - 10,
};

void __init ip6_route_init_special_entries(void)
{
	/* The loopback device is registered before this code runs, so the
	 * loopback reference in rt6_info is not taken automatically; set
	 * it up manually for init_net.
	 */
	init_net.ipv6.fib6_null_entry->fib6_nh->fib_nh_dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#endif
}

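/* With IPv6 built in and BPF + procfs available, the ipv6_route
 * seq_file walker is also exported as a BPF iterator target: each
 * bpf_iter program invocation sees one fib6_info, typed against its
 * BTF id so the verifier can check field accesses.
 */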
#if IS_BUILTIN(CONFIG_IPV6)
#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
DEFINE_BPF_ITER_FUNC(ipv6_route, struct bpf_iter_meta *meta, struct fib6_info *rt)

BTF_ID_LIST(btf_fib6_info_id)
BTF_ID(struct, fib6_info)

static const struct bpf_iter_seq_info ipv6_route_seq_info = {
	.seq_ops		= &ipv6_route_seq_ops,
	.init_seq_private	= bpf_iter_init_seq_net,
	.fini_seq_private	= bpf_iter_fini_seq_net,
	.seq_priv_size		= sizeof(struct ipv6_route_iter),
};

static struct bpf_iter_reg ipv6_route_reg_info = {
	.target			= "ipv6_route",
	.ctx_arg_info_size	= 1,
	.ctx_arg_info		= {
		{ offsetof(struct bpf_iter__ipv6_route, rt),
		  PTR_TO_BTF_ID_OR_NULL },
	},
	.seq_info		= &ipv6_route_seq_info,
};

static int __init bpf_iter_register(void)
{
	ipv6_route_reg_info.ctx_arg_info[0].btf_id = *btf_fib6_info_id;
	return bpf_iter_reg_target(&ipv6_route_reg_info);
}

static void bpf_iter_unregister(void)
{
	bpf_iter_unreg_target(&ipv6_route_reg_info);
}
#endif
#endif

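/* Subsystem init: create the dst cache and pernet state, bring up
 * fib6, xfrm and policy rules, then register the RTM_NEWROUTE /
 * RTM_DELROUTE / RTM_GETROUTE handlers and the per-CPU uncached-route
 * lists.  Every failure unwinds the steps before it via the goto
 * ladder at the end.
 */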
int __init ip6_route_init(void)
{
	int ret;
	int cpu;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
	if (ret)
		goto out_dst_entries;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_register_inetpeer;

	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	ret = register_pernet_subsys(&ip6_route_net_late_ops);
	if (ret)
		goto fib6_rules_init;

	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
				   inet6_rtm_newroute, NULL, 0);
	if (ret < 0)
		goto out_register_late_subsys;

	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
				   inet6_rtm_delroute, NULL, 0);
	if (ret < 0)
		goto out_register_late_subsys;

	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
				   inet6_rtm_getroute, NULL,
				   RTNL_FLAG_DOIT_UNLOCKED);
	if (ret < 0)
		goto out_register_late_subsys;

	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto out_register_late_subsys;

#if IS_BUILTIN(CONFIG_IPV6)
#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
	ret = bpf_iter_register();
	if (ret)
		goto out_register_late_subsys;
#endif
#endif

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);

		INIT_LIST_HEAD(&ul->head);
		INIT_LIST_HEAD(&ul->quarantine);
		spin_lock_init(&ul->lock);
	}

out:
	return ret;

out_register_late_subsys:
	rtnl_unregister_all(PF_INET6);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_register_inetpeer:
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}

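/* Teardown: undo ip6_route_init() in reverse order. */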
void ip6_route_cleanup(void)
{
#if IS_BUILTIN(CONFIG_IPV6)
#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
	bpf_iter_unregister();
#endif
#endif
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}