/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		reachable.  otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */

#define pr_fmt(fmt) "IPv6: " fmt

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/times.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/route.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/mroute6.h>
#include <linux/init.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/snmp.h>
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/tcp.h>
#include <linux/rtnetlink.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
#include <net/xfrm.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <net/nexthop.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>
#include <net/l3mdev.h>
#include <trace/events/fib6.h>

#include <asm/uaccess.h>

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,
	RT6_NUD_FAIL_PROBE = -2,
	RT6_NUD_FAIL_DO_RR = -1,
	RT6_NUD_SUCCEED = 1
};

static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
static unsigned int	 ip6_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void		ip6_dst_destroy(struct dst_entry *);
static void		ip6_dst_ifdown(struct dst_entry *,
				       struct net_device *dev, int how);
static int		 ip6_dst_gc(struct dst_ops *ops);

static int		ip6_pkt_discard(struct sk_buff *skb);
static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static int		ip6_pkt_prohibit(struct sk_buff *skb);
static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static void		ip6_link_failure(struct sk_buff *skb);
static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
					   struct sk_buff *skb, u32 mtu);
static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
					struct sk_buff *skb);
static void		rt6_dst_from_metrics_check(struct rt6_info *rt);
static int rt6_score_route(struct rt6_info *rt, int oif, int strict);

#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex,
					   unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex);
#endif

struct uncached_list {
	spinlock_t		lock;
	struct list_head	head;
};

static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);

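/* Uncached (DST_NOCACHE) routes are tracked on a per-cpu list so that
 * rt6_uncached_list_flush_dev() can re-point them at the loopback
 * device when the device they reference is unregistered.
 */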
static void rt6_uncached_list_add(struct rt6_info *rt)
{
	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);

	rt->dst.flags |= DST_NOCACHE;
	rt->rt6i_uncached_list = ul;

	spin_lock_bh(&ul->lock);
	list_add_tail(&rt->rt6i_uncached, &ul->head);
	spin_unlock_bh(&ul->lock);
}

static void rt6_uncached_list_del(struct rt6_info *rt)
{
	if (!list_empty(&rt->rt6i_uncached)) {
		struct uncached_list *ul = rt->rt6i_uncached_list;

		spin_lock_bh(&ul->lock);
		list_del(&rt->rt6i_uncached);
		spin_unlock_bh(&ul->lock);
	}
}

static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
{
	struct net_device *loopback_dev = net->loopback_dev;
	int cpu;

	if (dev == loopback_dev)
		return;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
		struct rt6_info *rt;

		spin_lock_bh(&ul->lock);
		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
			struct inet6_dev *rt_idev = rt->rt6i_idev;
			struct net_device *rt_dev = rt->dst.dev;

			if (rt_idev->dev == dev) {
				rt->rt6i_idev = in6_dev_get(loopback_dev);
				in6_dev_put(rt_idev);
			}

			if (rt_dev == dev) {
				rt->dst.dev = loopback_dev;
				dev_hold(rt->dst.dev);
				dev_put(rt_dev);
			}
		}
		spin_unlock_bh(&ul->lock);
	}
}

static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
{
	return dst_metrics_write_ptr(rt->dst.from);
}

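/* Copy-on-write strategy for route metrics: per-cpu clones write
 * through to the dst they were copied from, RTF_CACHE clones keep
 * their metrics read-only, and everything else gets a private copy
 * via the generic COW helper.
 */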
static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	struct rt6_info *rt = (struct rt6_info *)dst;

	if (rt->rt6i_flags & RTF_PCPU)
		return rt6_pcpu_cow_metrics(rt);
	else if (rt->rt6i_flags & RTF_CACHE)
		return NULL;
	else
		return dst_cow_metrics_generic(dst, old);
}

static inline const void *choose_neigh_daddr(struct rt6_info *rt,
					     struct sk_buff *skb,
					     const void *daddr)
{
	struct in6_addr *p = &rt->rt6i_gateway;

	if (!ipv6_addr_any(p))
		return (const void *) p;
	else if (skb)
		return &ipv6_hdr(skb)->daddr;
	return daddr;
}

static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
					  struct sk_buff *skb,
					  const void *daddr)
{
	struct rt6_info *rt = (struct rt6_info *) dst;
	struct neighbour *n;

	daddr = choose_neigh_daddr(rt, skb, daddr);
	n = __ipv6_neigh_lookup(dst->dev, daddr);
	if (n)
		return n;
	return neigh_create(&nd_tbl, daddr, dst->dev);
}

static struct dst_ops ip6_dst_ops_template = {
	.family			=	AF_INET6,
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
	.default_advmss		=	ip6_default_advmss,
	.mtu			=	ip6_mtu,
	.cow_metrics		=	ipv6_cow_metrics,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	.redirect		=	rt6_do_redirect,
	.local_out		=	__ip6_local_out,
	.neigh_lookup		=	ip6_neigh_lookup,
};

static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
{
	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);

	return mtu ? : dst->dev->mtu;
}

static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
}

static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}

static struct dst_ops ip6_dst_blackhole_ops = {
	.family			=	AF_INET6,
	.destroy		=	ip6_dst_destroy,
	.check			=	ip6_dst_check,
	.mtu			=	ip6_blackhole_mtu,
	.default_advmss		=	ip6_default_advmss,
	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
	.redirect		=	ip6_rt_blackhole_redirect,
	.cow_metrics		=	dst_cow_metrics_generic,
	.neigh_lookup		=	ip6_neigh_lookup,
};

static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};

static const struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif

static void rt6_info_init(struct rt6_info *rt)
{
	struct dst_entry *dst = &rt->dst;

	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
	INIT_LIST_HEAD(&rt->rt6i_siblings);
	INIT_LIST_HEAD(&rt->rt6i_uncached);
}

/* allocate dst with ip6_dst_ops */
static struct rt6_info *__ip6_dst_alloc(struct net *net,
					struct net_device *dev,
					int flags)
{
	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
					0, DST_OBSOLETE_FORCE_CHK, flags);

	if (rt)
		rt6_info_init(rt);

	return rt;
}

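/* ip6_dst_alloc() additionally reserves a per-cpu array of cached
 * clones (rt6i_pcpu); if the per-cpu array cannot be allocated, the
 * route itself is torn down and the allocation fails outright.
 */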
struct rt6_info *ip6_dst_alloc(struct net *net,
			       struct net_device *dev,
			       int flags)
{
	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);

	if (rt) {
		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
		if (rt->rt6i_pcpu) {
			int cpu;

			for_each_possible_cpu(cpu) {
				struct rt6_info **p;

				p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
				/* no one shares rt */
				*p = NULL;
			}
		} else {
			dst_destroy((struct dst_entry *)rt);
			return NULL;
		}
	}

	return rt;
}
EXPORT_SYMBOL(ip6_dst_alloc);

static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct dst_entry *from = dst->from;
	struct inet6_dev *idev;

	dst_destroy_metrics_generic(dst);
	free_percpu(rt->rt6i_pcpu);
	rt6_uncached_list_del(rt);

	idev = rt->rt6i_idev;
	if (idev) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}

	dst->from = NULL;
	dst_release(from);
}

static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			   int how)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *loopback_dev =
		dev_net(dev)->loopback_dev;

	if (dev != loopback_dev) {
		if (idev && idev->dev == dev) {
			struct inet6_dev *loopback_idev =
				in6_dev_get(loopback_dev);
			if (loopback_idev) {
				rt->rt6i_idev = loopback_idev;
				in6_dev_put(idev);
			}
		}
	}
}

static bool __rt6_check_expired(const struct rt6_info *rt)
{
	if (rt->rt6i_flags & RTF_EXPIRES)
		return time_after(jiffies, rt->dst.expires);
	else
		return false;
}

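/* Unlike __rt6_check_expired(), the recursive variant also treats a
 * clone as expired once the route it was copied from (dst.from) has
 * expired.
 */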
static bool rt6_check_expired(const struct rt6_info *rt)
{
	if (rt->rt6i_flags & RTF_EXPIRES) {
		if (time_after(jiffies, rt->dst.expires))
			return true;
	} else if (rt->dst.from) {
		return rt6_check_expired((struct rt6_info *) rt->dst.from);
	}
	return false;
}

/* Multipath route selection:
 *   Hash based function using packet header and flowlabel.
 * Adapted from fib_info_hashfn()
 */
static int rt6_info_hash_nhsfn(unsigned int candidate_count,
			       const struct flowi6 *fl6)
{
	return get_hash_from_flowi6(fl6) % candidate_count;
}

static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
					     struct flowi6 *fl6, int oif,
					     int strict)
{
	struct rt6_info *sibling, *next_sibling;
	int route_chosen;

	route_chosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
	/* Don't change the route if route_chosen == 0
	 * (the siblings list does not include ourselves)
	 */
	if (route_chosen)
		list_for_each_entry_safe(sibling, next_sibling,
				&match->rt6i_siblings, rt6i_siblings) {
			route_chosen--;
			if (route_chosen == 0) {
				if (rt6_score_route(sibling, oif, strict) < 0)
					break;
				match = sibling;
				break;
			}
		}
	return match;
}

/*
 *	Route lookup. table->tb6_lock is assumed to be held by the caller.
 */

static inline struct rt6_info *rt6_device_match(struct net *net,
						    struct rt6_info *rt,
						    const struct in6_addr *saddr,
						    int oif,
						    int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	if (!oif && ipv6_addr_any(saddr))
		goto out;

	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
		struct net_device *dev = sprt->dst.dev;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				if (!sprt->rt6i_idev ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (flags & RT6_LOOKUP_F_IFACE)
						continue;
					if (local &&
					    local->rt6i_idev->dev->ifindex == oif)
						continue;
				}
				local = sprt;
			}
		} else {
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.ip6_null_entry;
	}
out:
	return rt;
}

#ifdef CONFIG_IPV6_ROUTER_PREF
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;
	struct net_device *dev;
};

static void rt6_probe_deferred(struct work_struct *w)
{
	struct in6_addr mcaddr;
	struct __rt6_probe_work *work =
		container_of(w, struct __rt6_probe_work, work);

	addrconf_addr_solict_mult(&work->target, &mcaddr);
	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL);
	dev_put(work->dev);
	kfree(work);
}

static void rt6_probe(struct rt6_info *rt)
{
	struct __rt6_probe_work *work;
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;
	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		if (neigh->nud_state & NUD_VALID)
			goto out;

		work = NULL;
		write_lock(&neigh->lock);
		if (!(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies,
			       neigh->updated +
			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
			work = kmalloc(sizeof(*work), GFP_ATOMIC);
			if (work)
				__neigh_set_probe_once(neigh);
		}
		write_unlock(&neigh->lock);
	} else {
		work = kmalloc(sizeof(*work), GFP_ATOMIC);
	}

	if (work) {
		INIT_WORK(&work->work, rt6_probe_deferred);
		work->target = rt->rt6i_gateway;
		dev_hold(rt->dst.dev);
		work->dev = rt->dst.dev;
		schedule_work(&work->work);
	}

out:
	rcu_read_unlock_bh();
}
#else
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif

/*
 * Default Router Selection (RFC 2461 6.3.6)
 */
static inline int rt6_check_dev(struct rt6_info *rt, int oif)
{
	struct net_device *dev = rt->dst.dev;
	if (!oif || dev->ifindex == oif)
		return 2;
	if ((dev->flags & IFF_LOOPBACK) &&
	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
		return 1;
	return 0;
}

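/* Score the gateway's neighbour entry: NUD_VALID always succeeds;
 * with router preference enabled an unresolved-but-not-failed entry
 * is still acceptable, while a failed one asks the caller to probe.
 */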
static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh;
	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;

	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		return RT6_NUD_SUCCEED;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		read_lock(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			ret = RT6_NUD_SUCCEED;
#ifdef CONFIG_IPV6_ROUTER_PREF
		else if (!(neigh->nud_state & NUD_FAILED))
			ret = RT6_NUD_SUCCEED;
		else
			ret = RT6_NUD_FAIL_PROBE;
#endif
		read_unlock(&neigh->lock);
	} else {
		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
	}
	rcu_read_unlock_bh();

	return ret;
}

static int rt6_score_route(struct rt6_info *rt, int oif,
			   int strict)
{
	int m;

	m = rt6_check_dev(rt, oif);
	if (!m && (strict & RT6_LOOKUP_F_IFACE))
		return RT6_NUD_FAIL_HARD;
#ifdef CONFIG_IPV6_ROUTER_PREF
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
	if (strict & RT6_LOOKUP_F_REACHABLE) {
		int n = rt6_check_neigh(rt);
		if (n < 0)
			return n;
	}
	return m;
}

static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match,
				   bool *do_rr)
{
	int m;
	bool match_do_rr = false;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *dev = rt->dst.dev;

	if (dev && !netif_carrier_ok(dev) &&
	    idev->cnf.ignore_routes_with_linkdown)
		goto out;

	if (rt6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m == RT6_NUD_FAIL_DO_RR) {
		match_do_rr = true;
		m = 0; /* lowest valid score */
	} else if (m == RT6_NUD_FAIL_HARD) {
		goto out;
	}

	if (strict & RT6_LOOKUP_F_REACHABLE)
		rt6_probe(rt);

	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
	if (m > *mpri) {
		*do_rr = match_do_rr;
		*mpri = m;
		match = rt;
	}
out:
	return match;
}

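/* Walk the rt6_info chain starting at rr_head, scoring only entries
 * with the given metric; entries with a different metric are deferred
 * (via "cont") to a final pass that only runs when nothing matched.
 */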
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict,
				     bool *do_rr)
{
	struct rt6_info *rt, *match, *cont;
	int mpri = -1;

	match = NULL;
	cont = NULL;
	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	if (match || !cont)
		return match;

	for (rt = cont; rt; rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match, do_rr);

	return match;
}

static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
	struct rt6_info *match, *rt0;
	struct net *net;
	bool do_rr = false;

	rt0 = fn->rr_ptr;
	if (!rt0)
		fn->rr_ptr = rt0 = fn->leaf;

	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
			     &do_rr);

	if (do_rr) {
		struct rt6_info *next = rt0->dst.rt6_next;

		/* no entries matched; do round-robin */
		if (!next || next->rt6i_metric != rt0->rt6i_metric)
			next = fn->leaf;

		if (next != rt0)
			fn->rr_ptr = next;
	}

	net = dev_net(rt0->dst.dev);
	return match ? match : net->ipv6.ip6_null_entry;
}

static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
{
	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
}

#ifdef CONFIG_IPV6_ROUTE_INFO
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(gwaddr, dev);
	else
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		ip6_rt_put(rt);
	}
	return 0;
}
#endif

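/* When a lookup lands on a node with no usable route, back up the
 * tree toward less specific prefixes (descending into source-routing
 * subtrees on the way) until a node carrying route info is found.
 */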
static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
					struct in6_addr *saddr)
{
	struct fib6_node *pn;
	while (1) {
		if (fn->fn_flags & RTN_TL_ROOT)
			return NULL;
		pn = fn->parent;
		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
		else
			fn = pn;
		if (fn->fn_flags & RTN_RTINFO)
			return fn;
	}
}

static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}
	dst_use(&rt->dst, jiffies);
	read_unlock_bh(&table->tb6_lock);

	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);

	return rt;
}

struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
				    int flags)
{
	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);

struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
			    const struct in6_addr *saddr, int oif, int strict)
{
	struct flowi6 fl6 = {
		.flowi6_oif = oif,
		.daddr = *daddr,
	};
	struct dst_entry *dst;
	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;

	if (saddr) {
		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	}

	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
	if (dst->error == 0)
		return (struct rt6_info *) dst;

	dst_release(dst);

	return NULL;
}
EXPORT_SYMBOL(rt6_lookup);

/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason,
   the route is freed.  In any case, if the caller does not hold a
   reference, the route may be destroyed.
 */

static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
			struct mx6_config *mxc)
{
	int err;
	struct fib6_table *table;

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
	err = fib6_add(&table->tb6_root, rt, info, mxc);
	write_unlock_bh(&table->tb6_lock);

	return err;
}

int ip6_ins_rt(struct rt6_info *rt)
{
	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
	struct mx6_config mxc = { .mx = NULL, };

	return __ip6_ins_rt(rt, &info, &mxc);
}

static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
					   const struct in6_addr *daddr,
					   const struct in6_addr *saddr)
{
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
		ort = (struct rt6_info *)ort->dst.from;

	rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);

	if (!rt)
		return NULL;

	ip6_rt_copy_init(rt, ort);
	rt->rt6i_flags |= RTF_CACHE;
	rt->rt6i_metric = 0;
	rt->dst.flags |= DST_HOST;
	rt->rt6i_dst.addr = *daddr;
	rt->rt6i_dst.plen = 128;

	if (!rt6_is_gw_or_nonexthop(ort)) {
		if (ort->rt6i_dst.plen != 128 &&
		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
			rt->rt6i_flags |= RTF_ANYCAST;
#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			rt->rt6i_src.addr = *saddr;
			rt->rt6i_src.plen = 128;
		}
#endif
	}

	return rt;
}

static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
{
	struct rt6_info *pcpu_rt;

	pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
				  rt->dst.dev, rt->dst.flags);

	if (!pcpu_rt)
		return NULL;
	ip6_rt_copy_init(pcpu_rt, rt);
	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
	pcpu_rt->rt6i_flags |= RTF_PCPU;
	return pcpu_rt;
}

/* It should be called with read_lock_bh(&tb6_lock) acquired */
static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
{
	struct rt6_info *pcpu_rt, **p;

	p = this_cpu_ptr(rt->rt6i_pcpu);
	pcpu_rt = *p;

	if (pcpu_rt) {
		dst_hold(&pcpu_rt->dst);
		rt6_dst_from_metrics_check(pcpu_rt);
	}
	return pcpu_rt;
}

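/* Install a freshly allocated per-cpu clone: cmpxchg() resolves the
 * race with another context installing one first, and a route that
 * has already left the fib6 tree is returned as-is rather than cloned.
 */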
static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
{
	struct fib6_table *table = rt->rt6i_table;
	struct rt6_info *pcpu_rt, *prev, **p;

	pcpu_rt = ip6_rt_pcpu_alloc(rt);
	if (!pcpu_rt) {
		struct net *net = dev_net(rt->dst.dev);

		dst_hold(&net->ipv6.ip6_null_entry->dst);
		return net->ipv6.ip6_null_entry;
	}

	read_lock_bh(&table->tb6_lock);
	if (rt->rt6i_pcpu) {
		p = this_cpu_ptr(rt->rt6i_pcpu);
		prev = cmpxchg(p, NULL, pcpu_rt);
		if (prev) {
			/* If someone did it before us, return prev instead */
			dst_destroy(&pcpu_rt->dst);
			pcpu_rt = prev;
		}
	} else {
		/* rt has been removed from the fib6 tree
		 * before we have a chance to acquire the read_lock.
		 * In this case, don't bother to create a pcpu rt
		 * since rt is going away anyway.  The next
		 * dst_check() will trigger a re-lookup.
		 */
		dst_destroy(&pcpu_rt->dst);
		pcpu_rt = rt;
	}
	dst_hold(&pcpu_rt->dst);
	rt6_dst_from_metrics_check(pcpu_rt);
	read_unlock_bh(&table->tb6_lock);
	return pcpu_rt;
}

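/* Core policy-routing lookup.  Depending on what the tree returns,
 * the caller gets the null entry or an RTF_CACHE clone directly, a
 * fresh uncached clone (FLOWI_FLAG_KNOWN_NH without a gateway), or a
 * per-cpu copy of the matched route.
 */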
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
				      struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn, *saved_fn;
	struct rt6_info *rt;
	int strict = 0;

	strict |= flags & RT6_LOOKUP_F_IFACE;
	if (net->ipv6.devconf_all->forwarding == 0)
		strict |= RT6_LOOKUP_F_REACHABLE;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
	saved_fn = fn;

	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
		oif = 0;

redo_rt6_select:
	rt = rt6_select(fn, oif, strict);
	if (rt->rt6i_nsiblings)
		rt = rt6_multipath_select(rt, fl6, oif, strict);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto redo_rt6_select;
		else if (strict & RT6_LOOKUP_F_REACHABLE) {
			/* also consider unreachable route */
			strict &= ~RT6_LOOKUP_F_REACHABLE;
			fn = saved_fn;
			goto redo_rt6_select;
		}
	}

	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		rt6_dst_from_metrics_check(rt);

		trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
		return rt;
	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
			    !(rt->rt6i_flags & RTF_GATEWAY))) {
		/* Create a RTF_CACHE clone which will not be
		 * owned by the fib6 tree.  It is for the special case where
		 * the daddr in the skb during the neighbor look-up is different
		 * from the fl6->daddr used to look-up route here.
		 */

		struct rt6_info *uncached_rt;

		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
		dst_release(&rt->dst);

		if (uncached_rt)
			rt6_uncached_list_add(uncached_rt);
		else
			uncached_rt = net->ipv6.ip6_null_entry;

		dst_hold(&uncached_rt->dst);

		trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
		return uncached_rt;

	} else {
		/* Get a percpu copy */

		struct rt6_info *pcpu_rt;

		rt->dst.lastuse = jiffies;
		rt->dst.__use++;
		pcpu_rt = rt6_get_pcpu_route(rt);

		if (pcpu_rt) {
			read_unlock_bh(&table->tb6_lock);
		} else {
			/* We have to do the read_unlock first
			 * because rt6_make_pcpu_route() may trigger
			 * ip6_dst_gc() which will take the write_lock.
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			pcpu_rt = rt6_make_pcpu_route(rt);
			dst_release(&rt->dst);
		}

		trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
		return pcpu_rt;

	}
}

static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
					    struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
}

static struct dst_entry *ip6_route_input_lookup(struct net *net,
						struct net_device *dev,
						struct flowi6 *fl6, int flags)
{
	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
		flags |= RT6_LOOKUP_F_IFACE;

	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
}

void ip6_route_input(struct sk_buff *skb)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	struct net *net = dev_net(skb->dev);
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct ip_tunnel_info *tun_info;
	struct flowi6 fl6 = {
		.flowi6_iif = l3mdev_fib_oif(skb->dev),
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowlabel = ip6_flowinfo(iph),
		.flowi6_mark = skb->mark,
		.flowi6_proto = iph->nexthdr,
	};

	tun_info = skb_tunnel_info(skb);
	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
	skb_dst_drop(skb);
	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
}

static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
}

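/* Output-path lookup: an L3 master device may satisfy the lookup
 * directly; otherwise strict interface matching is forced for bound
 * sockets, link-local/multicast destinations, and oif lookups with an
 * unspecified source address.
 */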
struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
					 struct flowi6 *fl6, int flags)
{
	struct dst_entry *dst;
	bool any_src;

	dst = l3mdev_rt6_dst_by_oif(net, fl6);
	if (dst)
		return dst;

	fl6->flowi6_iif = LOOPBACK_IFINDEX;

	any_src = ipv6_addr_any(&fl6->saddr);
	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
	    (fl6->flowi6_oif && any_src))
		flags |= RT6_LOOKUP_F_IFACE;

	if (!any_src)
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	else if (sk)
		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);

	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
}
EXPORT_SYMBOL_GPL(ip6_route_output_flags);

struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
	struct dst_entry *new = NULL;

	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
	if (rt) {
		rt6_info_init(rt);

		new = &rt->dst;
		new->__use = 1;
		new->input = dst_discard;
		new->output = dst_discard_out;

		dst_copy_metrics(new, &ort->dst);
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);

		rt->rt6i_gateway = ort->rt6i_gateway;
		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif

		dst_free(new);
	}

	dst_release(dst_orig);
	return new ? new : ERR_PTR(-ENOMEM);
}

/*
 *	Destination cache support functions
 */

static void rt6_dst_from_metrics_check(struct rt6_info *rt)
{
	if (rt->dst.from &&
	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
}

static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
{
	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
		return NULL;

	if (rt6_check_expired(rt))
		return NULL;

	return &rt->dst;
}

static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
{
	if (!__rt6_check_expired(rt) &&
	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
	    rt6_check((struct rt6_info *)(rt->dst.from), cookie))
		return &rt->dst;
	else
		return NULL;
}

static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rt6_info *rt;

	rt = (struct rt6_info *) dst;

	/* All IPV6 dsts are created with ->obsolete set to the value
	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
	 * into this function always.
	 */

	rt6_dst_from_metrics_check(rt);

	if (rt->rt6i_flags & RTF_PCPU ||
	    (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
		return rt6_dst_from_check(rt, cookie);
	else
		return rt6_check(rt, cookie);
}

static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *) dst;

	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			if (rt6_check_expired(rt)) {
				ip6_del_rt(rt);
				dst = NULL;
			}
		} else {
			dst_release(dst);
			dst = NULL;
		}
	}
	return dst;
}

static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);

	rt = (struct rt6_info *) skb_dst(skb);
	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			dst_hold(&rt->dst);
			ip6_del_rt(rt);
		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
			rt->rt6i_node->fn_sernum = -1;
		}
	}
}

static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
{
	struct net *net = dev_net(rt->dst.dev);

	rt->rt6i_flags |= RTF_MODIFIED;
	rt->rt6i_pmtu = mtu;
	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
}

static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
{
	return !(rt->rt6i_flags & RTF_CACHE) &&
		(rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
}

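/* Apply a learned path MTU: routes the stack may modify in place are
 * updated directly; everything else gets an RTF_CACHE clone carrying
 * the new MTU so the original route stays untouched.
 */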
static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
				 const struct ipv6hdr *iph, u32 mtu)
{
	struct rt6_info *rt6 = (struct rt6_info *)dst;

	if (rt6->rt6i_flags & RTF_LOCAL)
		return;

	dst_confirm(dst);
	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
	if (mtu >= dst_mtu(dst))
		return;

	if (!rt6_cache_allowed_for_pmtu(rt6)) {
		rt6_do_update_pmtu(rt6, mtu);
	} else {
		const struct in6_addr *daddr, *saddr;
		struct rt6_info *nrt6;

		if (iph) {
			daddr = &iph->daddr;
			saddr = &iph->saddr;
		} else if (sk) {
			daddr = &sk->sk_v6_daddr;
			saddr = &inet6_sk(sk)->saddr;
		} else {
			return;
		}
		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
		if (nrt6) {
			rt6_do_update_pmtu(nrt6, mtu);

			/* ip6_ins_rt(nrt6) will bump the
			 * rt6->rt6i_node->fn_sernum
			 * which will fail the next rt6_check() and
			 * invalidate the sk->sk_dst_cache.
			 */
			ip6_ins_rt(nrt6);
		}
	}
}

static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			       struct sk_buff *skb, u32 mtu)
{
	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
}

void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
		     int oif, u32 mark)
{
	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
	fl6.daddr = iph->daddr;
	fl6.saddr = iph->saddr;
	fl6.flowlabel = ip6_flowinfo(iph);

	dst = ip6_route_output(net, NULL, &fl6);
	if (!dst->error)
		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
	dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_update_pmtu);

void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
{
	struct dst_entry *dst;

	ip6_update_pmtu(skb, sock_net(sk), mtu,
			sk->sk_bound_dev_if, sk->sk_mark);

	dst = __sk_dst_get(sk);
	if (!dst || !dst->obsolete ||
	    dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
		return;

	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
		ip6_datagram_dst_update(sk, false);
	bh_unlock_sock(sk);
}
EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);

/* Handle redirects */
struct ip6rd_flowi {
	struct flowi6 fl6;
	struct in6_addr gateway;
};

static struct rt6_info *__ip6_route_redirect(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6,
					     int flags)
{
	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
	struct rt6_info *rt;
	struct fib6_node *fn;

	/* Get the "current" route for this destination and
	 * check if the redirect has come from the appropriate router.
	 *
	 * RFC 4861 specifies that redirects should only be
	 * accepted if they come from the nexthop to the target.
	 * Due to the way the routes are chosen, this notion
	 * is a bit fuzzy and one might need to check all possible
	 * routes.
	 */

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt6_check_expired(rt))
			continue;
		if (rt->dst.error)
			break;
		if (!(rt->rt6i_flags & RTF_GATEWAY))
			continue;
		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
			continue;
		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
			continue;
		break;
	}

	if (!rt)
		rt = net->ipv6.ip6_null_entry;
	else if (rt->dst.error) {
		rt = net->ipv6.ip6_null_entry;
		goto out;
	}

	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}

out:
	dst_hold(&rt->dst);

	read_unlock_bh(&table->tb6_lock);

	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
	return rt;
};

static struct dst_entry *ip6_route_redirect(struct net *net,
					const struct flowi6 *fl6,
					const struct in6_addr *gateway)
{
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct ip6rd_flowi rdfl;

	rdfl.fl6 = *fl6;
	rdfl.gateway = *gateway;

	return fib6_rule_lookup(net, &rdfl.fl6,
				flags, __ip6_route_redirect);
}

void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
{
	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_iif = LOOPBACK_IFINDEX;
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark;
	fl6.daddr = iph->daddr;
	fl6.saddr = iph->saddr;
	fl6.flowlabel = ip6_flowinfo(iph);

	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
	rt6_do_redirect(dst, NULL, skb);
	dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_redirect);

void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
			    u32 mark)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_iif = LOOPBACK_IFINDEX;
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark;
	fl6.daddr = msg->dest;
	fl6.saddr = iph->daddr;

	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
	rt6_do_redirect(dst, NULL, skb);
	dst_release(dst);
}

void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
{
	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
}
EXPORT_SYMBOL_GPL(ip6_sk_redirect);

static unsigned int ip6_default_advmss(const struct dst_entry *dst)
{
	struct net_device *dev = dst->dev;
	unsigned int mtu = dst_mtu(dst);
	struct net *net = dev_net(dev);

	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);

	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;

	/*
	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
	 * IPV6_MAXPLEN is also valid and means: "any MSS,
	 * rely only on pmtu discovery"
	 */
	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
		mtu = IPV6_MAXPLEN;
	return mtu;
}

static unsigned int ip6_mtu(const struct dst_entry *dst)
{
	const struct rt6_info *rt = (const struct rt6_info *)dst;
	unsigned int mtu = rt->rt6i_pmtu;
	struct inet6_dev *idev;

	if (mtu)
		goto out;

	mtu = dst_metric_raw(dst, RTAX_MTU);
	if (mtu)
		goto out;

	mtu = IPV6_MIN_MTU;

	rcu_read_lock();
	idev = __in6_dev_get(dst->dev);
	if (idev)
		mtu = idev->cnf.mtu6;
	rcu_read_unlock();

out:
	return min_t(unsigned int, mtu, IP6_MAX_MTU);
}

static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);

struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
				  struct flowi6 *fl6)
{
	struct dst_entry *dst;
	struct rt6_info *rt;
	struct inet6_dev *idev = in6_dev_get(dev);
	struct net *net = dev_net(dev);

	if (unlikely(!idev))
		return ERR_PTR(-ENODEV);

	rt = ip6_dst_alloc(net, dev, 0);
	if (unlikely(!rt)) {
		in6_dev_put(idev);
		dst = ERR_PTR(-ENOMEM);
		goto out;
	}

	rt->dst.flags |= DST_HOST;
	rt->dst.output  = ip6_output;
	atomic_set(&rt->dst.__refcnt, 1);
	rt->rt6i_gateway  = fl6->daddr;
	rt->rt6i_dst.addr = fl6->daddr;
	rt->rt6i_dst.plen = 128;
	rt->rt6i_idev     = idev;
	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);

	spin_lock_bh(&icmp6_dst_lock);
	rt->dst.next = icmp6_dst_gc_list;
	icmp6_dst_gc_list = &rt->dst;
	spin_unlock_bh(&icmp6_dst_lock);

	fib6_force_start_gc(net);

	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);

out:
	return dst;
}

int icmp6_dst_gc(void)
{
	struct dst_entry *dst, **pprev;
	int more = 0;

	spin_lock_bh(&icmp6_dst_lock);
	pprev = &icmp6_dst_gc_list;

	while ((dst = *pprev) != NULL) {
		if (!atomic_read(&dst->__refcnt)) {
			*pprev = dst->next;
			dst_free(dst);
		} else {
			pprev = &dst->next;
			++more;
		}
	}

	spin_unlock_bh(&icmp6_dst_lock);

	return more;
}

static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
			    void *arg)
{
	struct dst_entry *dst, **pprev;

	spin_lock_bh(&icmp6_dst_lock);
	pprev = &icmp6_dst_gc_list;
	while ((dst = *pprev) != NULL) {
		struct rt6_info *rt = (struct rt6_info *) dst;
		if (func(rt, arg)) {
			*pprev = dst->next;
			dst_free(dst);
		} else {
			pprev = &dst->next;
		}
	}
	spin_unlock_bh(&icmp6_dst_lock);
}

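/* dst garbage collection: skipped entirely while the entry count is
 * under ip6_rt_max_size and the minimum interval has not elapsed;
 * each forced run bumps ip6_rt_gc_expire so repeated pressure ages
 * entries out more aggressively.
 */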
static int ip6_dst_gc(struct dst_ops *ops)
{
	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
	int entries;

	entries = dst_entries_get_fast(ops);
	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
	    entries <= rt_max_size)
		goto out;

	net->ipv6.ip6_rt_gc_expire++;
	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
	entries = dst_entries_get_slow(ops);
	if (entries < ops->gc_thresh)
		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout >> 1;
out:
	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire >> rt_elasticity;
	return entries > rt_max_size;
}

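/* Translate RTAX_* netlink attributes from the route request into the
 * metrics array carried by mx6_config, resolving congestion-control
 * algorithm names to their keys along the way.
 */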
static int ip6_convert_metrics(struct mx6_config *mxc,
			       const struct fib6_config *cfg)
{
	bool ecn_ca = false;
	struct nlattr *nla;
	int remaining;
	u32 *mp;

	if (!cfg->fc_mx)
		return 0;

	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
	if (unlikely(!mp))
		return -ENOMEM;

	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
		int type = nla_type(nla);
		u32 val;

		if (!type)
			continue;
		if (unlikely(type > RTAX_MAX))
			goto err;

		if (type == RTAX_CC_ALGO) {
			char tmp[TCP_CA_NAME_MAX];

			nla_strlcpy(tmp, nla, sizeof(tmp));
			val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
			if (val == TCP_CA_UNSPEC)
				goto err;
		} else {
			val = nla_get_u32(nla);
		}
		if (type == RTAX_HOPLIMIT && val > 255)
			val = 255;
		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
			goto err;

		mp[type - 1] = val;
		__set_bit(type - 1, mxc->mx_valid);
	}

	if (ecn_ca) {
		__set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
		mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
	}

	mxc->mx = mp;
	return 0;
 err:
	kfree(mp);
	return -EINVAL;
}

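/* Build (but do not yet insert) an rt6_info from a route request:
 * validate prefix lengths, pick the table, resolve the device and
 * gateway, and wire up the dst input/output handlers.
 */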
1774static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
1775{
1776	struct net *net = cfg->fc_nlinfo.nl_net;
1777	struct rt6_info *rt = NULL;
1778	struct net_device *dev = NULL;
1779	struct inet6_dev *idev = NULL;
1780	struct fib6_table *table;
1781	int addr_type;
 
 
 
1782	int err = -EINVAL;
1783
1784	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1785		goto out;
1786#ifndef CONFIG_IPV6_SUBTREES
1787	if (cfg->fc_src_len)
 
 
 
 
1788		goto out;
1789#endif
1790	if (cfg->fc_ifindex) {
1791		err = -ENODEV;
1792		dev = dev_get_by_index(net, cfg->fc_ifindex);
1793		if (!dev)
1794			goto out;
1795		idev = in6_dev_get(dev);
1796		if (!idev)
1797			goto out;
1798	}
1799
1800	if (cfg->fc_metric == 0)
1801		cfg->fc_metric = IP6_RT_PRIO_USER;
1802
1803	err = -ENOBUFS;
1804	if (cfg->fc_nlinfo.nlh &&
1805	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1806		table = fib6_get_table(net, cfg->fc_table);
1807		if (!table) {
1808			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1809			table = fib6_new_table(net, cfg->fc_table);
 
 
 
1810		}
1811	} else {
1812		table = fib6_new_table(net, cfg->fc_table);
 
 
 
 
 
 
 
 
 
 
 
1813	}
1814
1815	if (!table)
1816		goto out;
1817
1818	rt = ip6_dst_alloc(net, NULL,
1819			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
 
 
 
 
 
 
 
1820
1821	if (!rt) {
1822		err = -ENOMEM;
 
 
 
 
1823		goto out;
1824	}
1825
1826	if (cfg->fc_flags & RTF_EXPIRES)
1827		rt6_set_expires(rt, jiffies +
1828				clock_t_to_jiffies(cfg->fc_expires));
1829	else
1830		rt6_clean_expires(rt);
1831
1832	if (cfg->fc_protocol == RTPROT_UNSPEC)
1833		cfg->fc_protocol = RTPROT_BOOT;
1834	rt->rt6i_protocol = cfg->fc_protocol;
 
 
 
 
1835
1836	addr_type = ipv6_addr_type(&cfg->fc_dst);
 
1837
1838	if (addr_type & IPV6_ADDR_MULTICAST)
1839		rt->dst.input = ip6_mc_input;
1840	else if (cfg->fc_flags & RTF_LOCAL)
1841		rt->dst.input = ip6_input;
1842	else
1843		rt->dst.input = ip6_forward;
 
 
 
1844
1845	rt->dst.output = ip6_output;
 
 
 
 
 
 
 
 
1846
1847	if (cfg->fc_encap) {
1848		struct lwtunnel_state *lwtstate;
 
 
 
 
 
 
 
 
1849
1850		err = lwtunnel_build_state(dev, cfg->fc_encap_type,
1851					   cfg->fc_encap, AF_INET6, cfg,
1852					   &lwtstate);
1853		if (err)
1854			goto out;
1855		rt->dst.lwtstate = lwtstate_get(lwtstate);
1856		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1857			rt->dst.lwtstate->orig_output = rt->dst.output;
1858			rt->dst.output = lwtunnel_output;
1859		}
1860		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1861			rt->dst.lwtstate->orig_input = rt->dst.input;
1862			rt->dst.input = lwtunnel_input;
1863		}
1864	}
1865
1866	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1867	rt->rt6i_dst.plen = cfg->fc_dst_len;
1868	if (rt->rt6i_dst.plen == 128)
1869		rt->dst.flags |= DST_HOST;
 
1870
1871#ifdef CONFIG_IPV6_SUBTREES
1872	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1873	rt->rt6i_src.plen = cfg->fc_src_len;
1874#endif
1875
1876	rt->rt6i_metric = cfg->fc_metric;
1877
1878	/* We cannot add true routes via loopback here,
1879	   they would result in kernel looping; promote them to reject routes
1880	 */
1881	if ((cfg->fc_flags & RTF_REJECT) ||
1882	    (dev && (dev->flags & IFF_LOOPBACK) &&
1883	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
1884	     !(cfg->fc_flags & RTF_LOCAL))) {
1885		/* hold loopback dev/idev if we haven't done so. */
1886		if (dev != net->loopback_dev) {
1887			if (dev) {
1888				dev_put(dev);
1889				in6_dev_put(idev);
1890			}
1891			dev = net->loopback_dev;
1892			dev_hold(dev);
1893			idev = in6_dev_get(dev);
1894			if (!idev) {
1895				err = -ENODEV;
1896				goto out;
1897			}
1898		}
1899		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1900		switch (cfg->fc_type) {
1901		case RTN_BLACKHOLE:
1902			rt->dst.error = -EINVAL;
1903			rt->dst.output = dst_discard_out;
1904			rt->dst.input = dst_discard;
1905			break;
1906		case RTN_PROHIBIT:
1907			rt->dst.error = -EACCES;
1908			rt->dst.output = ip6_pkt_prohibit_out;
1909			rt->dst.input = ip6_pkt_prohibit;
1910			break;
1911		case RTN_THROW:
1912		case RTN_UNREACHABLE:
1913		default:
1914			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1915					: (cfg->fc_type == RTN_UNREACHABLE)
1916					? -EHOSTUNREACH : -ENETUNREACH;
1917			rt->dst.output = ip6_pkt_discard_out;
1918			rt->dst.input = ip6_pkt_discard;
1919			break;
1920		}
1921		goto install_route;
1922	}
1923
1924	if (cfg->fc_flags & RTF_GATEWAY) {
1925		const struct in6_addr *gw_addr;
1926		int gwa_type;
1927
1928		gw_addr = &cfg->fc_gateway;
1929		gwa_type = ipv6_addr_type(gw_addr);
1930
1931		/* if gw_addr is local we will fail to detect this in case the
1932		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
1933		 * will return the already-added prefix route via the interface
1934		 * that the prefix route was assigned to, which might be non-loopback.
1935		 */
1936		err = -EINVAL;
1937		if (ipv6_chk_addr_and_flags(net, gw_addr,
1938					    gwa_type & IPV6_ADDR_LINKLOCAL ?
1939					    dev : NULL, 0, 0))
1940			goto out;
1941
1942		rt->rt6i_gateway = *gw_addr;
1943
1944		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1945			struct rt6_info *grt;
1946
1947			/* IPv6 strictly inhibits using non-link-local
1948			   addresses as nexthop addresses.
1949			   Otherwise, the router will not be able to send
1950			   redirects.  That is usually right, but in some
1951			   (rare!) circumstances (SIT, PtP, NBMA NOARP links)
1952			   it is handy to allow some exceptions. --ANK
1953			 */
1954			if (!(gwa_type & IPV6_ADDR_UNICAST))
1955				goto out;
1956
1957			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1958
1959			err = -EHOSTUNREACH;
1960			if (!grt)
1961				goto out;
1962			if (dev) {
1963				if (dev != grt->dst.dev) {
1964					ip6_rt_put(grt);
1965					goto out;
1966				}
1967			} else {
1968				dev = grt->dst.dev;
1969				idev = grt->rt6i_idev;
1970				dev_hold(dev);
1971				in6_dev_hold(grt->rt6i_idev);
1972			}
1973			if (!(grt->rt6i_flags & RTF_GATEWAY))
1974				err = 0;
1975			ip6_rt_put(grt);
1976
1977			if (err)
1978				goto out;
1979		}
1980		err = -EINVAL;
1981		if (!dev || (dev->flags & IFF_LOOPBACK))
1982			goto out;
1983	}
1984
1985	err = -ENODEV;
1986	if (!dev)
1987		goto out;
1988
1989	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1990		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1991			err = -EINVAL;
1992			goto out;
1993		}
1994		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1995		rt->rt6i_prefsrc.plen = 128;
1996	} else
1997		rt->rt6i_prefsrc.plen = 0;
1998
1999	rt->rt6i_flags = cfg->fc_flags;
2000
2001install_route:
2002	rt->dst.dev = dev;
2003	rt->rt6i_idev = idev;
2004	rt->rt6i_table = table;
2005
2006	cfg->fc_nlinfo.nl_net = dev_net(dev);
2007
2008	return rt;
2009out:
2010	if (dev)
2011		dev_put(dev);
2012	if (idev)
2013		in6_dev_put(idev);
2014	if (rt)
2015		dst_free(&rt->dst);
2016
2017	return ERR_PTR(err);
2018}
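/*
 * Illustrative userspace sketch (not part of this file): how a destination
 * is masked down to its prefix length before being stored in rt6i_dst
 * above, in the spirit of ipv6_addr_prefix().  mask_prefix() is an
 * illustrative name, not a kernel API.
 */
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

static void mask_prefix(struct in6_addr *pfx, const struct in6_addr *addr,
			int plen)
{
	int o = plen >> 3;	/* whole bytes covered by the prefix */
	int b = plen & 7;	/* leftover bits in the next byte */

	memset(pfx, 0, sizeof(*pfx));
	memcpy(pfx, addr, o);
	if (b)
		pfx->s6_addr[o] = addr->s6_addr[o] & (0xff00 >> b);
}

int main(void)
{
	struct in6_addr addr, pfx;
	char buf[INET6_ADDRSTRLEN];

	inet_pton(AF_INET6, "2001:db8:abcd:1234::1", &addr);
	mask_prefix(&pfx, &addr, 52);
	printf("%s/52\n", inet_ntop(AF_INET6, &pfx, buf, sizeof(buf)));
	/* prints 2001:db8:abcd:1000::/52 */
	return 0;
}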
2019
2020int ip6_route_add(struct fib6_config *cfg)
2021{
2022	struct mx6_config mxc = { .mx = NULL, };
2023	struct rt6_info *rt;
2024	int err;
2025
2026	rt = ip6_route_info_create(cfg);
2027	if (IS_ERR(rt)) {
2028		err = PTR_ERR(rt);
2029		rt = NULL;
2030		goto out;
2031	}
2032
2033	err = ip6_convert_metrics(&mxc, cfg);
2034	if (err)
2035		goto out;
2036
2037	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
2038
2039	kfree(mxc.mx);
2040
2041	return err;
2042out:
2043	if (rt)
2044		dst_free(&rt->dst);
2045
2046	return err;
2047}
2048
2049static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2050{
2051	int err;
2052	struct fib6_table *table;
2053	struct net *net = dev_net(rt->dst.dev);
2054
2055	if (rt == net->ipv6.ip6_null_entry ||
2056	    rt->dst.flags & DST_NOCACHE) {
2057		err = -ENOENT;
2058		goto out;
2059	}
2060
2061	table = rt->rt6i_table;
2062	write_lock_bh(&table->tb6_lock);
2063	err = fib6_del(rt, info);
2064	write_unlock_bh(&table->tb6_lock);
2065
2066out:
2067	ip6_rt_put(rt);
2068	return err;
2069}
2070
2071int ip6_del_rt(struct rt6_info *rt)
2072{
2073	struct nl_info info = {
2074		.nl_net = dev_net(rt->dst.dev),
2075	};
2076	return __ip6_del_rt(rt, &info);
2077}
2078
2079static int ip6_route_del(struct fib6_config *cfg)
2080{
2081	struct fib6_table *table;
2082	struct fib6_node *fn;
2083	struct rt6_info *rt;
2084	int err = -ESRCH;
2085
2086	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
2087	if (!table)
2088		return err;
2089
2090	read_lock_bh(&table->tb6_lock);
2091
2092	fn = fib6_locate(&table->tb6_root,
2093			 &cfg->fc_dst, cfg->fc_dst_len,
2094			 &cfg->fc_src, cfg->fc_src_len);
2095
2096	if (fn) {
2097		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2098			if ((rt->rt6i_flags & RTF_CACHE) &&
2099			    !(cfg->fc_flags & RTF_CACHE))
2100				continue;
2101			if (cfg->fc_ifindex &&
2102			    (!rt->dst.dev ||
2103			     rt->dst.dev->ifindex != cfg->fc_ifindex))
2104				continue;
2105			if (cfg->fc_flags & RTF_GATEWAY &&
2106			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
2107				continue;
2108			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
2109				continue;
2110			dst_hold(&rt->dst);
2111			read_unlock_bh(&table->tb6_lock);
2112
2113			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2114		}
2115	}
2116	read_unlock_bh(&table->tb6_lock);
2117
2118	return err;
2119}
2120
2121static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
2122{
2123	struct netevent_redirect netevent;
2124	struct rt6_info *rt, *nrt = NULL;
2125	struct ndisc_options ndopts;
2126	struct inet6_dev *in6_dev;
2127	struct neighbour *neigh;
2128	struct rd_msg *msg;
2129	int optlen, on_link;
2130	u8 *lladdr;
2131
2132	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
2133	optlen -= sizeof(*msg);
2134
2135	if (optlen < 0) {
2136		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
2137		return;
2138	}
2139
2140	msg = (struct rd_msg *)icmp6_hdr(skb);
2141
2142	if (ipv6_addr_is_multicast(&msg->dest)) {
2143		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
2144		return;
2145	}
2146
2147	on_link = 0;
2148	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
2149		on_link = 1;
2150	} else if (ipv6_addr_type(&msg->target) !=
2151		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
2152		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
2153		return;
2154	}
2155
2156	in6_dev = __in6_dev_get(skb->dev);
2157	if (!in6_dev)
2158		return;
2159	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2160		return;
2161
2162	/* RFC2461 8.1:
2163	 *	The IP source address of the Redirect MUST be the same as the current
2164	 *	first-hop router for the specified ICMP Destination Address.
2165	 */
2166
2167	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
2168		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2169		return;
2170	}
2171
2172	lladdr = NULL;
2173	if (ndopts.nd_opts_tgt_lladdr) {
2174		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2175					     skb->dev);
2176		if (!lladdr) {
2177			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2178			return;
2179		}
2180	}
2181
2182	rt = (struct rt6_info *) dst;
2183	if (rt->rt6i_flags & RTF_REJECT) {
2184		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
2185		return;
2186	}
2187
2188	/* Redirect received -> path was valid.
2189	 * Redirects are sent only in response to data packets,
2190	 * so this nexthop is apparently reachable. --ANK
2191	 */
2192	dst_confirm(&rt->dst);
2193
2194	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
2195	if (!neigh)
2196		return;
2197
2198	/*
2199	 *	We have finally decided to accept it.
2200	 */
2201
2202	neigh_update(neigh, lladdr, NUD_STALE,
2203		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
2204		     NEIGH_UPDATE_F_OVERRIDE|
2205		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
2206				     NEIGH_UPDATE_F_ISROUTER))
2207		     );
2208
2209	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
2210	if (!nrt)
2211		goto out;
2212
2213	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2214	if (on_link)
2215		nrt->rt6i_flags &= ~RTF_GATEWAY;
2216
2217	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
2218
2219	if (ip6_ins_rt(nrt))
2220		goto out;
2221
2222	netevent.old = &rt->dst;
2223	netevent.new = &nrt->dst;
2224	netevent.daddr = &msg->dest;
2225	netevent.neigh = neigh;
2226	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2227
2228	if (rt->rt6i_flags & RTF_CACHE) {
2229		rt = (struct rt6_info *) dst_clone(&rt->dst);
2230		ip6_del_rt(rt);
2231	}
2232
2233out:
2234	neigh_release(neigh);
2235}
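/*
 * Illustrative userspace sketch (not part of this file): the acceptance
 * rule applied above -- a redirect target must either equal the
 * destination (an on-link redirect) or be a link-local unicast address
 * (a redirect to a better first-hop router).  redirect_target_ok() is an
 * illustrative name, not a kernel function.
 */
#include <stdbool.h>
#include <string.h>
#include <arpa/inet.h>

static bool is_linklocal(const struct in6_addr *a)
{
	/* fe80::/10 */
	return a->s6_addr[0] == 0xfe && (a->s6_addr[1] & 0xc0) == 0x80;
}

static bool redirect_target_ok(const struct in6_addr *dest,
			       const struct in6_addr *target, bool *on_link)
{
	*on_link = !memcmp(dest, target, sizeof(*dest));
	return *on_link || is_linklocal(target);
}

int main(void)
{
	struct in6_addr dest, target;
	bool on_link;

	inet_pton(AF_INET6, "2001:db8::1", &dest);
	inet_pton(AF_INET6, "fe80::1", &target);
	return redirect_target_ok(&dest, &target, &on_link) ? 0 : 1;
}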
2236
2237/*
2238 *	Misc support functions
2239 */
2240
2241static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2242{
2243	BUG_ON(from->dst.from);
2244
2245	rt->rt6i_flags &= ~RTF_EXPIRES;
2246	dst_hold(&from->dst);
2247	rt->dst.from = &from->dst;
2248	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2249}
2250
2251static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2252{
2253	rt->dst.input = ort->dst.input;
2254	rt->dst.output = ort->dst.output;
2255	rt->rt6i_dst = ort->rt6i_dst;
2256	rt->dst.error = ort->dst.error;
2257	rt->rt6i_idev = ort->rt6i_idev;
2258	if (rt->rt6i_idev)
2259		in6_dev_hold(rt->rt6i_idev);
2260	rt->dst.lastuse = jiffies;
2261	rt->rt6i_gateway = ort->rt6i_gateway;
2262	rt->rt6i_flags = ort->rt6i_flags;
2263	rt6_set_from(rt, ort);
2264	rt->rt6i_metric = ort->rt6i_metric;
2265#ifdef CONFIG_IPV6_SUBTREES
2266	rt->rt6i_src = ort->rt6i_src;
2267#endif
2268	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2269	rt->rt6i_table = ort->rt6i_table;
2270	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
2271}
2272
2273#ifdef CONFIG_IPV6_ROUTE_INFO
2274static struct rt6_info *rt6_get_route_info(struct net *net,
2275					   const struct in6_addr *prefix, int prefixlen,
2276					   const struct in6_addr *gwaddr, int ifindex)
2277{
2278	struct fib6_node *fn;
2279	struct rt6_info *rt = NULL;
2280	struct fib6_table *table;
2281
2282	table = fib6_get_table(net, RT6_TABLE_INFO);
2283	if (!table)
2284		return NULL;
2285
2286	read_lock_bh(&table->tb6_lock);
2287	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2288	if (!fn)
2289		goto out;
2290
2291	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2292		if (rt->dst.dev->ifindex != ifindex)
2293			continue;
2294		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2295			continue;
2296		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2297			continue;
2298		dst_hold(&rt->dst);
2299		break;
2300	}
2301out:
2302	read_unlock_bh(&table->tb6_lock);
2303	return rt;
2304}
2305
2306static struct rt6_info *rt6_add_route_info(struct net *net,
2307					   const struct in6_addr *prefix, int prefixlen,
2308					   const struct in6_addr *gwaddr, int ifindex,
2309					   unsigned int pref)
2310{
2311	struct fib6_config cfg = {
2312		.fc_metric	= IP6_RT_PRIO_USER,
2313		.fc_ifindex	= ifindex,
2314		.fc_dst_len	= prefixlen,
2315		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2316				  RTF_UP | RTF_PREF(pref),
2317		.fc_nlinfo.portid = 0,
2318		.fc_nlinfo.nlh = NULL,
2319		.fc_nlinfo.nl_net = net,
2320	};
2321
2322	cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO;
2323	cfg.fc_dst = *prefix;
2324	cfg.fc_gateway = *gwaddr;
2325
2326	/* We should treat it as a default route if prefix length is 0. */
2327	if (!prefixlen)
2328		cfg.fc_flags |= RTF_DEFAULT;
2329
2330	ip6_route_add(&cfg);
2331
2332	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
2333}
2334#endif
2335
2336struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2337{
2338	struct rt6_info *rt;
2339	struct fib6_table *table;
2340
2341	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
2342	if (!table)
2343		return NULL;
2344
2345	read_lock_bh(&table->tb6_lock);
2346	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2347		if (dev == rt->dst.dev &&
2348		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2349		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
2350			break;
2351	}
2352	if (rt)
2353		dst_hold(&rt->dst);
2354	read_unlock_bh(&table->tb6_lock);
2355	return rt;
2356}
2357
2358struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2359				     struct net_device *dev,
2360				     unsigned int pref)
2361{
2362	struct fib6_config cfg = {
2363		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
2364		.fc_metric	= IP6_RT_PRIO_USER,
2365		.fc_ifindex	= dev->ifindex,
2366		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2367				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2368		.fc_nlinfo.portid = 0,
2369		.fc_nlinfo.nlh = NULL,
2370		.fc_nlinfo.nl_net = dev_net(dev),
2371	};
2372
2373	cfg.fc_gateway = *gwaddr;
2374
2375	ip6_route_add(&cfg);
2376
2377	return rt6_get_dflt_router(gwaddr, dev);
2378}
2379
2380void rt6_purge_dflt_routers(struct net *net)
2381{
2382	struct rt6_info *rt;
2383	struct fib6_table *table;
2384
2385	/* NOTE: Keep consistent with rt6_get_dflt_router */
2386	table = fib6_get_table(net, RT6_TABLE_DFLT);
2387	if (!table)
2388		return;
2389
2390restart:
2391	read_lock_bh(&table->tb6_lock);
2392	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2393		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2394		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2395			dst_hold(&rt->dst);
2396			read_unlock_bh(&table->tb6_lock);
2397			ip6_del_rt(rt);
2398			goto restart;
2399		}
2400	}
2401	read_unlock_bh(&table->tb6_lock);
2402}
2403
2404static void rtmsg_to_fib6_config(struct net *net,
2405				 struct in6_rtmsg *rtmsg,
2406				 struct fib6_config *cfg)
2407{
2408	memset(cfg, 0, sizeof(*cfg));
2409
2410	cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2411			 : RT6_TABLE_MAIN;
2412	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2413	cfg->fc_metric = rtmsg->rtmsg_metric;
2414	cfg->fc_expires = rtmsg->rtmsg_info;
2415	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2416	cfg->fc_src_len = rtmsg->rtmsg_src_len;
2417	cfg->fc_flags = rtmsg->rtmsg_flags;
2418
2419	cfg->fc_nlinfo.nl_net = net;
2420
2421	cfg->fc_dst = rtmsg->rtmsg_dst;
2422	cfg->fc_src = rtmsg->rtmsg_src;
2423	cfg->fc_gateway = rtmsg->rtmsg_gateway;
2424}
2425
2426int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2427{
2428	struct fib6_config cfg;
2429	struct in6_rtmsg rtmsg;
2430	int err;
2431
2432	switch (cmd) {
2433	case SIOCADDRT:		/* Add a route */
2434	case SIOCDELRT:		/* Delete a route */
2435		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2436			return -EPERM;
2437		err = copy_from_user(&rtmsg, arg,
2438				     sizeof(struct in6_rtmsg));
2439		if (err)
2440			return -EFAULT;
2441
2442		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2443
2444		rtnl_lock();
2445		switch (cmd) {
2446		case SIOCADDRT:
2447			err = ip6_route_add(&cfg);
2448			break;
2449		case SIOCDELRT:
2450			err = ip6_route_del(&cfg);
2451			break;
2452		default:
2453			err = -EINVAL;
2454		}
2455		rtnl_unlock();
2456
2457		return err;
2458	}
2459
2460	return -EINVAL;
2461}
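/*
 * Illustrative userspace sketch (not part of this file): adding a route
 * through the ioctl path handled above, the same interface the legacy
 * route(8) tool uses.  The 2001:db8::/32 prefix and the interface index
 * are assumed test values; error handling is mostly elided.
 */
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <linux/sockios.h>	/* SIOCADDRT */
#include <linux/ipv6_route.h>	/* struct in6_rtmsg, RTF_* */

int add_test_route(void)
{
	struct in6_rtmsg rt;
	int ret, fd = socket(AF_INET6, SOCK_DGRAM, 0);

	if (fd < 0)
		return -1;
	memset(&rt, 0, sizeof(rt));
	inet_pton(AF_INET6, "2001:db8::", &rt.rtmsg_dst);
	rt.rtmsg_dst_len = 32;
	rt.rtmsg_metric  = 1;	/* 0 would become IP6_RT_PRIO_USER above */
	rt.rtmsg_flags   = RTF_UP;
	rt.rtmsg_ifindex = 2;	/* assumed interface index */

	ret = ioctl(fd, SIOCADDRT, &rt);	/* needs CAP_NET_ADMIN */
	close(fd);
	return ret;
}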
2462
2463/*
2464 *	Drop the packet on the floor
2465 */
2466
2467static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2468{
2469	int type;
2470	struct dst_entry *dst = skb_dst(skb);
2471	switch (ipstats_mib_noroutes) {
2472	case IPSTATS_MIB_INNOROUTES:
2473		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2474		if (type == IPV6_ADDR_ANY) {
2475			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2476				      IPSTATS_MIB_INADDRERRORS);
2477			break;
2478		}
2479		/* FALLTHROUGH */
2480	case IPSTATS_MIB_OUTNOROUTES:
2481		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2482			      ipstats_mib_noroutes);
2483		break;
2484	}
2485	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2486	kfree_skb(skb);
2487	return 0;
2488}
2489
2490static int ip6_pkt_discard(struct sk_buff *skb)
2491{
2492	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2493}
2494
2495static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2496{
2497	skb->dev = skb_dst(skb)->dev;
2498	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2499}
2500
2501static int ip6_pkt_prohibit(struct sk_buff *skb)
2502{
2503	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2504}
2505
2506static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2507{
2508	skb->dev = skb_dst(skb)->dev;
2509	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2510}
2511
2512/*
2513 *	Allocate a dst for local (unicast / anycast) address.
2514 */
2515
2516struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2517				    const struct in6_addr *addr,
2518				    bool anycast)
2519{
2520	u32 tb_id;
2521	struct net *net = dev_net(idev->dev);
2522	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2523					    DST_NOCOUNT);
2524	if (!rt)
2525		return ERR_PTR(-ENOMEM);
2526
2527	in6_dev_hold(idev);
2528
2529	rt->dst.flags |= DST_HOST;
2530	rt->dst.input = ip6_input;
2531	rt->dst.output = ip6_output;
2532	rt->rt6i_idev = idev;
2533
2534	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2535	if (anycast)
2536		rt->rt6i_flags |= RTF_ANYCAST;
2537	else
2538		rt->rt6i_flags |= RTF_LOCAL;
2539
2540	rt->rt6i_gateway  = *addr;
2541	rt->rt6i_dst.addr = *addr;
2542	rt->rt6i_dst.plen = 128;
2543	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2544	rt->rt6i_table = fib6_get_table(net, tb_id);
2545	rt->dst.flags |= DST_NOCACHE;
2546
2547	atomic_set(&rt->dst.__refcnt, 1);
2548
2549	return rt;
2550}
2551
2552int ip6_route_get_saddr(struct net *net,
2553			struct rt6_info *rt,
2554			const struct in6_addr *daddr,
2555			unsigned int prefs,
2556			struct in6_addr *saddr)
2557{
2558	struct inet6_dev *idev =
2559		rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
2560	int err = 0;
2561	if (rt && rt->rt6i_prefsrc.plen)
2562		*saddr = rt->rt6i_prefsrc.addr;
2563	else
2564		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2565					 daddr, prefs, saddr);
2566	return err;
2567}
2568
2569/* remove a deleted IP from prefsrc entries */
2570struct arg_dev_net_ip {
2571	struct net_device *dev;
2572	struct net *net;
2573	struct in6_addr *addr;
2574};
2575
2576static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2577{
2578	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2579	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2580	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2581
2582	if (((void *)rt->dst.dev == dev || !dev) &&
2583	    rt != net->ipv6.ip6_null_entry &&
2584	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2585		/* remove prefsrc entry */
2586		rt->rt6i_prefsrc.plen = 0;
2587	}
2588	return 0;
2589}
2590
2591void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2592{
2593	struct net *net = dev_net(ifp->idev->dev);
2594	struct arg_dev_net_ip adni = {
2595		.dev = ifp->idev->dev,
2596		.net = net,
2597		.addr = &ifp->addr,
2598	};
2599	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2600}
2601
2602#define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2603#define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
2604
2605/* Remove routers and update dst entries when a gateway turns into a host. */
2606static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2607{
2608	struct in6_addr *gateway = (struct in6_addr *)arg;
2609
2610	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2611	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2612	     ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2613		return -1;
2614	}
2615	return 0;
2616}
2617
2618void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2619{
2620	fib6_clean_all(net, fib6_clean_tohost, gateway);
2621}
2622
2623struct arg_dev_net {
2624	struct net_device *dev;
2625	struct net *net;
2626};
2627
2628static int fib6_ifdown(struct rt6_info *rt, void *arg)
2629{
2630	const struct arg_dev_net *adn = arg;
2631	const struct net_device *dev = adn->dev;
2632
2633	if ((rt->dst.dev == dev || !dev) &&
2634	    rt != adn->net->ipv6.ip6_null_entry)
2635		return -1;
2636
2637	return 0;
2638}
2639
2640void rt6_ifdown(struct net *net, struct net_device *dev)
2641{
2642	struct arg_dev_net adn = {
2643		.dev = dev,
2644		.net = net,
2645	};
2646
2647	fib6_clean_all(net, fib6_ifdown, &adn);
2648	icmp6_clean_all(fib6_ifdown, &adn);
2649	if (dev)
2650		rt6_uncached_list_flush_dev(net, dev);
2651}
2652
2653struct rt6_mtu_change_arg {
2654	struct net_device *dev;
2655	unsigned int mtu;
2656};
2657
2658static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2659{
2660	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2661	struct inet6_dev *idev;
2662
2663	/* In IPv6, PMTU discovery is not optional,
2664	   so the RTAX_MTU lock cannot disable it.
2665	   We still use this lock to block changes
2666	   caused by addrconf/ndisc.
2667	*/
2668
2669	idev = __in6_dev_get(arg->dev);
2670	if (!idev)
2671		return 0;
2672
2673	/* For an administrative MTU increase, there is no way to discover
2674	   an IPv6 PMTU increase, so the PMTU increase should be updated here.
2675	   Since RFC 1981 doesn't cover administrative MTU increases,
2676	   updating the PMTU on increase is a MUST (e.g. for jumbo frames).
2677	 */
2678	/*
2679	   If the new MTU is less than the route PMTU, the new MTU will be the
2680	   lowest MTU in the path; update the route PMTU to reflect PMTU
2681	   decreases.  If the new MTU is greater than the route PMTU, and the
2682	   old MTU is the lowest MTU in the path, update the route PMTU
2683	   to reflect the increase.  In this case, if another node along the
2684	   path has a lower MTU, a Packet Too Big message will trigger PMTU
2685	   discovery again.
2686	 */
2687	if (rt->dst.dev == arg->dev &&
2688	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2689		if (rt->rt6i_flags & RTF_CACHE) {
2690			/* For RTF_CACHE with rt6i_pmtu == 0
2691			 * (i.e. a redirected route),
2692			 * the metrics of its rt->dst.from has already
2693			 * been updated.
2694			 */
2695			if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2696				rt->rt6i_pmtu = arg->mtu;
2697		} else if (dst_mtu(&rt->dst) >= arg->mtu ||
2698			   (dst_mtu(&rt->dst) < arg->mtu &&
2699			    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2700			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2701		}
2702	}
2703	return 0;
2704}
2705
2706void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2707{
2708	struct rt6_mtu_change_arg arg = {
2709		.dev = dev,
2710		.mtu = mtu,
2711	};
2712
2713	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2714}
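/*
 * Illustrative sketch (not part of this file): the per-route update rule
 * above reduced to a pure function.  new_pmtu() is an illustrative name;
 * dev_mtu stands for the interface's previous MTU (idev->cnf.mtu6), and
 * the RTAX_MTU-locked and RTF_CACHE cases are left out.
 */
#include <stdio.h>

static unsigned int new_pmtu(unsigned int route_mtu, unsigned int dev_mtu,
			     unsigned int new_mtu)
{
	if (route_mtu >= new_mtu)	/* shrink to the new ceiling */
		return new_mtu;
	if (route_mtu == dev_mtu)	/* grew, and the route tracked the device */
		return new_mtu;
	return route_mtu;		/* PMTU was learned from the path; keep it */
}

int main(void)
{
	/* route at 1500, device raised from 1500 to 9000: follow the device */
	printf("%u\n", new_pmtu(1500, 1500, 9000));	/* 9000 */
	/* route learned 1400 from a Packet Too Big: ignore the increase */
	printf("%u\n", new_pmtu(1400, 1500, 9000));	/* 1400 */
	return 0;
}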
2715
2716static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2717	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2718	[RTA_OIF]               = { .type = NLA_U32 },
2719	[RTA_IIF]		= { .type = NLA_U32 },
2720	[RTA_PRIORITY]          = { .type = NLA_U32 },
2721	[RTA_METRICS]           = { .type = NLA_NESTED },
2722	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
2723	[RTA_PREF]              = { .type = NLA_U8 },
2724	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
2725	[RTA_ENCAP]		= { .type = NLA_NESTED },
2726	[RTA_EXPIRES]		= { .type = NLA_U32 },
2727};
2728
2729static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2730			      struct fib6_config *cfg)
2731{
2732	struct rtmsg *rtm;
2733	struct nlattr *tb[RTA_MAX+1];
2734	unsigned int pref;
2735	int err;
2736
2737	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2738	if (err < 0)
2739		goto errout;
2740
2741	err = -EINVAL;
2742	rtm = nlmsg_data(nlh);
2743	memset(cfg, 0, sizeof(*cfg));
2744
2745	cfg->fc_table = rtm->rtm_table;
2746	cfg->fc_dst_len = rtm->rtm_dst_len;
2747	cfg->fc_src_len = rtm->rtm_src_len;
2748	cfg->fc_flags = RTF_UP;
2749	cfg->fc_protocol = rtm->rtm_protocol;
2750	cfg->fc_type = rtm->rtm_type;
2751
2752	if (rtm->rtm_type == RTN_UNREACHABLE ||
2753	    rtm->rtm_type == RTN_BLACKHOLE ||
2754	    rtm->rtm_type == RTN_PROHIBIT ||
2755	    rtm->rtm_type == RTN_THROW)
2756		cfg->fc_flags |= RTF_REJECT;
2757
2758	if (rtm->rtm_type == RTN_LOCAL)
2759		cfg->fc_flags |= RTF_LOCAL;
2760
2761	if (rtm->rtm_flags & RTM_F_CLONED)
2762		cfg->fc_flags |= RTF_CACHE;
2763
2764	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2765	cfg->fc_nlinfo.nlh = nlh;
2766	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2767
2768	if (tb[RTA_GATEWAY]) {
2769		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2770		cfg->fc_flags |= RTF_GATEWAY;
2771	}
2772
2773	if (tb[RTA_DST]) {
2774		int plen = (rtm->rtm_dst_len + 7) >> 3;
2775
2776		if (nla_len(tb[RTA_DST]) < plen)
2777			goto errout;
2778
2779		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2780	}
2781
2782	if (tb[RTA_SRC]) {
2783		int plen = (rtm->rtm_src_len + 7) >> 3;
2784
2785		if (nla_len(tb[RTA_SRC]) < plen)
2786			goto errout;
2787
2788		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2789	}
2790
2791	if (tb[RTA_PREFSRC])
2792		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2793
2794	if (tb[RTA_OIF])
2795		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2796
2797	if (tb[RTA_PRIORITY])
2798		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2799
2800	if (tb[RTA_METRICS]) {
2801		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2802		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2803	}
2804
2805	if (tb[RTA_TABLE])
2806		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2807
2808	if (tb[RTA_MULTIPATH]) {
2809		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2810		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2811	}
2812
2813	if (tb[RTA_PREF]) {
2814		pref = nla_get_u8(tb[RTA_PREF]);
2815		if (pref != ICMPV6_ROUTER_PREF_LOW &&
2816		    pref != ICMPV6_ROUTER_PREF_HIGH)
2817			pref = ICMPV6_ROUTER_PREF_MEDIUM;
2818		cfg->fc_flags |= RTF_PREF(pref);
2819	}
2820
2821	if (tb[RTA_ENCAP])
2822		cfg->fc_encap = tb[RTA_ENCAP];
2823
2824	if (tb[RTA_ENCAP_TYPE])
2825		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2826
2827	if (tb[RTA_EXPIRES]) {
2828		unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
2829
2830		if (addrconf_finite_timeout(timeout)) {
2831			cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
2832			cfg->fc_flags |= RTF_EXPIRES;
2833		}
2834	}
2835
2836	err = 0;
2837errout:
2838	return err;
2839}
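/*
 * Illustrative userspace sketch (not part of this file): composing the
 * RTM_NEWROUTE request that rtm_to_fib6_config() above decodes -- an
 * rtmsg header followed by RTA_DST and RTA_OIF attributes.  Sending it
 * over an AF_NETLINK/NETLINK_ROUTE socket and all error handling are
 * elided; the prefix and ifindex are assumed test values.
 */
#include <string.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

static void put_rta(struct nlmsghdr *nlh, int type, const void *data, int len)
{
	struct rtattr *rta = (struct rtattr *)((char *)nlh +
					       NLMSG_ALIGN(nlh->nlmsg_len));

	rta->rta_type = type;
	rta->rta_len  = RTA_LENGTH(len);
	memcpy(RTA_DATA(rta), data, len);
	nlh->nlmsg_len = NLMSG_ALIGN(nlh->nlmsg_len) + RTA_ALIGN(rta->rta_len);
}

int build_newroute(char *buf, size_t size)
{
	struct nlmsghdr *nlh = (struct nlmsghdr *)buf;
	struct rtmsg *rtm;
	struct in6_addr dst;
	unsigned int oif = 2;			/* assumed ifindex */

	memset(buf, 0, size);
	nlh->nlmsg_len   = NLMSG_LENGTH(sizeof(*rtm));
	nlh->nlmsg_type  = RTM_NEWROUTE;
	/* NLM_F_CREATE matters: see the table-creation warning above */
	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;

	rtm = NLMSG_DATA(nlh);
	rtm->rtm_family   = AF_INET6;
	rtm->rtm_dst_len  = 32;
	rtm->rtm_table    = RT_TABLE_MAIN;
	rtm->rtm_protocol = RTPROT_STATIC;
	rtm->rtm_type     = RTN_UNICAST;

	inet_pton(AF_INET6, "2001:db8::", &dst);
	put_rta(nlh, RTA_DST, &dst, sizeof(dst));
	put_rta(nlh, RTA_OIF, &oif, sizeof(oif));

	return nlh->nlmsg_len;
}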
2840
2841struct rt6_nh {
2842	struct rt6_info *rt6_info;
2843	struct fib6_config r_cfg;
2844	struct mx6_config mxc;
2845	struct list_head next;
2846};
2847
2848static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2849{
2850	struct rt6_nh *nh;
2851
2852	list_for_each_entry(nh, rt6_nh_list, next) {
2853		pr_warn("multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2854			&nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2855			nh->r_cfg.fc_ifindex);
2856	}
2857}
2858
2859static int ip6_route_info_append(struct list_head *rt6_nh_list,
2860				 struct rt6_info *rt, struct fib6_config *r_cfg)
2861{
2862	struct rt6_nh *nh;
2863	struct rt6_info *rtnh;
2864	int err = -EEXIST;
2865
2866	list_for_each_entry(nh, rt6_nh_list, next) {
2867		/* check if rt6_info already exists */
2868		rtnh = nh->rt6_info;
2869
2870		if (rtnh->dst.dev == rt->dst.dev &&
2871		    rtnh->rt6i_idev == rt->rt6i_idev &&
2872		    ipv6_addr_equal(&rtnh->rt6i_gateway,
2873				    &rt->rt6i_gateway))
2874			return err;
2875	}
2876
2877	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2878	if (!nh)
2879		return -ENOMEM;
2880	nh->rt6_info = rt;
2881	err = ip6_convert_metrics(&nh->mxc, r_cfg);
2882	if (err) {
2883		kfree(nh);
2884		return err;
2885	}
2886	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2887	list_add_tail(&nh->next, rt6_nh_list);
2888
2889	return 0;
2890}
2891
2892static int ip6_route_multipath_add(struct fib6_config *cfg)
2893{
2894	struct fib6_config r_cfg;
2895	struct rtnexthop *rtnh;
2896	struct rt6_info *rt;
2897	struct rt6_nh *err_nh;
2898	struct rt6_nh *nh, *nh_safe;
2899	int remaining;
2900	int attrlen;
2901	int err = 1;
2902	int nhn = 0;
2903	int replace = (cfg->fc_nlinfo.nlh &&
2904		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
2905	LIST_HEAD(rt6_nh_list);
2906
2907	remaining = cfg->fc_mp_len;
2908	rtnh = (struct rtnexthop *)cfg->fc_mp;
2909
2910	/* Parse a Multipath Entry and build a list (rt6_nh_list) with
2911	 * one rt6_info struct per nexthop
2912	 */
2913	while (rtnh_ok(rtnh, remaining)) {
2914		memcpy(&r_cfg, cfg, sizeof(*cfg));
2915		if (rtnh->rtnh_ifindex)
2916			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2917
2918		attrlen = rtnh_attrlen(rtnh);
2919		if (attrlen > 0) {
2920			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2921
2922			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2923			if (nla) {
2924				r_cfg.fc_gateway = nla_get_in6_addr(nla);
2925				r_cfg.fc_flags |= RTF_GATEWAY;
2926			}
2927			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
2928			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
2929			if (nla)
2930				r_cfg.fc_encap_type = nla_get_u16(nla);
2931		}
2932
2933		rt = ip6_route_info_create(&r_cfg);
2934		if (IS_ERR(rt)) {
2935			err = PTR_ERR(rt);
2936			rt = NULL;
2937			goto cleanup;
2938		}
2939
2940		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
2941		if (err) {
2942			dst_free(&rt->dst);
2943			goto cleanup;
2944		}
2945
2946		rtnh = rtnh_next(rtnh, &remaining);
2947	}
2948
2949	err_nh = NULL;
2950	list_for_each_entry(nh, &rt6_nh_list, next) {
2951		err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
2952		/* nh->rt6_info is used or freed at this point, reset to NULL*/
2953		nh->rt6_info = NULL;
2954		if (err) {
2955			if (replace && nhn)
2956				ip6_print_replace_route_err(&rt6_nh_list);
2957			err_nh = nh;
2958			goto add_errout;
2959		}
2960
2961		/* Because each route is added like a single route, we remove
2962		 * these flags after the first nexthop: if there is a collision,
2963		 * we have already failed to add the first nexthop
2964		 * (fib6_add_rt2node() has rejected it); when replacing, the old
2965		 * nexthops have been replaced by the first new one, and the
2966		 * rest should be appended to it.
2967		 */
2968		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2969						     NLM_F_REPLACE);
2970		nhn++;
2971	}
2972
2973	goto cleanup;
2974
2975add_errout:
2976	/* Delete routes that were already added */
2977	list_for_each_entry(nh, &rt6_nh_list, next) {
2978		if (err_nh == nh)
2979			break;
2980		ip6_route_del(&nh->r_cfg);
2981	}
2982
2983cleanup:
2984	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
2985		if (nh->rt6_info)
2986			dst_free(&nh->rt6_info->dst);
2987		kfree(nh->mxc.mx);
2988		list_del(&nh->next);
2989		kfree(nh);
2990	}
2991
2992	return err;
2993}
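/*
 * Illustrative userspace sketch (not part of this file): the
 * RTA_MULTIPATH payload that ip6_route_multipath_add() above walks with
 * rtnh_ok()/rtnh_next() -- struct rtnexthop entries laid back to back,
 * each optionally followed by its own attributes such as RTA_GATEWAY.
 * put_nexthop() is an illustrative name.
 */
#include <string.h>
#include <arpa/inet.h>
#include <linux/rtnetlink.h>

/* append one nexthop (ifindex + gateway) at p; returns bytes consumed */
static int put_nexthop(char *p, int ifindex, const struct in6_addr *gw)
{
	struct rtnexthop *rtnh = (struct rtnexthop *)p;
	struct rtattr *rta = (struct rtattr *)RTNH_DATA(rtnh);

	memset(rtnh, 0, sizeof(*rtnh));
	rtnh->rtnh_ifindex = ifindex;

	rta->rta_type = RTA_GATEWAY;
	rta->rta_len  = RTA_LENGTH(sizeof(*gw));
	memcpy(RTA_DATA(rta), gw, sizeof(*gw));

	rtnh->rtnh_len = RTNH_LENGTH(RTA_ALIGN(rta->rta_len));
	return RTNH_ALIGN(rtnh->rtnh_len);
}

int main(void)
{
	char buf[256];
	struct in6_addr gw1, gw2;
	int len = 0;

	inet_pton(AF_INET6, "fe80::1", &gw1);
	inet_pton(AF_INET6, "fe80::2", &gw2);
	len += put_nexthop(buf + len, 2, &gw1);
	len += put_nexthop(buf + len, 3, &gw2);
	/* buf[0..len) is what cfg->fc_mp points at above */
	return len ? 0 : 1;
}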
2994
2995static int ip6_route_multipath_del(struct fib6_config *cfg)
2996{
2997	struct fib6_config r_cfg;
2998	struct rtnexthop *rtnh;
2999	int remaining;
3000	int attrlen;
3001	int err = 1, last_err = 0;
3002
3003	remaining = cfg->fc_mp_len;
3004	rtnh = (struct rtnexthop *)cfg->fc_mp;
3005
3006	/* Parse a Multipath Entry */
3007	while (rtnh_ok(rtnh, remaining)) {
3008		memcpy(&r_cfg, cfg, sizeof(*cfg));
3009		if (rtnh->rtnh_ifindex)
3010			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3011
3012		attrlen = rtnh_attrlen(rtnh);
3013		if (attrlen > 0) {
3014			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3015
3016			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3017			if (nla) {
3018				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3019				r_cfg.fc_flags |= RTF_GATEWAY;
3020			}
3021		}
3022		err = ip6_route_del(&r_cfg);
3023		if (err)
3024			last_err = err;
3025
3026		rtnh = rtnh_next(rtnh, &remaining);
3027	}
3028
3029	return last_err;
3030}
3031
3032static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3033{
3034	struct fib6_config cfg;
3035	int err;
3036
3037	err = rtm_to_fib6_config(skb, nlh, &cfg);
3038	if (err < 0)
3039		return err;
3040
3041	if (cfg.fc_mp)
3042		return ip6_route_multipath_del(&cfg);
3043	else
3044		return ip6_route_del(&cfg);
3045}
3046
3047static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3048{
3049	struct fib6_config cfg;
3050	int err;
3051
3052	err = rtm_to_fib6_config(skb, nlh, &cfg);
3053	if (err < 0)
3054		return err;
3055
3056	if (cfg.fc_mp)
3057		return ip6_route_multipath_add(&cfg);
3058	else
3059		return ip6_route_add(&cfg);
3060}
3061
3062static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
3063{
3064	return NLMSG_ALIGN(sizeof(struct rtmsg))
3065	       + nla_total_size(16) /* RTA_SRC */
3066	       + nla_total_size(16) /* RTA_DST */
3067	       + nla_total_size(16) /* RTA_GATEWAY */
3068	       + nla_total_size(16) /* RTA_PREFSRC */
3069	       + nla_total_size(4) /* RTA_TABLE */
3070	       + nla_total_size(4) /* RTA_IIF */
3071	       + nla_total_size(4) /* RTA_OIF */
3072	       + nla_total_size(4) /* RTA_PRIORITY */
3073	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
3074	       + nla_total_size(sizeof(struct rta_cacheinfo))
3075	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
3076	       + nla_total_size(1) /* RTA_PREF */
3077	       + lwtunnel_get_encap_size(rt->dst.lwtstate);
3078}
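/*
 * Illustrative sketch (not part of this file): the arithmetic behind the
 * size estimate above.  nla_total_size(len) is the 4-byte attribute
 * header plus the payload, padded to a 4-byte boundary, so a 16-byte
 * IPv6 address costs 20 bytes and the 1-byte RTA_PREF costs 8.  A
 * standalone re-derivation, not the kernel helper itself.
 */
#include <stdio.h>

#define ALIGN4(x)	(((x) + 3) & ~3)
#define TOTAL(len)	ALIGN4(4 + (len))	/* header + padded payload */

int main(void)
{
	printf("in6_addr: %d\n", TOTAL(16));	/* 20 */
	printf("u32:      %d\n", TOTAL(4));	/* 8  */
	printf("u8:       %d\n", TOTAL(1));	/* 8  */
	return 0;
}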
3079
3080static int rt6_fill_node(struct net *net,
3081			 struct sk_buff *skb, struct rt6_info *rt,
3082			 struct in6_addr *dst, struct in6_addr *src,
3083			 int iif, int type, u32 portid, u32 seq,
3084			 int prefix, int nowait, unsigned int flags)
3085{
3086	u32 metrics[RTAX_MAX];
3087	struct rtmsg *rtm;
3088	struct nlmsghdr *nlh;
3089	long expires;
3090	u32 table;
3091
3092	if (prefix) {	/* user wants prefix routes only */
3093		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
3094			/* success since this is not a prefix route */
3095			return 1;
3096		}
3097	}
3098
3099	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
3100	if (!nlh)
3101		return -EMSGSIZE;
3102
3103	rtm = nlmsg_data(nlh);
3104	rtm->rtm_family = AF_INET6;
3105	rtm->rtm_dst_len = rt->rt6i_dst.plen;
3106	rtm->rtm_src_len = rt->rt6i_src.plen;
3107	rtm->rtm_tos = 0;
3108	if (rt->rt6i_table)
3109		table = rt->rt6i_table->tb6_id;
3110	else
3111		table = RT6_TABLE_UNSPEC;
3112	rtm->rtm_table = table;
3113	if (nla_put_u32(skb, RTA_TABLE, table))
3114		goto nla_put_failure;
3115	if (rt->rt6i_flags & RTF_REJECT) {
3116		switch (rt->dst.error) {
3117		case -EINVAL:
3118			rtm->rtm_type = RTN_BLACKHOLE;
3119			break;
3120		case -EACCES:
3121			rtm->rtm_type = RTN_PROHIBIT;
3122			break;
3123		case -EAGAIN:
3124			rtm->rtm_type = RTN_THROW;
3125			break;
3126		default:
3127			rtm->rtm_type = RTN_UNREACHABLE;
3128			break;
3129		}
3130	}
3131	else if (rt->rt6i_flags & RTF_LOCAL)
3132		rtm->rtm_type = RTN_LOCAL;
3133	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
3134		rtm->rtm_type = RTN_LOCAL;
3135	else
3136		rtm->rtm_type = RTN_UNICAST;
3137	rtm->rtm_flags = 0;
3138	if (!netif_carrier_ok(rt->dst.dev)) {
3139		rtm->rtm_flags |= RTNH_F_LINKDOWN;
3140		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3141			rtm->rtm_flags |= RTNH_F_DEAD;
3142	}
3143	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3144	rtm->rtm_protocol = rt->rt6i_protocol;
3145	if (rt->rt6i_flags & RTF_DYNAMIC)
3146		rtm->rtm_protocol = RTPROT_REDIRECT;
3147	else if (rt->rt6i_flags & RTF_ADDRCONF) {
3148		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
3149			rtm->rtm_protocol = RTPROT_RA;
3150		else
3151			rtm->rtm_protocol = RTPROT_KERNEL;
3152	}
3153
3154	if (rt->rt6i_flags & RTF_CACHE)
3155		rtm->rtm_flags |= RTM_F_CLONED;
3156
3157	if (dst) {
3158		if (nla_put_in6_addr(skb, RTA_DST, dst))
3159			goto nla_put_failure;
3160		rtm->rtm_dst_len = 128;
3161	} else if (rtm->rtm_dst_len)
3162		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
3163			goto nla_put_failure;
3164#ifdef CONFIG_IPV6_SUBTREES
3165	if (src) {
3166		if (nla_put_in6_addr(skb, RTA_SRC, src))
3167			goto nla_put_failure;
3168		rtm->rtm_src_len = 128;
3169	} else if (rtm->rtm_src_len &&
3170		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
3171		goto nla_put_failure;
3172#endif
3173	if (iif) {
3174#ifdef CONFIG_IPV6_MROUTE
3175		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
3176			int err = ip6mr_get_route(net, skb, rtm, nowait);
3177			if (err <= 0) {
3178				if (!nowait) {
3179					if (err == 0)
3180						return 0;
3181					goto nla_put_failure;
3182				} else {
3183					if (err == -EMSGSIZE)
3184						goto nla_put_failure;
3185				}
3186			}
3187		} else
3188#endif
3189			if (nla_put_u32(skb, RTA_IIF, iif))
3190				goto nla_put_failure;
3191	} else if (dst) {
3192		struct in6_addr saddr_buf;
3193		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
3194		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3195			goto nla_put_failure;
3196	}
3197
3198	if (rt->rt6i_prefsrc.plen) {
3199		struct in6_addr saddr_buf;
3200		saddr_buf = rt->rt6i_prefsrc.addr;
3201		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3202			goto nla_put_failure;
3203	}
3204
3205	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3206	if (rt->rt6i_pmtu)
3207		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3208	if (rtnetlink_put_metrics(skb, metrics) < 0)
3209		goto nla_put_failure;
3210
3211	if (rt->rt6i_flags & RTF_GATEWAY) {
3212		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3213			goto nla_put_failure;
3214	}
3215
3216	if (rt->dst.dev &&
3217	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3218		goto nla_put_failure;
3219	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3220		goto nla_put_failure;
3221
3222	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
3223
3224	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
3225		goto nla_put_failure;
3226
3227	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3228		goto nla_put_failure;
3229
3230	lwtunnel_fill_encap(skb, rt->dst.lwtstate);
3231
3232	nlmsg_end(skb, nlh);
3233	return 0;
3234
3235nla_put_failure:
3236	nlmsg_cancel(skb, nlh);
3237	return -EMSGSIZE;
3238}
3239
3240int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3241{
3242	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3243	int prefix;
3244
3245	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3246		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3247		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3248	} else
3249		prefix = 0;
3250
3251	return rt6_fill_node(arg->net,
3252		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3253		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3254		     prefix, 0, NLM_F_MULTI);
3255}
3256
3257static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
3258{
3259	struct net *net = sock_net(in_skb->sk);
3260	struct nlattr *tb[RTA_MAX+1];
3261	struct rt6_info *rt;
3262	struct sk_buff *skb;
3263	struct rtmsg *rtm;
3264	struct flowi6 fl6;
3265	int err, iif = 0, oif = 0;
3266
3267	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3268	if (err < 0)
3269		goto errout;
3270
3271	err = -EINVAL;
3272	memset(&fl6, 0, sizeof(fl6));
3273
3274	if (tb[RTA_SRC]) {
3275		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3276			goto errout;
3277
3278		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
3279	}
3280
3281	if (tb[RTA_DST]) {
3282		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3283			goto errout;
3284
3285		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
3286	}
3287
3288	if (tb[RTA_IIF])
3289		iif = nla_get_u32(tb[RTA_IIF]);
3290
3291	if (tb[RTA_OIF])
3292		oif = nla_get_u32(tb[RTA_OIF]);
3293
3294	if (tb[RTA_MARK])
3295		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3296
3297	if (iif) {
3298		struct net_device *dev;
3299		int flags = 0;
3300
3301		dev = __dev_get_by_index(net, iif);
3302		if (!dev) {
3303			err = -ENODEV;
3304			goto errout;
3305		}
3306
3307		fl6.flowi6_iif = iif;
3308
3309		if (!ipv6_addr_any(&fl6.saddr))
3310			flags |= RT6_LOOKUP_F_HAS_SADDR;
3311
3312		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3313							       flags);
3314	} else {
3315		fl6.flowi6_oif = oif;
3316
3317		if (netif_index_is_l3_master(net, oif)) {
3318			fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
3319					   FLOWI_FLAG_SKIP_NH_OIF;
3320		}
3321
3322		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
3323	}
3324
3325	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
3326	if (!skb) {
3327		ip6_rt_put(rt);
3328		err = -ENOBUFS;
3329		goto errout;
3330	}
3331
3332	/* Reserve room for dummy headers; this skb can pass
3333	   through a good chunk of the routing engine.
3334	 */
3335	skb_reset_mac_header(skb);
3336	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
3337
3338	skb_dst_set(skb, &rt->dst);
3339
3340	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
3341			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3342			    nlh->nlmsg_seq, 0, 0, 0);
3343	if (err < 0) {
3344		kfree_skb(skb);
3345		goto errout;
3346	}
3347
3348	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3349errout:
3350	return err;
3351}
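/*
 * Illustrative userspace sketch (not part of this file): the query that
 * lands in inet6_rtm_getroute() above -- roughly what `ip -6 route get`
 * sends.  Socket setup, send/recv and error handling are elided.
 */
#include <string.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

int build_getroute(char *buf, size_t size, const char *dst_str)
{
	struct nlmsghdr *nlh = (struct nlmsghdr *)buf;
	struct rtmsg *rtm;
	struct rtattr *rta;

	memset(buf, 0, size);
	nlh->nlmsg_len   = NLMSG_LENGTH(sizeof(*rtm));
	nlh->nlmsg_type  = RTM_GETROUTE;
	nlh->nlmsg_flags = NLM_F_REQUEST;

	rtm = NLMSG_DATA(nlh);
	rtm->rtm_family  = AF_INET6;
	rtm->rtm_dst_len = 128;		/* a full destination address */

	/* RTA_DST must carry at least sizeof(struct in6_addr), as checked above */
	rta = (struct rtattr *)((char *)buf + NLMSG_ALIGN(nlh->nlmsg_len));
	rta->rta_type = RTA_DST;
	rta->rta_len  = RTA_LENGTH(sizeof(struct in6_addr));
	inet_pton(AF_INET6, dst_str, RTA_DATA(rta));
	nlh->nlmsg_len = NLMSG_ALIGN(nlh->nlmsg_len) + RTA_ALIGN(rta->rta_len);

	return nlh->nlmsg_len;
}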
3352
3353void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3354		     unsigned int nlm_flags)
3355{
3356	struct sk_buff *skb;
3357	struct net *net = info->nl_net;
3358	u32 seq;
3359	int err;
3360
3361	err = -ENOBUFS;
3362	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3363
3364	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3365	if (!skb)
3366		goto errout;
3367
3368	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3369				event, info->portid, seq, 0, 0, nlm_flags);
3370	if (err < 0) {
3371		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3372		WARN_ON(err == -EMSGSIZE);
3373		kfree_skb(skb);
3374		goto errout;
3375	}
3376	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3377		    info->nlh, gfp_any());
3378	return;
3379errout:
3380	if (err < 0)
3381		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
3382}
3383
3384static int ip6_route_dev_notify(struct notifier_block *this,
3385				unsigned long event, void *ptr)
3386{
3387	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3388	struct net *net = dev_net(dev);
3389
3390	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
3391		net->ipv6.ip6_null_entry->dst.dev = dev;
3392		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3393#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3394		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3395		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3396		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3397		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3398#endif
3399	}
3400
3401	return NOTIFY_OK;
3402}
3403
3404/*
3405 *	/proc
3406 */
3407
3408#ifdef CONFIG_PROC_FS
3409
3410static const struct file_operations ipv6_route_proc_fops = {
3411	.owner		= THIS_MODULE,
3412	.open		= ipv6_route_open,
3413	.read		= seq_read,
3414	.llseek		= seq_lseek,
3415	.release	= seq_release_net,
3416};
3417
3418static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3419{
3420	struct net *net = (struct net *)seq->private;
3421	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3422		   net->ipv6.rt6_stats->fib_nodes,
3423		   net->ipv6.rt6_stats->fib_route_nodes,
3424		   net->ipv6.rt6_stats->fib_rt_alloc,
3425		   net->ipv6.rt6_stats->fib_rt_entries,
3426		   net->ipv6.rt6_stats->fib_rt_cache,
3427		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3428		   net->ipv6.rt6_stats->fib_discarded_routes);
3429
3430	return 0;
3431}
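/*
 * Illustrative userspace sketch (not part of this file): consuming the
 * line emitted above -- seven hex fields in the order printed by
 * rt6_stats_seq_show().
 */
#include <stdio.h>

int main(void)
{
	unsigned int nodes, route_nodes, rt_alloc, rt_entries, rt_cache,
		     dst_entries, discarded;
	FILE *f = fopen("/proc/net/rt6_stats", "r");

	if (!f)
		return 1;
	if (fscanf(f, "%x %x %x %x %x %x %x", &nodes, &route_nodes,
		   &rt_alloc, &rt_entries, &rt_cache, &dst_entries,
		   &discarded) != 7) {
		fclose(f);
		return 1;
	}
	fclose(f);
	printf("fib nodes: %u, route entries: %u, discarded: %u\n",
	       nodes, rt_entries, discarded);
	return 0;
}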
3432
3433static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3434{
3435	return single_open_net(inode, file, rt6_stats_seq_show);
3436}
3437
3438static const struct file_operations rt6_stats_seq_fops = {
3439	.owner	 = THIS_MODULE,
3440	.open	 = rt6_stats_seq_open,
3441	.read	 = seq_read,
3442	.llseek	 = seq_lseek,
3443	.release = single_release_net,
3444};
3445#endif	/* CONFIG_PROC_FS */
3446
3447#ifdef CONFIG_SYSCTL
3448
3449static
3450int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3451			      void __user *buffer, size_t *lenp, loff_t *ppos)
3452{
3453	struct net *net;
3454	int delay;
3455	if (!write)
3456		return -EINVAL;
3457
3458	net = (struct net *)ctl->extra1;
3459	delay = net->ipv6.sysctl.flush_delay;
3460	proc_dointvec(ctl, write, buffer, lenp, ppos);
3461	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3462	return 0;
3463}
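/*
 * Illustrative userspace sketch (not part of this file): triggering the
 * handler above by writing an integer to the route flush sysctl.  Note
 * the handler snapshots flush_delay before proc_dointvec() stores the
 * new value, so the gc run uses the previously stored delay.  Requires
 * privilege in the owning namespace; the entry is write-only (mode 0200).
 */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/net/ipv6/route/flush", "w");

	if (!f)
		return 1;
	fprintf(f, "0\n");	/* value stored for the next flush */
	fclose(f);
	return 0;
}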
3464
3465struct ctl_table ipv6_route_table_template[] = {
3466	{
3467		.procname	=	"flush",
3468		.data		=	&init_net.ipv6.sysctl.flush_delay,
3469		.maxlen		=	sizeof(int),
3470		.mode		=	0200,
3471		.proc_handler	=	ipv6_sysctl_rtcache_flush
3472	},
3473	{
3474		.procname	=	"gc_thresh",
3475		.data		=	&ip6_dst_ops_template.gc_thresh,
3476		.maxlen		=	sizeof(int),
3477		.mode		=	0644,
3478		.proc_handler	=	proc_dointvec,
3479	},
3480	{
3481		.procname	=	"max_size",
3482		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
3483		.maxlen		=	sizeof(int),
3484		.mode		=	0644,
3485		.proc_handler	=	proc_dointvec,
3486	},
3487	{
3488		.procname	=	"gc_min_interval",
3489		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3490		.maxlen		=	sizeof(int),
3491		.mode		=	0644,
3492		.proc_handler	=	proc_dointvec_jiffies,
3493	},
3494	{
3495		.procname	=	"gc_timeout",
3496		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
3497		.maxlen		=	sizeof(int),
3498		.mode		=	0644,
3499		.proc_handler	=	proc_dointvec_jiffies,
3500	},
3501	{
3502		.procname	=	"gc_interval",
3503		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
3504		.maxlen		=	sizeof(int),
3505		.mode		=	0644,
3506		.proc_handler	=	proc_dointvec_jiffies,
3507	},
3508	{
3509		.procname	=	"gc_elasticity",
3510		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3511		.maxlen		=	sizeof(int),
3512		.mode		=	0644,
3513		.proc_handler	=	proc_dointvec,
3514	},
3515	{
3516		.procname	=	"mtu_expires",
3517		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3518		.maxlen		=	sizeof(int),
3519		.mode		=	0644,
3520		.proc_handler	=	proc_dointvec_jiffies,
3521	},
3522	{
3523		.procname	=	"min_adv_mss",
3524		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
3525		.maxlen		=	sizeof(int),
3526		.mode		=	0644,
3527		.proc_handler	=	proc_dointvec,
3528	},
3529	{
3530		.procname	=	"gc_min_interval_ms",
3531		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3532		.maxlen		=	sizeof(int),
3533		.mode		=	0644,
3534		.proc_handler	=	proc_dointvec_ms_jiffies,
3535	},
3536	{ }
3537};
3538
3539struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3540{
3541	struct ctl_table *table;
3542
3543	table = kmemdup(ipv6_route_table_template,
3544			sizeof(ipv6_route_table_template),
3545			GFP_KERNEL);
3546
3547	if (table) {
3548		table[0].data = &net->ipv6.sysctl.flush_delay;
3549		table[0].extra1 = net;
3550		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3551		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3552		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3553		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3554		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3555		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3556		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3557		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3558		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3559
3560		/* Don't export sysctls to unprivileged users */
3561		if (net->user_ns != &init_user_ns)
3562			table[0].procname = NULL;
3563	}
3564
3565	return table;
3566}
3567#endif
3568
3569static int __net_init ip6_route_net_init(struct net *net)
3570{
3571	int ret = -ENOMEM;
3572
3573	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3574	       sizeof(net->ipv6.ip6_dst_ops));
3575
3576	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3577		goto out_ip6_dst_ops;
3578
3579	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3580					   sizeof(*net->ipv6.ip6_null_entry),
3581					   GFP_KERNEL);
3582	if (!net->ipv6.ip6_null_entry)
3583		goto out_ip6_dst_entries;
3584	net->ipv6.ip6_null_entry->dst.path =
3585		(struct dst_entry *)net->ipv6.ip6_null_entry;
3586	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3587	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3588			 ip6_template_metrics, true);
3589
3590#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3591	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3592					       sizeof(*net->ipv6.ip6_prohibit_entry),
3593					       GFP_KERNEL);
3594	if (!net->ipv6.ip6_prohibit_entry)
3595		goto out_ip6_null_entry;
3596	net->ipv6.ip6_prohibit_entry->dst.path =
3597		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3598	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3599	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3600			 ip6_template_metrics, true);
3601
3602	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3603					       sizeof(*net->ipv6.ip6_blk_hole_entry),
3604					       GFP_KERNEL);
3605	if (!net->ipv6.ip6_blk_hole_entry)
3606		goto out_ip6_prohibit_entry;
3607	net->ipv6.ip6_blk_hole_entry->dst.path =
3608		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3609	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3610	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3611			 ip6_template_metrics, true);
3612#endif
3613
3614	net->ipv6.sysctl.flush_delay = 0;
3615	net->ipv6.sysctl.ip6_rt_max_size = 4096;
3616	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3617	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3618	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3619	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3620	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3621	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3622
3623	net->ipv6.ip6_rt_gc_expire = 30*HZ;
3624
3625	ret = 0;
3626out:
3627	return ret;
3628
3629#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3630out_ip6_prohibit_entry:
3631	kfree(net->ipv6.ip6_prohibit_entry);
3632out_ip6_null_entry:
3633	kfree(net->ipv6.ip6_null_entry);
3634#endif
3635out_ip6_dst_entries:
3636	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3637out_ip6_dst_ops:
3638	goto out;
3639}
3640
3641static void __net_exit ip6_route_net_exit(struct net *net)
3642{
3643	kfree(net->ipv6.ip6_null_entry);
3644#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3645	kfree(net->ipv6.ip6_prohibit_entry);
3646	kfree(net->ipv6.ip6_blk_hole_entry);
3647#endif
3648	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3649}
3650
3651static int __net_init ip6_route_net_init_late(struct net *net)
3652{
3653#ifdef CONFIG_PROC_FS
3654	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3655	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3656#endif
3657	return 0;
3658}
3659
3660static void __net_exit ip6_route_net_exit_late(struct net *net)
3661{
3662#ifdef CONFIG_PROC_FS
3663	remove_proc_entry("ipv6_route", net->proc_net);
3664	remove_proc_entry("rt6_stats", net->proc_net);
3665#endif
3666}
3667
3668static struct pernet_operations ip6_route_net_ops = {
3669	.init = ip6_route_net_init,
3670	.exit = ip6_route_net_exit,
3671};
3672
3673static int __net_init ipv6_inetpeer_init(struct net *net)
3674{
3675	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3676
3677	if (!bp)
3678		return -ENOMEM;
3679	inet_peer_base_init(bp);
3680	net->ipv6.peers = bp;
3681	return 0;
3682}
3683
3684static void __net_exit ipv6_inetpeer_exit(struct net *net)
3685{
3686	struct inet_peer_base *bp = net->ipv6.peers;
3687
3688	net->ipv6.peers = NULL;
3689	inetpeer_invalidate_tree(bp);
3690	kfree(bp);
3691}
3692
3693static struct pernet_operations ipv6_inetpeer_ops = {
3694	.init	=	ipv6_inetpeer_init,
3695	.exit	=	ipv6_inetpeer_exit,
3696};
3697
3698static struct pernet_operations ip6_route_net_late_ops = {
3699	.init = ip6_route_net_init_late,
3700	.exit = ip6_route_net_exit_late,
3701};
3702
3703static struct notifier_block ip6_route_dev_notifier = {
3704	.notifier_call = ip6_route_dev_notify,
3705	.priority = 0,
3706};
3707
3708int __init ip6_route_init(void)
3709{
3710	int ret;
3711	int cpu;
3712
3713	ret = -ENOMEM;
3714	ip6_dst_ops_template.kmem_cachep =
3715		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3716				  SLAB_HWCACHE_ALIGN, NULL);
3717	if (!ip6_dst_ops_template.kmem_cachep)
3718		goto out;
3719
3720	ret = dst_entries_init(&ip6_dst_blackhole_ops);
3721	if (ret)
3722		goto out_kmem_cache;
3723
3724	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3725	if (ret)
3726		goto out_dst_entries;
3727
3728	ret = register_pernet_subsys(&ip6_route_net_ops);
3729	if (ret)
3730		goto out_register_inetpeer;
3731
3732	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3733
3734	/* Registration of the loopback device is done before this portion
3735	 * of code runs, so the loopback reference in rt6_info will not be
3736	 * taken; do it manually for init_net */
3737	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3738	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3739  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3740	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3741	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3742	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3743	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3744#endif
3745	ret = fib6_init();
3746	if (ret)
3747		goto out_register_subsys;
3748
3749	ret = xfrm6_init();
3750	if (ret)
3751		goto out_fib6_init;
3752
3753	ret = fib6_rules_init();
3754	if (ret)
3755		goto xfrm6_init;
3756
3757	ret = register_pernet_subsys(&ip6_route_net_late_ops);
3758	if (ret)
3759		goto fib6_rules_init;
3760
3761	ret = -ENOBUFS;
3762	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3763	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3764	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3765		goto out_register_late_subsys;
3766
3767	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3768	if (ret)
3769		goto out_register_late_subsys;
3770
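	/* set up the per-CPU lists used to track uncached rt6_info entries */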
3771	for_each_possible_cpu(cpu) {
3772		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3773
3774		INIT_LIST_HEAD(&ul->head);
3775		spin_lock_init(&ul->lock);
3776	}
3777
3778out:
3779	return ret;
3780
3781out_register_late_subsys:
3782	unregister_pernet_subsys(&ip6_route_net_late_ops);
3783fib6_rules_init:
3784	fib6_rules_cleanup();
3785xfrm6_init:
3786	xfrm6_fini();
3787out_fib6_init:
3788	fib6_gc_cleanup();
3789out_register_subsys:
3790	unregister_pernet_subsys(&ip6_route_net_ops);
3791out_register_inetpeer:
3792	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3793out_dst_entries:
3794	dst_entries_destroy(&ip6_dst_blackhole_ops);
3795out_kmem_cache:
3796	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3797	goto out;
3798}
3799
3800void ip6_route_cleanup(void)
3801{
3802	unregister_netdevice_notifier(&ip6_route_dev_notifier);
3803	unregister_pernet_subsys(&ip6_route_net_late_ops);
3804	fib6_rules_cleanup();
3805	xfrm6_fini();
3806	fib6_gc_cleanup();
3807	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3808	unregister_pernet_subsys(&ip6_route_net_ops);
3809	dst_entries_destroy(&ip6_dst_blackhole_ops);
3810	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3811}
v6.9.4
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *	Linux INET6 implementation
   4 *	FIB front-end.
   5 *
   6 *	Authors:
   7 *	Pedro Roque		<roque@di.fc.ul.pt>
   8 */
   9
  10/*	Changes:
  11 *
  12 *	YOSHIFUJI Hideaki @USAGI
  13 *		reworked default router selection.
  14 *		- respect outgoing interface
  15 *		- select from (probably) reachable routers (i.e.
  16 *		routers in REACHABLE, STALE, DELAY or PROBE states).
  17 *		- always select the same router if it is (probably)
  18 *		reachable.  otherwise, round-robin the list.
  19 *	Ville Nuorvala
  20 *		Fixed routing subtrees.
  21 */
  22
  23#define pr_fmt(fmt) "IPv6: " fmt
  24
  25#include <linux/capability.h>
  26#include <linux/errno.h>
  27#include <linux/export.h>
  28#include <linux/types.h>
  29#include <linux/times.h>
  30#include <linux/socket.h>
  31#include <linux/sockios.h>
  32#include <linux/net.h>
  33#include <linux/route.h>
  34#include <linux/netdevice.h>
  35#include <linux/in6.h>
  36#include <linux/mroute6.h>
  37#include <linux/init.h>
  38#include <linux/if_arp.h>
  39#include <linux/proc_fs.h>
  40#include <linux/seq_file.h>
  41#include <linux/nsproxy.h>
  42#include <linux/slab.h>
  43#include <linux/jhash.h>
  44#include <linux/siphash.h>
  45#include <net/net_namespace.h>
  46#include <net/snmp.h>
  47#include <net/ipv6.h>
  48#include <net/ip6_fib.h>
  49#include <net/ip6_route.h>
  50#include <net/ndisc.h>
  51#include <net/addrconf.h>
  52#include <net/tcp.h>
  53#include <linux/rtnetlink.h>
  54#include <net/dst.h>
  55#include <net/dst_metadata.h>
  56#include <net/xfrm.h>
  57#include <net/netevent.h>
  58#include <net/netlink.h>
  59#include <net/rtnh.h>
  60#include <net/lwtunnel.h>
  61#include <net/ip_tunnels.h>
  62#include <net/l3mdev.h>
  63#include <net/ip.h>
  64#include <linux/uaccess.h>
  65#include <linux/btf_ids.h>
  66
  67#ifdef CONFIG_SYSCTL
  68#include <linux/sysctl.h>
  69#endif
  70
  71static int ip6_rt_type_to_error(u8 fib6_type);
  72
  73#define CREATE_TRACE_POINTS
  74#include <trace/events/fib6.h>
  75EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
  76#undef CREATE_TRACE_POINTS
  77
  78enum rt6_nud_state {
  79	RT6_NUD_FAIL_HARD = -3,
  80	RT6_NUD_FAIL_PROBE = -2,
  81	RT6_NUD_FAIL_DO_RR = -1,
  82	RT6_NUD_SUCCEED = 1
  83};
  84
  85INDIRECT_CALLABLE_SCOPE
  86struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
  87static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
  88INDIRECT_CALLABLE_SCOPE
  89unsigned int		ip6_mtu(const struct dst_entry *dst);
  90static void		ip6_negative_advice(struct sock *sk,
  91					    struct dst_entry *dst);
  92static void		ip6_dst_destroy(struct dst_entry *);
  93static void		ip6_dst_ifdown(struct dst_entry *,
  94				       struct net_device *dev);
  95static void		 ip6_dst_gc(struct dst_ops *ops);
  96
  97static int		ip6_pkt_discard(struct sk_buff *skb);
  98static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
  99static int		ip6_pkt_prohibit(struct sk_buff *skb);
 100static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
 101static void		ip6_link_failure(struct sk_buff *skb);
 102static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
 103					   struct sk_buff *skb, u32 mtu,
 104					   bool confirm_neigh);
 105static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
 106					struct sk_buff *skb);
 107static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
 108			   int strict);
 109static size_t rt6_nlmsg_size(struct fib6_info *f6i);
 110static int rt6_fill_node(struct net *net, struct sk_buff *skb,
 111			 struct fib6_info *rt, struct dst_entry *dst,
 112			 struct in6_addr *dest, struct in6_addr *src,
 113			 int iif, int type, u32 portid, u32 seq,
 114			 unsigned int flags);
 115static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
 116					   const struct in6_addr *daddr,
 117					   const struct in6_addr *saddr);
 118
 119#ifdef CONFIG_IPV6_ROUTE_INFO
 120static struct fib6_info *rt6_add_route_info(struct net *net,
 121					   const struct in6_addr *prefix, int prefixlen,
 122					   const struct in6_addr *gwaddr,
 123					   struct net_device *dev,
 124					   unsigned int pref);
 125static struct fib6_info *rt6_get_route_info(struct net *net,
 126					   const struct in6_addr *prefix, int prefixlen,
 127					   const struct in6_addr *gwaddr,
 128					   struct net_device *dev);
 129#endif
 130
 131struct uncached_list {
 132	spinlock_t		lock;
 133	struct list_head	head;
 134	struct list_head	quarantine;
 135};
 136
 137static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
 138
 139void rt6_uncached_list_add(struct rt6_info *rt)
 140{
 141	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
 142
 143	rt->dst.rt_uncached_list = ul;
 144
 145	spin_lock_bh(&ul->lock);
 146	list_add_tail(&rt->dst.rt_uncached, &ul->head);
 147	spin_unlock_bh(&ul->lock);
 148}
 149
 150void rt6_uncached_list_del(struct rt6_info *rt)
 151{
 152	if (!list_empty(&rt->dst.rt_uncached)) {
 153		struct uncached_list *ul = rt->dst.rt_uncached_list;
 154
 155		spin_lock_bh(&ul->lock);
 156		list_del_init(&rt->dst.rt_uncached);
 157		spin_unlock_bh(&ul->lock);
 158	}
 159}
 160
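/* On device unregister, detach every uncached route still pointing at
 * the device: re-point rt6i_idev and dst.dev at blackhole_netdev and
 * move the entry to the quarantine list so the device references can
 * finally be released.
 */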
 161static void rt6_uncached_list_flush_dev(struct net_device *dev)
 162{
 163	int cpu;
 164
 165	for_each_possible_cpu(cpu) {
 166		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
 167		struct rt6_info *rt, *safe;
 168
 169		if (list_empty(&ul->head))
 170			continue;
 171
 172		spin_lock_bh(&ul->lock);
 173		list_for_each_entry_safe(rt, safe, &ul->head, dst.rt_uncached) {
 174			struct inet6_dev *rt_idev = rt->rt6i_idev;
 175			struct net_device *rt_dev = rt->dst.dev;
 176			bool handled = false;
 177
 178			if (rt_idev->dev == dev) {
 179				rt->rt6i_idev = in6_dev_get(blackhole_netdev);
 180				in6_dev_put(rt_idev);
 181				handled = true;
 182			}
 183
 184			if (rt_dev == dev) {
 185				rt->dst.dev = blackhole_netdev;
 186				netdev_ref_replace(rt_dev, blackhole_netdev,
 187						   &rt->dst.dev_tracker,
 188						   GFP_ATOMIC);
 189				handled = true;
 190			}
 191			if (handled)
 192				list_move(&rt->dst.rt_uncached,
 193					  &ul->quarantine);
 194		}
 195		spin_unlock_bh(&ul->lock);
 196	}
 197}
 198
 199static inline const void *choose_neigh_daddr(const struct in6_addr *p,
 200					     struct sk_buff *skb,
 201					     const void *daddr)
 202{
 203	if (!ipv6_addr_any(p))
 204		return (const void *) p;
 205	else if (skb)
 206		return &ipv6_hdr(skb)->daddr;
 207	return daddr;
 208}
 209
 210struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
 211				   struct net_device *dev,
 212				   struct sk_buff *skb,
 213				   const void *daddr)
 214{
 215	struct neighbour *n;
 216
 217	daddr = choose_neigh_daddr(gw, skb, daddr);
 218	n = __ipv6_neigh_lookup(dev, daddr);
 219	if (n)
 220		return n;
 221
 222	n = neigh_create(&nd_tbl, daddr, dev);
 223	return IS_ERR(n) ? NULL : n;
 224}
 225
 226static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
 227					      struct sk_buff *skb,
 228					      const void *daddr)
 229{
 230	const struct rt6_info *rt = dst_rt6_info(dst);
 231
 232	return ip6_neigh_lookup(rt6_nexthop(rt, &in6addr_any),
 233				dst->dev, skb, daddr);
 234}
 235
 236static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
 237{
 238	const struct rt6_info *rt = dst_rt6_info(dst);
 239	struct net_device *dev = dst->dev;
 240
 241	daddr = choose_neigh_daddr(rt6_nexthop(rt, &in6addr_any), NULL, daddr);
 242	if (!daddr)
 243		return;
 244	if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
 245		return;
 246	if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
 247		return;
 248	__ipv6_confirm_neigh(dev, daddr);
 249}
 250
 251static struct dst_ops ip6_dst_ops_template = {
 252	.family			=	AF_INET6,
 253	.gc			=	ip6_dst_gc,
 254	.gc_thresh		=	1024,
 255	.check			=	ip6_dst_check,
 256	.default_advmss		=	ip6_default_advmss,
 257	.mtu			=	ip6_mtu,
 258	.cow_metrics		=	dst_cow_metrics_generic,
 259	.destroy		=	ip6_dst_destroy,
 260	.ifdown			=	ip6_dst_ifdown,
 261	.negative_advice	=	ip6_negative_advice,
 262	.link_failure		=	ip6_link_failure,
 263	.update_pmtu		=	ip6_rt_update_pmtu,
 264	.redirect		=	rt6_do_redirect,
 265	.local_out		=	__ip6_local_out,
 266	.neigh_lookup		=	ip6_dst_neigh_lookup,
 267	.confirm_neigh		=	ip6_confirm_neigh,
 268};
 269
 270static struct dst_ops ip6_dst_blackhole_ops = {
 271	.family			= AF_INET6,
 272	.default_advmss		= ip6_default_advmss,
 273	.neigh_lookup		= ip6_dst_neigh_lookup,
 274	.check			= ip6_dst_check,
 275	.destroy		= ip6_dst_destroy,
 276	.cow_metrics		= dst_cow_metrics_generic,
 277	.update_pmtu		= dst_blackhole_update_pmtu,
 278	.redirect		= dst_blackhole_redirect,
 279	.mtu			= dst_blackhole_mtu,
 280};
 281
 282static const u32 ip6_template_metrics[RTAX_MAX] = {
 283	[RTAX_HOPLIMIT - 1] = 0,
 284};
 285
 286static const struct fib6_info fib6_null_entry_template = {
 287	.fib6_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 288	.fib6_protocol  = RTPROT_KERNEL,
 289	.fib6_metric	= ~(u32)0,
 290	.fib6_ref	= REFCOUNT_INIT(1),
 291	.fib6_type	= RTN_UNREACHABLE,
 292	.fib6_metrics	= (struct dst_metrics *)&dst_default_metrics,
 293};
 294
 295static const struct rt6_info ip6_null_entry_template = {
 296	.dst = {
 297		.__rcuref	= RCUREF_INIT(1),
 298		.__use		= 1,
 299		.obsolete	= DST_OBSOLETE_FORCE_CHK,
 300		.error		= -ENETUNREACH,
 301		.input		= ip6_pkt_discard,
 302		.output		= ip6_pkt_discard_out,
 303	},
 304	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 305};
 306
 307#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 308
 309static const struct rt6_info ip6_prohibit_entry_template = {
 310	.dst = {
 311		.__rcuref	= RCUREF_INIT(1),
 312		.__use		= 1,
 313		.obsolete	= DST_OBSOLETE_FORCE_CHK,
 314		.error		= -EACCES,
 315		.input		= ip6_pkt_prohibit,
 316		.output		= ip6_pkt_prohibit_out,
 317	},
 318	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 319};
 320
 321static const struct rt6_info ip6_blk_hole_entry_template = {
 322	.dst = {
 323		.__rcuref	= RCUREF_INIT(1),
 324		.__use		= 1,
 325		.obsolete	= DST_OBSOLETE_FORCE_CHK,
 326		.error		= -EINVAL,
 327		.input		= dst_discard,
 328		.output		= dst_discard_out,
 329	},
 330	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 331};
 332
 333#endif
 334
 335static void rt6_info_init(struct rt6_info *rt)
 336{
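	/* zero every field that follows the embedded dst_entry; dst
	 * itself is left intact
	 */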
 337	memset_after(rt, 0, dst);
 338}
 339
 340/* allocate dst with ip6_dst_ops */
 341struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
 342			       int flags)
 343{
 344	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
 345					DST_OBSOLETE_FORCE_CHK, flags);
 346
 347	if (rt) {
 348		rt6_info_init(rt);
 349		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
 350	}
 351
 352	return rt;
 353}
 354EXPORT_SYMBOL(ip6_dst_alloc);
 355
 356static void ip6_dst_destroy(struct dst_entry *dst)
 357{
 358	struct rt6_info *rt = dst_rt6_info(dst);
 359	struct fib6_info *from;
 360	struct inet6_dev *idev;
 361
 362	ip_dst_metrics_put(dst);
 363	rt6_uncached_list_del(rt);
 364
 365	idev = rt->rt6i_idev;
 366	if (idev) {
 367		rt->rt6i_idev = NULL;
 368		in6_dev_put(idev);
 369	}
 370
 371	from = xchg((__force struct fib6_info **)&rt->from, NULL);
 372	fib6_info_release(from);
 373}
 374
 375static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
 376{
 377	struct rt6_info *rt = dst_rt6_info(dst);
 378	struct inet6_dev *idev = rt->rt6i_idev;
 379
 380	if (idev && idev->dev != blackhole_netdev) {
 381		struct inet6_dev *blackhole_idev = in6_dev_get(blackhole_netdev);
 382
 383		if (blackhole_idev) {
 384			rt->rt6i_idev = blackhole_idev;
 385			in6_dev_put(idev);
 386		}
 387	}
 388}
 389
 390static bool __rt6_check_expired(const struct rt6_info *rt)
 391{
 392	if (rt->rt6i_flags & RTF_EXPIRES)
 393		return time_after(jiffies, rt->dst.expires);
 394	else
 395		return false;
 396}
 397
 398static bool rt6_check_expired(const struct rt6_info *rt)
 399{
 400	struct fib6_info *from;
 401
 402	from = rcu_dereference(rt->from);
 403
 404	if (rt->rt6i_flags & RTF_EXPIRES) {
 405		if (time_after(jiffies, rt->dst.expires))
 406			return true;
 407	} else if (from) {
 408		return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
 409			fib6_check_expired(from);
 410	}
 411	return false;
 412}
 413
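/* Multipath selection: each sibling nexthop owns a slice of the hash
 * space bounded by fib_nh_upper_bound, and the flow hash
 * (fl6->mp_hash) picks the first sibling whose upper bound covers it.
 */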
 414void fib6_select_path(const struct net *net, struct fib6_result *res,
 415		      struct flowi6 *fl6, int oif, bool have_oif_match,
 416		      const struct sk_buff *skb, int strict)
 417{
 418	struct fib6_info *sibling, *next_sibling;
 419	struct fib6_info *match = res->f6i;
 420
 421	if (!match->nh && (!match->fib6_nsiblings || have_oif_match))
 422		goto out;
 423
 424	if (match->nh && have_oif_match && res->nh)
 425		return;
 426
 427	if (skb)
 428		IP6CB(skb)->flags |= IP6SKB_MULTIPATH;
 429
 430	/* We might have already computed the hash for ICMPv6 errors. In such
 431	 * case it will always be non-zero. Otherwise now is the time to do it.
 432	 */
 433	if (!fl6->mp_hash &&
 434	    (!match->nh || nexthop_is_multipath(match->nh)))
 435		fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
 436
 437	if (unlikely(match->nh)) {
 438		nexthop_path_fib6_result(res, fl6->mp_hash);
 439		return;
 440	}
 441
 442	if (fl6->mp_hash <= atomic_read(&match->fib6_nh->fib_nh_upper_bound))
 443		goto out;
 444
 445	list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
 446				 fib6_siblings) {
 447		const struct fib6_nh *nh = sibling->fib6_nh;
 448		int nh_upper_bound;
 449
 450		nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound);
 451		if (fl6->mp_hash > nh_upper_bound)
 452			continue;
 453		if (rt6_score_route(nh, sibling->fib6_flags, oif, strict) < 0)
 454			break;
 455		match = sibling;
 456		break;
 457	}
 458
 459out:
 460	res->f6i = match;
 461	res->nh = match->fib6_nh;
 462}
 463
 464/*
 465 *	Route lookup. rcu_read_lock() should be held.
 466 */
 467
 468static bool __rt6_device_match(struct net *net, const struct fib6_nh *nh,
 469			       const struct in6_addr *saddr, int oif, int flags)
 470{
 471	const struct net_device *dev;
 472
 473	if (nh->fib_nh_flags & RTNH_F_DEAD)
 474		return false;
 475
 476	dev = nh->fib_nh_dev;
 477	if (oif) {
 478		if (dev->ifindex == oif)
 479			return true;
 480	} else {
 481		if (ipv6_chk_addr(net, saddr, dev,
 482				  flags & RT6_LOOKUP_F_IFACE))
 483			return true;
 484	}
 485
 486	return false;
 487}
 488
 489struct fib6_nh_dm_arg {
 490	struct net		*net;
 491	const struct in6_addr	*saddr;
 492	int			oif;
 493	int			flags;
 494	struct fib6_nh		*nh;
 495};
 496
 497static int __rt6_nh_dev_match(struct fib6_nh *nh, void *_arg)
 498{
 499	struct fib6_nh_dm_arg *arg = _arg;
 500
 501	arg->nh = nh;
 502	return __rt6_device_match(arg->net, nh, arg->saddr, arg->oif,
 503				  arg->flags);
 504}
 505
 506/* returns fib6_nh from nexthop or NULL */
 507static struct fib6_nh *rt6_nh_dev_match(struct net *net, struct nexthop *nh,
 508					struct fib6_result *res,
 509					const struct in6_addr *saddr,
 510					int oif, int flags)
 511{
 512	struct fib6_nh_dm_arg arg = {
 513		.net   = net,
 514		.saddr = saddr,
 515		.oif   = oif,
 516		.flags = flags,
 517	};
 518
 519	if (nexthop_is_blackhole(nh))
 520		return NULL;
 521
 522	if (nexthop_for_each_fib6_nh(nh, __rt6_nh_dev_match, &arg))
 523		return arg.nh;
 524
 525	return NULL;
 526}
 527
 528static void rt6_device_match(struct net *net, struct fib6_result *res,
 529			     const struct in6_addr *saddr, int oif, int flags)
 530{
 531	struct fib6_info *f6i = res->f6i;
 532	struct fib6_info *spf6i;
 533	struct fib6_nh *nh;
 534
 535	if (!oif && ipv6_addr_any(saddr)) {
 536		if (unlikely(f6i->nh)) {
 537			nh = nexthop_fib6_nh(f6i->nh);
 538			if (nexthop_is_blackhole(f6i->nh))
 539				goto out_blackhole;
 540		} else {
 541			nh = f6i->fib6_nh;
 542		}
 543		if (!(nh->fib_nh_flags & RTNH_F_DEAD))
 544			goto out;
 545	}
 546
 547	for (spf6i = f6i; spf6i; spf6i = rcu_dereference(spf6i->fib6_next)) {
 548		bool matched = false;
 549
 550		if (unlikely(spf6i->nh)) {
 551			nh = rt6_nh_dev_match(net, spf6i->nh, res, saddr,
 552					      oif, flags);
 553			if (nh)
 554				matched = true;
 555		} else {
 556			nh = spf6i->fib6_nh;
 557			if (__rt6_device_match(net, nh, saddr, oif, flags))
 558				matched = true;
 559		}
 560		if (matched) {
 561			res->f6i = spf6i;
 562			goto out;
 563		}
 564	}
 565
 566	if (oif && flags & RT6_LOOKUP_F_IFACE) {
 567		res->f6i = net->ipv6.fib6_null_entry;
 568		nh = res->f6i->fib6_nh;
 569		goto out;
 570	}
 571
 572	if (unlikely(f6i->nh)) {
 573		nh = nexthop_fib6_nh(f6i->nh);
 574		if (nexthop_is_blackhole(f6i->nh))
 575			goto out_blackhole;
 576	} else {
 577		nh = f6i->fib6_nh;
 578	}
 579
 580	if (nh->fib_nh_flags & RTNH_F_DEAD) {
 581		res->f6i = net->ipv6.fib6_null_entry;
 582		nh = res->f6i->fib6_nh;
 583	}
 584out:
 585	res->nh = nh;
 586	res->fib6_type = res->f6i->fib6_type;
 587	res->fib6_flags = res->f6i->fib6_flags;
 588	return;
 589
 590out_blackhole:
 591	res->fib6_flags |= RTF_REJECT;
 592	res->fib6_type = RTN_BLACKHOLE;
 593	res->nh = nh;
 594}
 595
 596#ifdef CONFIG_IPV6_ROUTER_PREF
 597struct __rt6_probe_work {
 598	struct work_struct work;
 599	struct in6_addr target;
 600	struct net_device *dev;
 601	netdevice_tracker dev_tracker;
 602};
 603
 604static void rt6_probe_deferred(struct work_struct *w)
 605{
 606	struct in6_addr mcaddr;
 607	struct __rt6_probe_work *work =
 608		container_of(w, struct __rt6_probe_work, work);
 609
 610	addrconf_addr_solict_mult(&work->target, &mcaddr);
 611	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
 612	netdev_put(work->dev, &work->dev_tracker);
 613	kfree(work);
 614}
 615
 616static void rt6_probe(struct fib6_nh *fib6_nh)
 617{
 618	struct __rt6_probe_work *work = NULL;
 619	const struct in6_addr *nh_gw;
 620	unsigned long last_probe;
 621	struct neighbour *neigh;
 622	struct net_device *dev;
 623	struct inet6_dev *idev;
 624
 625	/*
 626	 * Okay, this does not seem to be appropriate
 627	 * for now, however, we need to check if it
 628	 * is really so; aka Router Reachability Probing.
 629	 *
 630	 * Router Reachability Probe MUST be rate-limited
 631	 * to no more than one per minute.
 632	 */
 633	if (!fib6_nh->fib_nh_gw_family)
 634		return;
 635
 636	nh_gw = &fib6_nh->fib_nh_gw6;
 637	dev = fib6_nh->fib_nh_dev;
 638	rcu_read_lock();
 639	last_probe = READ_ONCE(fib6_nh->last_probe);
 640	idev = __in6_dev_get(dev);
 641	neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
 642	if (neigh) {
 643		if (READ_ONCE(neigh->nud_state) & NUD_VALID)
 644			goto out;
 645
 646		write_lock_bh(&neigh->lock);
 647		if (!(neigh->nud_state & NUD_VALID) &&
 648		    time_after(jiffies,
 649			       neigh->updated +
 650			       READ_ONCE(idev->cnf.rtr_probe_interval))) {
 651			work = kmalloc(sizeof(*work), GFP_ATOMIC);
 652			if (work)
 653				__neigh_set_probe_once(neigh);
 654		}
 655		write_unlock_bh(&neigh->lock);
 656	} else if (time_after(jiffies, last_probe +
 657				       READ_ONCE(idev->cnf.rtr_probe_interval))) {
 658		work = kmalloc(sizeof(*work), GFP_ATOMIC);
 659	}
 660
 661	if (!work || cmpxchg(&fib6_nh->last_probe,
 662			     last_probe, jiffies) != last_probe) {
 663		kfree(work);
 664	} else {
 665		INIT_WORK(&work->work, rt6_probe_deferred);
 666		work->target = *nh_gw;
 667		netdev_hold(dev, &work->dev_tracker, GFP_ATOMIC);
 668		work->dev = dev;
 669		schedule_work(&work->work);
 670	}
 671
 672out:
 673	rcu_read_unlock();
 674}
 675#else
 676static inline void rt6_probe(struct fib6_nh *fib6_nh)
 677{
 678}
 679#endif
 680
 681/*
 682 * Default Router Selection (RFC 2461 6.3.6)
 683 */
 684static enum rt6_nud_state rt6_check_neigh(const struct fib6_nh *fib6_nh)
 685{
 686	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
 687	struct neighbour *neigh;
 688
 689	rcu_read_lock();
 690	neigh = __ipv6_neigh_lookup_noref(fib6_nh->fib_nh_dev,
 691					  &fib6_nh->fib_nh_gw6);
 692	if (neigh) {
 693		u8 nud_state = READ_ONCE(neigh->nud_state);
 694
 695		if (nud_state & NUD_VALID)
 696			ret = RT6_NUD_SUCCEED;
 697#ifdef CONFIG_IPV6_ROUTER_PREF
 698		else if (!(nud_state & NUD_FAILED))
 699			ret = RT6_NUD_SUCCEED;
 700		else
 701			ret = RT6_NUD_FAIL_PROBE;
 702#endif
 703	} else {
 704		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
 705		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
 706	}
 707	rcu_read_unlock();
 708
 709	return ret;
 710}
 711
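/* Combined route score: 2 when the outgoing interface matches (or no
 * oif was given), plus the RA-advertised preference shifted into the
 * higher bits; a negative RT6_NUD_* value signals a failed neighbour
 * check.
 */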
 712static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
 713			   int strict)
 714{
 715	int m = 0;
 716
 717	if (!oif || nh->fib_nh_dev->ifindex == oif)
 718		m = 2;
 719
 720	if (!m && (strict & RT6_LOOKUP_F_IFACE))
 721		return RT6_NUD_FAIL_HARD;
 722#ifdef CONFIG_IPV6_ROUTER_PREF
 723	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(fib6_flags)) << 2;
 724#endif
 725	if ((strict & RT6_LOOKUP_F_REACHABLE) &&
 726	    !(fib6_flags & RTF_NONEXTHOP) && nh->fib_nh_gw_family) {
 727		int n = rt6_check_neigh(nh);
 728		if (n < 0)
 729			return n;
 730	}
 731	return m;
 732}
 733
 734static bool find_match(struct fib6_nh *nh, u32 fib6_flags,
 735		       int oif, int strict, int *mpri, bool *do_rr)
 736{
 737	bool match_do_rr = false;
 738	bool rc = false;
 739	int m;
 740
 741	if (nh->fib_nh_flags & RTNH_F_DEAD)
 742		goto out;
 743
 744	if (ip6_ignore_linkdown(nh->fib_nh_dev) &&
 745	    nh->fib_nh_flags & RTNH_F_LINKDOWN &&
 746	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
 747		goto out;
 748
 749	m = rt6_score_route(nh, fib6_flags, oif, strict);
 750	if (m == RT6_NUD_FAIL_DO_RR) {
 751		match_do_rr = true;
 752		m = 0; /* lowest valid score */
 753	} else if (m == RT6_NUD_FAIL_HARD) {
 754		goto out;
 755	}
 756
 757	if (strict & RT6_LOOKUP_F_REACHABLE)
 758		rt6_probe(nh);
 759
 760	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
 761	if (m > *mpri) {
 762		*do_rr = match_do_rr;
 763		*mpri = m;
 764		rc = true;
 765	}
 766out:
 767	return rc;
 768}
 769
 770struct fib6_nh_frl_arg {
 771	u32		flags;
 772	int		oif;
 773	int		strict;
 774	int		*mpri;
 775	bool		*do_rr;
 776	struct fib6_nh	*nh;
 777};
 778
 779static int rt6_nh_find_match(struct fib6_nh *nh, void *_arg)
 780{
 781	struct fib6_nh_frl_arg *arg = _arg;
 782
 783	arg->nh = nh;
 784	return find_match(nh, arg->flags, arg->oif, arg->strict,
 785			  arg->mpri, arg->do_rr);
 786}
 787
 788static void __find_rr_leaf(struct fib6_info *f6i_start,
 789			   struct fib6_info *nomatch, u32 metric,
 790			   struct fib6_result *res, struct fib6_info **cont,
 791			   int oif, int strict, bool *do_rr, int *mpri)
 792{
 793	struct fib6_info *f6i;
 794
 795	for (f6i = f6i_start;
 796	     f6i && f6i != nomatch;
 797	     f6i = rcu_dereference(f6i->fib6_next)) {
 798		bool matched = false;
 799		struct fib6_nh *nh;
 800
 801		if (cont && f6i->fib6_metric != metric) {
 802			*cont = f6i;
 803			return;
 804		}
 805
 806		if (fib6_check_expired(f6i))
 807			continue;
 808
 809		if (unlikely(f6i->nh)) {
 810			struct fib6_nh_frl_arg arg = {
 811				.flags  = f6i->fib6_flags,
 812				.oif    = oif,
 813				.strict = strict,
 814				.mpri   = mpri,
 815				.do_rr  = do_rr
 816			};
 817
 818			if (nexthop_is_blackhole(f6i->nh)) {
 819				res->fib6_flags = RTF_REJECT;
 820				res->fib6_type = RTN_BLACKHOLE;
 821				res->f6i = f6i;
 822				res->nh = nexthop_fib6_nh(f6i->nh);
 823				return;
 824			}
 825			if (nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_find_match,
 826						     &arg)) {
 827				matched = true;
 828				nh = arg.nh;
 829			}
 830		} else {
 831			nh = f6i->fib6_nh;
 832			if (find_match(nh, f6i->fib6_flags, oif, strict,
 833				       mpri, do_rr))
 834				matched = true;
 835		}
 836		if (matched) {
 837			res->f6i = f6i;
 838			res->nh = nh;
 839			res->fib6_flags = f6i->fib6_flags;
 840			res->fib6_type = f6i->fib6_type;
 841		}
 842	}
 843}
 844
 845static void find_rr_leaf(struct fib6_node *fn, struct fib6_info *leaf,
 846			 struct fib6_info *rr_head, int oif, int strict,
 847			 bool *do_rr, struct fib6_result *res)
 848{
 849	u32 metric = rr_head->fib6_metric;
 850	struct fib6_info *cont = NULL;
 851	int mpri = -1;
 852
 853	__find_rr_leaf(rr_head, NULL, metric, res, &cont,
 854		       oif, strict, do_rr, &mpri);
 855
 856	__find_rr_leaf(leaf, rr_head, metric, res, &cont,
 857		       oif, strict, do_rr, &mpri);
 858
 859	if (res->f6i || !cont)
 860		return;
 861
 862	__find_rr_leaf(cont, NULL, metric, res, NULL,
 863		       oif, strict, do_rr, &mpri);
 864}
 865
 866static void rt6_select(struct net *net, struct fib6_node *fn, int oif,
 867		       struct fib6_result *res, int strict)
 868{
 869	struct fib6_info *leaf = rcu_dereference(fn->leaf);
 870	struct fib6_info *rt0;
 871	bool do_rr = false;
 872	int key_plen;
 873
874	/* make sure this function or its helpers set f6i */
 875	res->f6i = NULL;
 876
 877	if (!leaf || leaf == net->ipv6.fib6_null_entry)
 878		goto out;
 879
 880	rt0 = rcu_dereference(fn->rr_ptr);
 881	if (!rt0)
 882		rt0 = leaf;
 883
 884	/* Double check to make sure fn is not an intermediate node
885	 * and fn->leaf does not point to its child's leaf
 886	 * (This might happen if all routes under fn are deleted from
 887	 * the tree and fib6_repair_tree() is called on the node.)
 888	 */
 889	key_plen = rt0->fib6_dst.plen;
 890#ifdef CONFIG_IPV6_SUBTREES
 891	if (rt0->fib6_src.plen)
 892		key_plen = rt0->fib6_src.plen;
 893#endif
 894	if (fn->fn_bit != key_plen)
 895		goto out;
 896
 897	find_rr_leaf(fn, leaf, rt0, oif, strict, &do_rr, res);
 898	if (do_rr) {
 899		struct fib6_info *next = rcu_dereference(rt0->fib6_next);
 900
 901		/* no entries matched; do round-robin */
 902		if (!next || next->fib6_metric != rt0->fib6_metric)
 903			next = leaf;
 904
 905		if (next != rt0) {
 906			spin_lock_bh(&leaf->fib6_table->tb6_lock);
 907			/* make sure next is not being deleted from the tree */
 908			if (next->fib6_node)
 909				rcu_assign_pointer(fn->rr_ptr, next);
 910			spin_unlock_bh(&leaf->fib6_table->tb6_lock);
 911		}
 912	}
 913
 914out:
 915	if (!res->f6i) {
 916		res->f6i = net->ipv6.fib6_null_entry;
 917		res->nh = res->f6i->fib6_nh;
 918		res->fib6_flags = res->f6i->fib6_flags;
 919		res->fib6_type = res->f6i->fib6_type;
 920	}
 921}
 922
 923static bool rt6_is_gw_or_nonexthop(const struct fib6_result *res)
 924{
 925	return (res->f6i->fib6_flags & RTF_NONEXTHOP) ||
 926	       res->nh->fib_nh_gw_family;
 927}
 928
 929#ifdef CONFIG_IPV6_ROUTE_INFO
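/* Process a Route Information Option from a Router Advertisement
 * (RFC 4191): validate the option, then add, refresh or delete the
 * advertised route according to its lifetime and preference.
 */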
 930int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 931		  const struct in6_addr *gwaddr)
 932{
 933	struct net *net = dev_net(dev);
 934	struct route_info *rinfo = (struct route_info *) opt;
 935	struct in6_addr prefix_buf, *prefix;
 936	struct fib6_table *table;
 937	unsigned int pref;
 938	unsigned long lifetime;
 939	struct fib6_info *rt;
 940
 941	if (len < sizeof(struct route_info)) {
 942		return -EINVAL;
 943	}
 944
 945	/* Sanity check for prefix_len and length */
 946	if (rinfo->length > 3) {
 947		return -EINVAL;
 948	} else if (rinfo->prefix_len > 128) {
 949		return -EINVAL;
 950	} else if (rinfo->prefix_len > 64) {
 951		if (rinfo->length < 2) {
 952			return -EINVAL;
 953		}
 954	} else if (rinfo->prefix_len > 0) {
 955		if (rinfo->length < 1) {
 956			return -EINVAL;
 957		}
 958	}
 959
 960	pref = rinfo->route_pref;
 961	if (pref == ICMPV6_ROUTER_PREF_INVALID)
 962		return -EINVAL;
 963
 964	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
 965
 966	if (rinfo->length == 3)
 967		prefix = (struct in6_addr *)rinfo->prefix;
 968	else {
 969		/* this function is safe */
 970		ipv6_addr_prefix(&prefix_buf,
 971				 (struct in6_addr *)rinfo->prefix,
 972				 rinfo->prefix_len);
 973		prefix = &prefix_buf;
 974	}
 975
 976	if (rinfo->prefix_len == 0)
 977		rt = rt6_get_dflt_router(net, gwaddr, dev);
 978	else
 979		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
 980					gwaddr, dev);
 981
 982	if (rt && !lifetime) {
 983		ip6_del_rt(net, rt, false);
 984		rt = NULL;
 985	}
 986
 987	if (!rt && lifetime)
 988		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
 989					dev, pref);
 990	else if (rt)
 991		rt->fib6_flags = RTF_ROUTEINFO |
 992				 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
 993
 994	if (rt) {
 995		table = rt->fib6_table;
 996		spin_lock_bh(&table->tb6_lock);
 997
 998		if (!addrconf_finite_timeout(lifetime)) {
 999			fib6_clean_expires(rt);
1000			fib6_remove_gc_list(rt);
1001		} else {
1002			fib6_set_expires(rt, jiffies + HZ * lifetime);
1003			fib6_add_gc_list(rt);
1004		}
1005
1006		spin_unlock_bh(&table->tb6_lock);
1007
1008		fib6_info_release(rt);
1009	}
1010	return 0;
1011}
1012#endif
1013
1014/*
1015 *	Misc support functions
1016 */
1017
1018/* called with rcu_read_lock() held */
1019static struct net_device *ip6_rt_get_dev_rcu(const struct fib6_result *res)
1020{
1021	struct net_device *dev = res->nh->fib_nh_dev;
1022
1023	if (res->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
1024	/* for copies of local routes, dst->dev needs to be the device
1025	 * itself if it is a master device, the master device if the
1026	 * device is enslaved, and the loopback device as the default
1027		 */
1028		if (netif_is_l3_slave(dev) &&
1029		    !rt6_need_strict(&res->f6i->fib6_dst.addr))
1030			dev = l3mdev_master_dev_rcu(dev);
1031		else if (!netif_is_l3_master(dev))
1032			dev = dev_net(dev)->loopback_dev;
1033		/* last case is netif_is_l3_master(dev) is true in which
1034		 * case we want dev returned to be dev
1035		 */
1036	}
1037
1038	return dev;
1039}
1040
1041static const int fib6_prop[RTN_MAX + 1] = {
1042	[RTN_UNSPEC]	= 0,
1043	[RTN_UNICAST]	= 0,
1044	[RTN_LOCAL]	= 0,
1045	[RTN_BROADCAST]	= 0,
1046	[RTN_ANYCAST]	= 0,
1047	[RTN_MULTICAST]	= 0,
1048	[RTN_BLACKHOLE]	= -EINVAL,
1049	[RTN_UNREACHABLE] = -EHOSTUNREACH,
1050	[RTN_PROHIBIT]	= -EACCES,
1051	[RTN_THROW]	= -EAGAIN,
1052	[RTN_NAT]	= -EINVAL,
1053	[RTN_XRESOLVE]	= -EINVAL,
1054};
1055
1056static int ip6_rt_type_to_error(u8 fib6_type)
1057{
1058	return fib6_prop[fib6_type];
1059}
1060
1061static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
1062{
1063	unsigned short flags = 0;
1064
1065	if (rt->dst_nocount)
1066		flags |= DST_NOCOUNT;
1067	if (rt->dst_nopolicy)
1068		flags |= DST_NOPOLICY;
1069
1070	return flags;
1071}
1072
1073static void ip6_rt_init_dst_reject(struct rt6_info *rt, u8 fib6_type)
1074{
1075	rt->dst.error = ip6_rt_type_to_error(fib6_type);
1076
1077	switch (fib6_type) {
1078	case RTN_BLACKHOLE:
1079		rt->dst.output = dst_discard_out;
1080		rt->dst.input = dst_discard;
1081		break;
1082	case RTN_PROHIBIT:
1083		rt->dst.output = ip6_pkt_prohibit_out;
1084		rt->dst.input = ip6_pkt_prohibit;
1085		break;
1086	case RTN_THROW:
1087	case RTN_UNREACHABLE:
1088	default:
1089		rt->dst.output = ip6_pkt_discard_out;
1090		rt->dst.input = ip6_pkt_discard;
1091		break;
1092	}
1093}
1094
1095static void ip6_rt_init_dst(struct rt6_info *rt, const struct fib6_result *res)
1096{
1097	struct fib6_info *f6i = res->f6i;
1098
1099	if (res->fib6_flags & RTF_REJECT) {
1100		ip6_rt_init_dst_reject(rt, res->fib6_type);
1101		return;
1102	}
1103
1104	rt->dst.error = 0;
1105	rt->dst.output = ip6_output;
1106
1107	if (res->fib6_type == RTN_LOCAL || res->fib6_type == RTN_ANYCAST) {
1108		rt->dst.input = ip6_input;
1109	} else if (ipv6_addr_type(&f6i->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
1110		rt->dst.input = ip6_mc_input;
1111	} else {
1112		rt->dst.input = ip6_forward;
1113	}
1114
1115	if (res->nh->fib_nh_lws) {
1116		rt->dst.lwtstate = lwtstate_get(res->nh->fib_nh_lws);
1117		lwtunnel_set_redirect(&rt->dst);
1118	}
1119
1120	rt->dst.lastuse = jiffies;
1121}
1122
1123/* Caller must already hold reference to @from */
1124static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
1125{
1126	rt->rt6i_flags &= ~RTF_EXPIRES;
1127	rcu_assign_pointer(rt->from, from);
1128	ip_dst_init_metrics(&rt->dst, from->fib6_metrics);
1129}
1130
1131/* Caller must already hold reference to f6i in result */
1132static void ip6_rt_copy_init(struct rt6_info *rt, const struct fib6_result *res)
1133{
1134	const struct fib6_nh *nh = res->nh;
1135	const struct net_device *dev = nh->fib_nh_dev;
1136	struct fib6_info *f6i = res->f6i;
1137
1138	ip6_rt_init_dst(rt, res);
1139
1140	rt->rt6i_dst = f6i->fib6_dst;
1141	rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
1142	rt->rt6i_flags = res->fib6_flags;
1143	if (nh->fib_nh_gw_family) {
1144		rt->rt6i_gateway = nh->fib_nh_gw6;
1145		rt->rt6i_flags |= RTF_GATEWAY;
1146	}
1147	rt6_set_from(rt, f6i);
1148#ifdef CONFIG_IPV6_SUBTREES
1149	rt->rt6i_src = f6i->fib6_src;
1150#endif
1151}
1152
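/* No match under this node: climb towards the trie root, descending
 * into a parent's source-routing subtree when one exists, until a node
 * carrying route info (RTN_RTINFO) is found; returns NULL once the
 * tree root is reached.
 */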
1153static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
1154					struct in6_addr *saddr)
1155{
1156	struct fib6_node *pn, *sn;
1157	while (1) {
1158		if (fn->fn_flags & RTN_TL_ROOT)
1159			return NULL;
1160		pn = rcu_dereference(fn->parent);
1161		sn = FIB6_SUBTREE(pn);
1162		if (sn && sn != fn)
1163			fn = fib6_node_lookup(sn, NULL, saddr);
1164		else
1165			fn = pn;
1166		if (fn->fn_flags & RTN_RTINFO)
1167			return fn;
1168	}
1169}
1170
1171static bool ip6_hold_safe(struct net *net, struct rt6_info **prt)
1172{
1173	struct rt6_info *rt = *prt;
1174
1175	if (dst_hold_safe(&rt->dst))
1176		return true;
1177	if (net) {
1178		rt = net->ipv6.ip6_null_entry;
1179		dst_hold(&rt->dst);
1180	} else {
1181		rt = NULL;
1182	}
1183	*prt = rt;
1184	return false;
1185}
1186
1187/* called with rcu_lock held */
1188static struct rt6_info *ip6_create_rt_rcu(const struct fib6_result *res)
1189{
1190	struct net_device *dev = res->nh->fib_nh_dev;
1191	struct fib6_info *f6i = res->f6i;
1192	unsigned short flags;
1193	struct rt6_info *nrt;
1194
1195	if (!fib6_info_hold_safe(f6i))
1196		goto fallback;
1197
1198	flags = fib6_info_dst_flags(f6i);
1199	nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
1200	if (!nrt) {
1201		fib6_info_release(f6i);
1202		goto fallback;
1203	}
1204
1205	ip6_rt_copy_init(nrt, res);
1206	return nrt;
1207
1208fallback:
1209	nrt = dev_net(dev)->ipv6.ip6_null_entry;
1210	dst_hold(&nrt->dst);
1211	return nrt;
1212}
1213
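/* Fast-path route lookup: walk the fib trie, match on device/saddr,
 * backtrack towards the root on a miss, pick a multipath sibling, then
 * prefer a cached exception route over creating a new clone.
 */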
1214INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_lookup(struct net *net,
1215					     struct fib6_table *table,
1216					     struct flowi6 *fl6,
1217					     const struct sk_buff *skb,
1218					     int flags)
1219{
1220	struct fib6_result res = {};
1221	struct fib6_node *fn;
1222	struct rt6_info *rt;
1223
1224	rcu_read_lock();
1225	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1226restart:
1227	res.f6i = rcu_dereference(fn->leaf);
1228	if (!res.f6i)
1229		res.f6i = net->ipv6.fib6_null_entry;
1230	else
1231		rt6_device_match(net, &res, &fl6->saddr, fl6->flowi6_oif,
1232				 flags);
1233
1234	if (res.f6i == net->ipv6.fib6_null_entry) {
1235		fn = fib6_backtrack(fn, &fl6->saddr);
1236		if (fn)
1237			goto restart;
1238
1239		rt = net->ipv6.ip6_null_entry;
1240		dst_hold(&rt->dst);
1241		goto out;
1242	} else if (res.fib6_flags & RTF_REJECT) {
1243		goto do_create;
1244	}
1245
1246	fib6_select_path(net, &res, fl6, fl6->flowi6_oif,
1247			 fl6->flowi6_oif != 0, skb, flags);
1248
1249	/* Search through exception table */
1250	rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
1251	if (rt) {
1252		if (ip6_hold_safe(net, &rt))
1253			dst_use_noref(&rt->dst, jiffies);
1254	} else {
1255do_create:
1256		rt = ip6_create_rt_rcu(&res);
1257	}
1258
1259out:
1260	trace_fib6_table_lookup(net, &res, table, fl6);
1261
1262	rcu_read_unlock();
1263
1264	return rt;
1265}
1266
1267struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
1268				   const struct sk_buff *skb, int flags)
1269{
1270	return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
1271}
1272EXPORT_SYMBOL_GPL(ip6_route_lookup);
1273
1274struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
1275			    const struct in6_addr *saddr, int oif,
1276			    const struct sk_buff *skb, int strict)
1277{
1278	struct flowi6 fl6 = {
1279		.flowi6_oif = oif,
1280		.daddr = *daddr,
1281	};
1282	struct dst_entry *dst;
1283	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
1284
1285	if (saddr) {
1286		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
1287		flags |= RT6_LOOKUP_F_HAS_SADDR;
1288	}
1289
1290	dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
1291	if (dst->error == 0)
1292		return dst_rt6_info(dst);
1293
1294	dst_release(dst);
1295
1296	return NULL;
1297}
1298EXPORT_SYMBOL(rt6_lookup);
1299
1300/* ip6_ins_rt is called with table->tb6_lock NOT held.
1301 * It takes a new route entry; if the addition fails for any reason,
1302 * the route is released.
1303 * The caller must hold a dst reference before calling it.
1304 */
1305
1306static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
1307			struct netlink_ext_ack *extack)
1308{
1309	int err;
1310	struct fib6_table *table;
1311
1312	table = rt->fib6_table;
1313	spin_lock_bh(&table->tb6_lock);
1314	err = fib6_add(&table->tb6_root, rt, info, extack);
1315	spin_unlock_bh(&table->tb6_lock);
1316
1317	return err;
1318}
1319
1320int ip6_ins_rt(struct net *net, struct fib6_info *rt)
1321{
1322	struct nl_info info = {	.nl_net = net, };
1323
1324	return __ip6_ins_rt(rt, &info, NULL);
1325}
1326
1327static struct rt6_info *ip6_rt_cache_alloc(const struct fib6_result *res,
1328					   const struct in6_addr *daddr,
1329					   const struct in6_addr *saddr)
1330{
1331	struct fib6_info *f6i = res->f6i;
1332	struct net_device *dev;
1333	struct rt6_info *rt;
1334
1335	/*
1336	 *	Clone the route.
1337	 */
1338
1339	if (!fib6_info_hold_safe(f6i))
1340		return NULL;
1341
1342	dev = ip6_rt_get_dev_rcu(res);
1343	rt = ip6_dst_alloc(dev_net(dev), dev, 0);
1344	if (!rt) {
1345		fib6_info_release(f6i);
1346		return NULL;
1347	}
1348
1349	ip6_rt_copy_init(rt, res);
1350	rt->rt6i_flags |= RTF_CACHE;
1351	rt->rt6i_dst.addr = *daddr;
1352	rt->rt6i_dst.plen = 128;
1353
1354	if (!rt6_is_gw_or_nonexthop(res)) {
1355		if (f6i->fib6_dst.plen != 128 &&
1356		    ipv6_addr_equal(&f6i->fib6_dst.addr, daddr))
1357			rt->rt6i_flags |= RTF_ANYCAST;
1358#ifdef CONFIG_IPV6_SUBTREES
1359		if (rt->rt6i_src.plen && saddr) {
1360			rt->rt6i_src.addr = *saddr;
1361			rt->rt6i_src.plen = 128;
1362		}
1363#endif
1364	}
1365
1366	return rt;
1367}
1368
1369static struct rt6_info *ip6_rt_pcpu_alloc(const struct fib6_result *res)
1370{
1371	struct fib6_info *f6i = res->f6i;
1372	unsigned short flags = fib6_info_dst_flags(f6i);
1373	struct net_device *dev;
1374	struct rt6_info *pcpu_rt;
1375
1376	if (!fib6_info_hold_safe(f6i))
1377		return NULL;
1378
1379	rcu_read_lock();
1380	dev = ip6_rt_get_dev_rcu(res);
1381	pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags | DST_NOCOUNT);
1382	rcu_read_unlock();
1383	if (!pcpu_rt) {
1384		fib6_info_release(f6i);
1385		return NULL;
1386	}
1387	ip6_rt_copy_init(pcpu_rt, res);
1388	pcpu_rt->rt6i_flags |= RTF_PCPU;
1389
1390	if (f6i->nh)
1391		pcpu_rt->sernum = rt_genid_ipv6(dev_net(dev));
1392
1393	return pcpu_rt;
1394}
1395
1396static bool rt6_is_valid(const struct rt6_info *rt6)
1397{
1398	return rt6->sernum == rt_genid_ipv6(dev_net(rt6->dst.dev));
1399}
1400
1401/* It should be called with rcu_read_lock() acquired */
1402static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
1403{
1404	struct rt6_info *pcpu_rt;
1405
1406	pcpu_rt = this_cpu_read(*res->nh->rt6i_pcpu);
1407
1408	if (pcpu_rt && pcpu_rt->sernum && !rt6_is_valid(pcpu_rt)) {
1409		struct rt6_info *prev, **p;
1410
1411		p = this_cpu_ptr(res->nh->rt6i_pcpu);
1412		prev = xchg(p, NULL);
1413		if (prev) {
1414			dst_dev_put(&prev->dst);
1415			dst_release(&prev->dst);
1416		}
1417
1418		pcpu_rt = NULL;
1419	}
1420
1421	return pcpu_rt;
1422}
1423
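/* Allocate a per-CPU cached copy of the lookup result and publish it
 * with cmpxchg(); the slot must still be NULL here (BUG_ON otherwise).
 */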
1424static struct rt6_info *rt6_make_pcpu_route(struct net *net,
1425					    const struct fib6_result *res)
1426{
1427	struct rt6_info *pcpu_rt, *prev, **p;
1428
1429	pcpu_rt = ip6_rt_pcpu_alloc(res);
1430	if (!pcpu_rt)
1431		return NULL;
1432
1433	p = this_cpu_ptr(res->nh->rt6i_pcpu);
1434	prev = cmpxchg(p, NULL, pcpu_rt);
1435	BUG_ON(prev);
1436
1437	if (res->f6i->fib6_destroying) {
1438		struct fib6_info *from;
1439
1440		from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL);
1441		fib6_info_release(from);
1442	}
1443
1444	return pcpu_rt;
1445}
1446
1447/* exception hash table implementation
1448 */
1449static DEFINE_SPINLOCK(rt6_exception_lock);
1450
1451/* Remove rt6_ex from hash table and free the memory
1452 * Caller must hold rt6_exception_lock
1453 */
1454static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1455				 struct rt6_exception *rt6_ex)
1456{
1457	struct fib6_info *from;
1458	struct net *net;
1459
1460	if (!bucket || !rt6_ex)
1461		return;
1462
1463	net = dev_net(rt6_ex->rt6i->dst.dev);
1464	net->ipv6.rt6_stats->fib_rt_cache--;
1465
1466	/* completely purge the exception so its held resources can be released:
1467	 * some socket (sk) caches may keep the dst around indefinitely
1468	 */
1469	from = xchg((__force struct fib6_info **)&rt6_ex->rt6i->from, NULL);
1470	fib6_info_release(from);
1471	dst_dev_put(&rt6_ex->rt6i->dst);
1472
1473	hlist_del_rcu(&rt6_ex->hlist);
1474	dst_release(&rt6_ex->rt6i->dst);
1475	kfree_rcu(rt6_ex, rcu);
1476	WARN_ON_ONCE(!bucket->depth);
1477	bucket->depth--;
1478}
1479
1480/* Remove oldest rt6_ex in bucket and free the memory
1481 * Caller must hold rt6_exception_lock
1482 */
1483static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1484{
1485	struct rt6_exception *rt6_ex, *oldest = NULL;
1486
1487	if (!bucket)
1488		return;
1489
1490	hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1491		if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1492			oldest = rt6_ex;
1493	}
1494	rt6_remove_exception(bucket, oldest);
1495}
1496
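/* Hash the (dst, src) pair with a boot-time random siphash key so that
 * bucket placement in the exception table is not predictable by remote
 * senders.
 */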
1497static u32 rt6_exception_hash(const struct in6_addr *dst,
1498			      const struct in6_addr *src)
1499{
1500	static siphash_aligned_key_t rt6_exception_key;
1501	struct {
1502		struct in6_addr dst;
1503		struct in6_addr src;
1504	} __aligned(SIPHASH_ALIGNMENT) combined = {
1505		.dst = *dst,
1506	};
1507	u64 val;
1508
1509	net_get_random_once(&rt6_exception_key, sizeof(rt6_exception_key));
1510
1511#ifdef CONFIG_IPV6_SUBTREES
1512	if (src)
1513		combined.src = *src;
1514#endif
1515	val = siphash(&combined, sizeof(combined), &rt6_exception_key);
1516
1517	return hash_64(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1518}
1519
1520/* Helper function to find the cached rt in the hash table
1521 * and update bucket pointer to point to the bucket for this
1522 * (daddr, saddr) pair
1523 * Caller must hold rt6_exception_lock
1524 */
1525static struct rt6_exception *
1526__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1527			      const struct in6_addr *daddr,
1528			      const struct in6_addr *saddr)
1529{
1530	struct rt6_exception *rt6_ex;
1531	u32 hval;
1532
1533	if (!(*bucket) || !daddr)
1534		return NULL;
1535
1536	hval = rt6_exception_hash(daddr, saddr);
1537	*bucket += hval;
1538
1539	hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1540		struct rt6_info *rt6 = rt6_ex->rt6i;
1541		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1542
1543#ifdef CONFIG_IPV6_SUBTREES
1544		if (matched && saddr)
1545			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1546#endif
1547		if (matched)
1548			return rt6_ex;
1549	}
1550	return NULL;
1551}
1552
1553/* Helper function to find the cached rt in the hash table
1554 * and update bucket pointer to point to the bucket for this
1555 * (daddr, saddr) pair
1556 * Caller must hold rcu_read_lock()
1557 */
1558static struct rt6_exception *
1559__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1560			 const struct in6_addr *daddr,
1561			 const struct in6_addr *saddr)
1562{
1563	struct rt6_exception *rt6_ex;
1564	u32 hval;
1565
1566	WARN_ON_ONCE(!rcu_read_lock_held());
1567
1568	if (!(*bucket) || !daddr)
1569		return NULL;
1570
1571	hval = rt6_exception_hash(daddr, saddr);
1572	*bucket += hval;
1573
1574	hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1575		struct rt6_info *rt6 = rt6_ex->rt6i;
1576		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1577
1578#ifdef CONFIG_IPV6_SUBTREES
1579		if (matched && saddr)
1580			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1581#endif
1582		if (matched)
1583			return rt6_ex;
1584	}
1585	return NULL;
1586}
1587
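/* Effective MTU for the lookup result: the route's PMTU if set,
 * otherwise the device MTU, capped at IP6_MAX_MTU and reduced by any
 * lwtunnel encapsulation headroom.
 */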
1588static unsigned int fib6_mtu(const struct fib6_result *res)
1589{
1590	const struct fib6_nh *nh = res->nh;
1591	unsigned int mtu;
1592
1593	if (res->f6i->fib6_pmtu) {
1594		mtu = res->f6i->fib6_pmtu;
1595	} else {
1596		struct net_device *dev = nh->fib_nh_dev;
1597		struct inet6_dev *idev;
1598
1599		rcu_read_lock();
1600		idev = __in6_dev_get(dev);
1601		mtu = READ_ONCE(idev->cnf.mtu6);
1602		rcu_read_unlock();
1603	}
1604
1605	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1606
1607	return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
1608}
1609
1610#define FIB6_EXCEPTION_BUCKET_FLUSHED  0x1UL
1611
1612/* used when only access to the bucket is needed and the flushed bit is
1613 * not relevant (i.e., all bucket users except rt6_insert_exception);
1614 *
1615 * called under rcu lock; sometimes called with rt6_exception_lock held
1616 */
1617static
1618struct rt6_exception_bucket *fib6_nh_get_excptn_bucket(const struct fib6_nh *nh,
1619						       spinlock_t *lock)
1620{
1621	struct rt6_exception_bucket *bucket;
1622
1623	if (lock)
1624		bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
1625						   lockdep_is_held(lock));
1626	else
1627		bucket = rcu_dereference(nh->rt6i_exception_bucket);
1628
1629	/* remove bucket flushed bit if set */
1630	if (bucket) {
1631		unsigned long p = (unsigned long)bucket;
1632
1633		p &= ~FIB6_EXCEPTION_BUCKET_FLUSHED;
1634		bucket = (struct rt6_exception_bucket *)p;
1635	}
1636
1637	return bucket;
1638}
1639
1640static bool fib6_nh_excptn_bucket_flushed(struct rt6_exception_bucket *bucket)
1641{
1642	unsigned long p = (unsigned long)bucket;
1643
1644	return !!(p & FIB6_EXCEPTION_BUCKET_FLUSHED);
1645}
1646
1647/* called with rt6_exception_lock held */
1648static void fib6_nh_excptn_bucket_set_flushed(struct fib6_nh *nh,
1649					      spinlock_t *lock)
1650{
1651	struct rt6_exception_bucket *bucket;
1652	unsigned long p;
1653
1654	bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
1655					   lockdep_is_held(lock));
1656
1657	p = (unsigned long)bucket;
1658	p |= FIB6_EXCEPTION_BUCKET_FLUSHED;
1659	bucket = (struct rt6_exception_bucket *)p;
1660	rcu_assign_pointer(nh->rt6i_exception_bucket, bucket);
1661}
1662
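/* Insert a cached route (such as a PMTU or redirect clone) into the
 * nexthop's exception table, replacing any entry for the same
 * (daddr, saddr) pair and trimming the bucket if it grows too deep.
 */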
1663static int rt6_insert_exception(struct rt6_info *nrt,
1664				const struct fib6_result *res)
1665{
1666	struct net *net = dev_net(nrt->dst.dev);
1667	struct rt6_exception_bucket *bucket;
1668	struct fib6_info *f6i = res->f6i;
1669	struct in6_addr *src_key = NULL;
1670	struct rt6_exception *rt6_ex;
1671	struct fib6_nh *nh = res->nh;
1672	int max_depth;
1673	int err = 0;
1674
1675	spin_lock_bh(&rt6_exception_lock);
1676
1677	bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
1678					  lockdep_is_held(&rt6_exception_lock));
1679	if (!bucket) {
1680		bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1681				 GFP_ATOMIC);
1682		if (!bucket) {
1683			err = -ENOMEM;
1684			goto out;
1685		}
1686		rcu_assign_pointer(nh->rt6i_exception_bucket, bucket);
1687	} else if (fib6_nh_excptn_bucket_flushed(bucket)) {
1688		err = -EINVAL;
1689		goto out;
1690	}
1691
1692#ifdef CONFIG_IPV6_SUBTREES
1693	/* fib6_src.plen != 0 indicates f6i is in subtree
1694	 * and exception table is indexed by a hash of
1695	 * both fib6_dst and fib6_src.
1696	 * Otherwise, the exception table is indexed by
1697	 * a hash of only fib6_dst.
1698	 */
1699	if (f6i->fib6_src.plen)
1700		src_key = &nrt->rt6i_src.addr;
1701#endif
1702	/* rt6_mtu_change() might lower mtu on f6i.
1703	 * Only insert this exception route if its mtu
1704	 * is less than f6i's mtu value.
1705	 */
1706	if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(res)) {
1707		err = -EINVAL;
1708		goto out;
1709	}
1710
1711	rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1712					       src_key);
1713	if (rt6_ex)
1714		rt6_remove_exception(bucket, rt6_ex);
1715
1716	rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1717	if (!rt6_ex) {
1718		err = -ENOMEM;
1719		goto out;
1720	}
1721	rt6_ex->rt6i = nrt;
1722	rt6_ex->stamp = jiffies;
1723	hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1724	bucket->depth++;
1725	net->ipv6.rt6_stats->fib_rt_cache++;
1726
1727	/* Randomize max depth to mitigate side-channel attacks. */
1728	max_depth = FIB6_MAX_DEPTH + get_random_u32_below(FIB6_MAX_DEPTH);
1729	while (bucket->depth > max_depth)
1730		rt6_exception_remove_oldest(bucket);
1731
1732out:
1733	spin_unlock_bh(&rt6_exception_lock);
1734
1735	/* Update fn->fn_sernum to invalidate all cached dst */
1736	if (!err) {
1737		spin_lock_bh(&f6i->fib6_table->tb6_lock);
1738		fib6_update_sernum(net, f6i);
1739		spin_unlock_bh(&f6i->fib6_table->tb6_lock);
1740		fib6_force_start_gc(net);
1741	}
1742
1743	return err;
1744}
1745
1746static void fib6_nh_flush_exceptions(struct fib6_nh *nh, struct fib6_info *from)
1747{
1748	struct rt6_exception_bucket *bucket;
1749	struct rt6_exception *rt6_ex;
1750	struct hlist_node *tmp;
1751	int i;
1752
1753	spin_lock_bh(&rt6_exception_lock);
1754
1755	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
1756	if (!bucket)
1757		goto out;
1758
1759	/* Prevent rt6_insert_exception() from recreating the bucket list */
1760	if (!from)
1761		fib6_nh_excptn_bucket_set_flushed(nh, &rt6_exception_lock);
1762
1763	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1764		hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist) {
1765			if (!from ||
1766			    rcu_access_pointer(rt6_ex->rt6i->from) == from)
1767				rt6_remove_exception(bucket, rt6_ex);
1768		}
1769		WARN_ON_ONCE(!from && bucket->depth);
1770		bucket++;
1771	}
1772out:
1773	spin_unlock_bh(&rt6_exception_lock);
1774}
1775
1776static int rt6_nh_flush_exceptions(struct fib6_nh *nh, void *arg)
1777{
1778	struct fib6_info *f6i = arg;
1779
1780	fib6_nh_flush_exceptions(nh, f6i);
1781
1782	return 0;
1783}
1784
1785void rt6_flush_exceptions(struct fib6_info *f6i)
1786{
1787	if (f6i->nh)
1788		nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_flush_exceptions,
1789					 f6i);
1790	else
1791		fib6_nh_flush_exceptions(f6i->fib6_nh, f6i);
1792}
1793
1794/* Find the cached rt in the exception hash table of the passed-in result
1795 * Caller has to hold rcu_read_lock()
1796 */
1797static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
1798					   const struct in6_addr *daddr,
1799					   const struct in6_addr *saddr)
1800{
1801	const struct in6_addr *src_key = NULL;
1802	struct rt6_exception_bucket *bucket;
1803	struct rt6_exception *rt6_ex;
1804	struct rt6_info *ret = NULL;
1805
1806#ifdef CONFIG_IPV6_SUBTREES
1807	/* fib6_src.plen != 0 indicates f6i is in a subtree
1808	 * and exception table is indexed by a hash of
1809	 * both fib6_dst and fib6_src.
1810	 * However, the src addr used to create the hash
1811	 * might not be exactly the passed in saddr which
1812	 * is a /128 addr from the flow.
1813	 * So we need to use f6i->fib6_src to redo lookup
1814	 * if the passed in saddr does not find anything.
1815	 * (See the logic in ip6_rt_cache_alloc() on how
1816	 * rt->rt6i_src is updated.)
1817	 */
1818	if (res->f6i->fib6_src.plen)
1819		src_key = saddr;
1820find_ex:
1821#endif
1822	bucket = fib6_nh_get_excptn_bucket(res->nh, NULL);
1823	rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1824
1825	if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1826		ret = rt6_ex->rt6i;
1827
1828#ifdef CONFIG_IPV6_SUBTREES
1829	/* Use fib6_src as src_key and redo lookup */
1830	if (!ret && src_key && src_key != &res->f6i->fib6_src.addr) {
1831		src_key = &res->f6i->fib6_src.addr;
1832		goto find_ex;
1833	}
1834#endif
1835
1836	return ret;
1837}
1838
1839/* Remove the passed-in cached rt from the hash table that contains it */
1840static int fib6_nh_remove_exception(const struct fib6_nh *nh, int plen,
1841				    const struct rt6_info *rt)
1842{
1843	const struct in6_addr *src_key = NULL;
1844	struct rt6_exception_bucket *bucket;
1845	struct rt6_exception *rt6_ex;
1846	int err;
1847
1848	if (!rcu_access_pointer(nh->rt6i_exception_bucket))
1849		return -ENOENT;
1850
1851	spin_lock_bh(&rt6_exception_lock);
1852	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
1853
1854#ifdef CONFIG_IPV6_SUBTREES
1855	/* rt6i_src.plen != 0 indicates 'from' is in subtree
1856	 * and exception table is indexed by a hash of
1857	 * both rt6i_dst and rt6i_src.
1858	 * Otherwise, the exception table is indexed by
1859	 * a hash of only rt6i_dst.
1860	 */
1861	if (plen)
1862		src_key = &rt->rt6i_src.addr;
1863#endif
1864	rt6_ex = __rt6_find_exception_spinlock(&bucket,
1865					       &rt->rt6i_dst.addr,
1866					       src_key);
1867	if (rt6_ex) {
1868		rt6_remove_exception(bucket, rt6_ex);
1869		err = 0;
1870	} else {
1871		err = -ENOENT;
1872	}
1873
1874	spin_unlock_bh(&rt6_exception_lock);
1875	return err;
1876}
1877
1878struct fib6_nh_excptn_arg {
1879	struct rt6_info	*rt;
1880	int		plen;
1881};
1882
1883static int rt6_nh_remove_exception_rt(struct fib6_nh *nh, void *_arg)
1884{
1885	struct fib6_nh_excptn_arg *arg = _arg;
1886	int err;
1887
1888	err = fib6_nh_remove_exception(nh, arg->plen, arg->rt);
1889	if (err == 0)
1890		return 1;
1891
1892	return 0;
1893}
1894
1895static int rt6_remove_exception_rt(struct rt6_info *rt)
1896{
1897	struct fib6_info *from;
1898
1899	from = rcu_dereference(rt->from);
1900	if (!from || !(rt->rt6i_flags & RTF_CACHE))
1901		return -EINVAL;
1902
1903	if (from->nh) {
1904		struct fib6_nh_excptn_arg arg = {
1905			.rt = rt,
1906			.plen = from->fib6_src.plen
1907		};
1908		int rc;
1909
1910		/* rc = 1 means an entry was found */
1911		rc = nexthop_for_each_fib6_nh(from->nh,
1912					      rt6_nh_remove_exception_rt,
1913					      &arg);
1914		return rc ? 0 : -ENOENT;
1915	}
1916
1917	return fib6_nh_remove_exception(from->fib6_nh,
1918					from->fib6_src.plen, rt);
1919}
1920
1921/* Find the rt6_ex that contains the passed-in rt cache and
1922 * refresh its stamp
1923 */
1924static void fib6_nh_update_exception(const struct fib6_nh *nh, int plen,
1925				     const struct rt6_info *rt)
1926{
1927	const struct in6_addr *src_key = NULL;
1928	struct rt6_exception_bucket *bucket;
1929	struct rt6_exception *rt6_ex;
1930
1931	bucket = fib6_nh_get_excptn_bucket(nh, NULL);
1932#ifdef CONFIG_IPV6_SUBTREES
1933	/* rt6i_src.plen != 0 indicates 'from' is in a subtree
1934	 * and the exception table is indexed by a hash of
1935	 * both rt6i_dst and rt6i_src.
1936	 * Otherwise, the exception table is indexed by
1937	 * a hash of rt6i_dst only.
1938	 */
1939	if (plen)
1940		src_key = &rt->rt6i_src.addr;
1941#endif
1942	rt6_ex = __rt6_find_exception_rcu(&bucket, &rt->rt6i_dst.addr, src_key);
1943	if (rt6_ex)
1944		rt6_ex->stamp = jiffies;
1945}
1946
1947struct fib6_nh_match_arg {
1948	const struct net_device *dev;
1949	const struct in6_addr	*gw;
1950	struct fib6_nh		*match;
1951};
1952
1953	/* determine if fib6_nh has the given device and gateway */
1954static int fib6_nh_find_match(struct fib6_nh *nh, void *_arg)
1955{
1956	struct fib6_nh_match_arg *arg = _arg;
1957
1958	if (arg->dev != nh->fib_nh_dev ||
1959	    (arg->gw && !nh->fib_nh_gw_family) ||
1960	    (!arg->gw && nh->fib_nh_gw_family) ||
1961	    (arg->gw && !ipv6_addr_equal(arg->gw, &nh->fib_nh_gw6)))
1962		return 0;
1963
1964	arg->match = nh;
1965
1966	/* found a match, break the loop */
1967	return 1;
1968}
1969
1970static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1971{
1972	struct fib6_info *from;
1973	struct fib6_nh *fib6_nh;
1974
1975	rcu_read_lock();
1976
1977	from = rcu_dereference(rt->from);
1978	if (!from || !(rt->rt6i_flags & RTF_CACHE))
1979		goto unlock;
1980
1981	if (from->nh) {
1982		struct fib6_nh_match_arg arg = {
1983			.dev = rt->dst.dev,
1984			.gw = &rt->rt6i_gateway,
1985		};
1986
1987		nexthop_for_each_fib6_nh(from->nh, fib6_nh_find_match, &arg);
1988
1989		if (!arg.match)
1990			goto unlock;
1991		fib6_nh = arg.match;
1992	} else {
1993		fib6_nh = from->fib6_nh;
1994	}
1995	fib6_nh_update_exception(fib6_nh, from->fib6_src.plen, rt);
1996unlock:
1997	rcu_read_unlock();
1998}
1999
2000static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
2001					 struct rt6_info *rt, int mtu)
2002{
2003	/* If the new MTU is lower than the route PMTU, this new MTU will be the
2004	 * lowest MTU in the path: always allow updating the route PMTU to
2005	 * reflect PMTU decreases.
2006	 *
2007	 * If the new MTU is higher, and the route PMTU is equal to the local
2008	 * MTU, this means the old MTU is the lowest in the path, so allow
2009	 * updating it: if other nodes now have lower MTUs, PMTU discovery will
2010	 * handle this.
2011	 */
2012
2013	if (dst_mtu(&rt->dst) >= mtu)
2014		return true;
2015
2016	if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
2017		return true;
2018
2019	return false;
2020}
2021
2022static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
2023				       const struct fib6_nh *nh, int mtu)
2024{
2025	struct rt6_exception_bucket *bucket;
2026	struct rt6_exception *rt6_ex;
2027	int i;
2028
2029	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
2030	if (!bucket)
2031		return;
2032
2033	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
2034		hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
2035			struct rt6_info *entry = rt6_ex->rt6i;
2036
2037			/* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
2038			 * route), the metrics of its rt->from have already
2039			 * been updated.
2040			 */
2041			if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
2042			    rt6_mtu_change_route_allowed(idev, entry, mtu))
2043				dst_metric_set(&entry->dst, RTAX_MTU, mtu);
2044		}
2045		bucket++;
2046	}
2047}
2048
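/* Both flags must match: only cached entries that actually point at a
 * gateway are candidates for removal when a host becomes unreachable.
 */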
2049#define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
2050
2051static void fib6_nh_exceptions_clean_tohost(const struct fib6_nh *nh,
2052					    const struct in6_addr *gateway)
2053{
2054	struct rt6_exception_bucket *bucket;
2055	struct rt6_exception *rt6_ex;
2056	struct hlist_node *tmp;
2057	int i;
2058
2059	if (!rcu_access_pointer(nh->rt6i_exception_bucket))
2060		return;
2061
2062	spin_lock_bh(&rt6_exception_lock);
2063	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
2064	if (bucket) {
2065		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
2066			hlist_for_each_entry_safe(rt6_ex, tmp,
2067						  &bucket->chain, hlist) {
2068				struct rt6_info *entry = rt6_ex->rt6i;
2069
2070				if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
2071				    RTF_CACHE_GATEWAY &&
2072				    ipv6_addr_equal(gateway,
2073						    &entry->rt6i_gateway)) {
2074					rt6_remove_exception(bucket, rt6_ex);
2075				}
2076			}
2077			bucket++;
2078		}
2079	}
2080
2081	spin_unlock_bh(&rt6_exception_lock);
2082}
2083
2084static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
2085				      struct rt6_exception *rt6_ex,
2086				      struct fib6_gc_args *gc_args,
2087				      unsigned long now)
2088{
2089	struct rt6_info *rt = rt6_ex->rt6i;
2090
2091	/* We are pruning and obsoleting aged-out and non-gateway exceptions
2092	 * even if others still hold references to them, so that on the next
2093	 * dst_check() such references can be dropped.
2094	 * RTF_EXPIRES exceptions - e.g. pmtu-generated ones - are pruned when
2095	 * expired, independently of their aging, as per RFC 8201 section 4.
2096	 */
2097	if (!(rt->rt6i_flags & RTF_EXPIRES)) {
2098		if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
2099			pr_debug("aging clone %p\n", rt);
2100			rt6_remove_exception(bucket, rt6_ex);
2101			return;
2102		}
2103	} else if (time_after(jiffies, rt->dst.expires)) {
2104		pr_debug("purging expired route %p\n", rt);
2105		rt6_remove_exception(bucket, rt6_ex);
2106		return;
2107	}
2108
2109	if (rt->rt6i_flags & RTF_GATEWAY) {
2110		struct neighbour *neigh;
2111
2112		neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
2113
2114		if (!(neigh && (neigh->flags & NTF_ROUTER))) {
2115			pr_debug("purging route %p via non-router but gateway\n",
2116				 rt);
2117			rt6_remove_exception(bucket, rt6_ex);
2118			return;
2119		}
2120	}
2121
2122	gc_args->more++;
2123}
2124
2125static void fib6_nh_age_exceptions(const struct fib6_nh *nh,
2126				   struct fib6_gc_args *gc_args,
2127				   unsigned long now)
2128{
2129	struct rt6_exception_bucket *bucket;
2130	struct rt6_exception *rt6_ex;
2131	struct hlist_node *tmp;
2132	int i;
2133
2134	if (!rcu_access_pointer(nh->rt6i_exception_bucket))
2135		return;
2136
2137	rcu_read_lock_bh();
2138	spin_lock(&rt6_exception_lock);
2139	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
2140	if (bucket) {
2141		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
2142			hlist_for_each_entry_safe(rt6_ex, tmp,
2143						  &bucket->chain, hlist) {
2144				rt6_age_examine_exception(bucket, rt6_ex,
2145							  gc_args, now);
2146			}
2147			bucket++;
2148		}
2149	}
2150	spin_unlock(&rt6_exception_lock);
2151	rcu_read_unlock_bh();
2152}
2153
2154struct fib6_nh_age_excptn_arg {
2155	struct fib6_gc_args	*gc_args;
2156	unsigned long		now;
2157};
2158
2159static int rt6_nh_age_exceptions(struct fib6_nh *nh, void *_arg)
2160{
2161	struct fib6_nh_age_excptn_arg *arg = _arg;
2162
2163	fib6_nh_age_exceptions(nh, arg->gc_args, arg->now);
2164	return 0;
2165}
2166
2167void rt6_age_exceptions(struct fib6_info *f6i,
2168			struct fib6_gc_args *gc_args,
2169			unsigned long now)
2170{
2171	if (f6i->nh) {
2172		struct fib6_nh_age_excptn_arg arg = {
2173			.gc_args = gc_args,
2174			.now = now
2175		};
2176
2177		nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_age_exceptions,
2178					 &arg);
2179	} else {
2180		fib6_nh_age_exceptions(f6i->fib6_nh, gc_args, now);
2181	}
2182}
2183
2184/* must be called with rcu lock held */
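/* Lookup strategy: on a miss the tree is walked back towards less
 * specific prefixes via fib6_backtrack(); if the search constrained by
 * RT6_LOOKUP_F_REACHABLE found nothing, it is redone once from the
 * original node without that flag so an unreachable route can still be
 * returned.
 */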
2185int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif,
2186		      struct flowi6 *fl6, struct fib6_result *res, int strict)
2187{
2188	struct fib6_node *fn, *saved_fn;
2189
2190	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2191	saved_fn = fn;
2192
2193redo_rt6_select:
2194	rt6_select(net, fn, oif, res, strict);
2195	if (res->f6i == net->ipv6.fib6_null_entry) {
2196		fn = fib6_backtrack(fn, &fl6->saddr);
2197		if (fn)
2198			goto redo_rt6_select;
2199		else if (strict & RT6_LOOKUP_F_REACHABLE) {
2200			/* also consider unreachable route */
2201			strict &= ~RT6_LOOKUP_F_REACHABLE;
2202			fn = saved_fn;
2203			goto redo_rt6_select;
2204		}
2205	}
2206
2207	trace_fib6_table_lookup(net, res, table, fl6);
2208
2209	return 0;
2210}
2211
2212struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
2213			       int oif, struct flowi6 *fl6,
2214			       const struct sk_buff *skb, int flags)
2215{
2216	struct fib6_result res = {};
2217	struct rt6_info *rt = NULL;
2218	int strict = 0;
2219
2220	WARN_ON_ONCE((flags & RT6_LOOKUP_F_DST_NOREF) &&
2221		     !rcu_read_lock_held());
2222
2223	strict |= flags & RT6_LOOKUP_F_IFACE;
2224	strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
2225	if (READ_ONCE(net->ipv6.devconf_all->forwarding) == 0)
2226		strict |= RT6_LOOKUP_F_REACHABLE;
2227
2228	rcu_read_lock();
2229
2230	fib6_table_lookup(net, table, oif, fl6, &res, strict);
2231	if (res.f6i == net->ipv6.fib6_null_entry)
2232		goto out;
2233
2234	fib6_select_path(net, &res, fl6, oif, false, skb, strict);
2235
2236	/* Search through the exception table */
2237	rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
2238	if (rt) {
2239		goto out;
2240	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
2241			    !res.nh->fib_nh_gw_family)) {
2242		/* Create an RTF_CACHE clone which will not be
2243		 * owned by the fib6 tree.  It is for the special case where
2244		 * the daddr in the skb during the neighbor look-up is different
2245		 * from the fl6->daddr used to look up the route here.
2246		 */
2247		rt = ip6_rt_cache_alloc(&res, &fl6->daddr, NULL);
2248
2249		if (rt) {
2250			/* 1 refcnt is taken during ip6_rt_cache_alloc().
2251			 * As rt6_uncached_list_add() does not consume refcnt,
2252			 * this refcnt is always returned to the caller even
2253			 * if caller sets RT6_LOOKUP_F_DST_NOREF flag.
2254			 */
2255			rt6_uncached_list_add(rt);
2256			rcu_read_unlock();
2257
2258			return rt;
2259		}
2260	} else {
2261		/* Get a percpu copy */
2262		local_bh_disable();
2263		rt = rt6_get_pcpu_route(&res);
2264
2265		if (!rt)
2266			rt = rt6_make_pcpu_route(net, &res);
2267
2268		local_bh_enable();
2269	}
2270out:
2271	if (!rt)
2272		rt = net->ipv6.ip6_null_entry;
2273	if (!(flags & RT6_LOOKUP_F_DST_NOREF))
2274		ip6_hold_safe(net, &rt);
2275	rcu_read_unlock();
2276
2277	return rt;
2278}
2279EXPORT_SYMBOL_GPL(ip6_pol_route);
2280
2281INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_input(struct net *net,
2282					    struct fib6_table *table,
2283					    struct flowi6 *fl6,
2284					    const struct sk_buff *skb,
2285					    int flags)
2286{
2287	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
2288}
2289
2290struct dst_entry *ip6_route_input_lookup(struct net *net,
2291					 struct net_device *dev,
2292					 struct flowi6 *fl6,
2293					 const struct sk_buff *skb,
2294					 int flags)
2295{
2296	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
2297		flags |= RT6_LOOKUP_F_IFACE;
2298
2299	return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
2300}
2301EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
2302
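/* For ICMPv6 errors, hash on the addresses of the embedded (offending)
 * packet so that the error follows the same multipath route as the
 * flow it relates to.
 */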
2303static void ip6_multipath_l3_keys(const struct sk_buff *skb,
2304				  struct flow_keys *keys,
2305				  struct flow_keys *flkeys)
2306{
2307	const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
2308	const struct ipv6hdr *key_iph = outer_iph;
2309	struct flow_keys *_flkeys = flkeys;
2310	const struct ipv6hdr *inner_iph;
2311	const struct icmp6hdr *icmph;
2312	struct ipv6hdr _inner_iph;
2313	struct icmp6hdr _icmph;
2314
2315	if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
2316		goto out;
2317
2318	icmph = skb_header_pointer(skb, skb_transport_offset(skb),
2319				   sizeof(_icmph), &_icmph);
2320	if (!icmph)
2321		goto out;
2322
2323	if (!icmpv6_is_err(icmph->icmp6_type))
2324		goto out;
2325
2326	inner_iph = skb_header_pointer(skb,
2327				       skb_transport_offset(skb) + sizeof(*icmph),
2328				       sizeof(_inner_iph), &_inner_iph);
2329	if (!inner_iph)
2330		goto out;
2331
2332	key_iph = inner_iph;
2333	_flkeys = NULL;
2334out:
2335	if (_flkeys) {
2336		keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
2337		keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
2338		keys->tags.flow_label = _flkeys->tags.flow_label;
2339		keys->basic.ip_proto = _flkeys->basic.ip_proto;
2340	} else {
2341		keys->addrs.v6addrs.src = key_iph->saddr;
2342		keys->addrs.v6addrs.dst = key_iph->daddr;
2343		keys->tags.flow_label = ip6_flowlabel(key_iph);
2344		keys->basic.ip_proto = key_iph->nexthdr;
2345	}
2346}
2347
2348static u32 rt6_multipath_custom_hash_outer(const struct net *net,
2349					   const struct sk_buff *skb,
2350					   bool *p_has_inner)
2351{
2352	u32 hash_fields = ip6_multipath_hash_fields(net);
2353	struct flow_keys keys, hash_keys;
2354
2355	if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK))
2356		return 0;
2357
2358	memset(&hash_keys, 0, sizeof(hash_keys));
2359	skb_flow_dissect_flow_keys(skb, &keys, FLOW_DISSECTOR_F_STOP_AT_ENCAP);
2360
2361	hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2362	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP)
2363		hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src;
2364	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP)
2365		hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst;
2366	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
2367		hash_keys.basic.ip_proto = keys.basic.ip_proto;
2368	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL)
2369		hash_keys.tags.flow_label = keys.tags.flow_label;
2370	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
2371		hash_keys.ports.src = keys.ports.src;
2372	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
2373		hash_keys.ports.dst = keys.ports.dst;
2374
2375	*p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION);
2376	return flow_hash_from_keys(&hash_keys);
2377}
2378
2379static u32 rt6_multipath_custom_hash_inner(const struct net *net,
2380					   const struct sk_buff *skb,
2381					   bool has_inner)
2382{
2383	u32 hash_fields = ip6_multipath_hash_fields(net);
2384	struct flow_keys keys, hash_keys;
2385
2386	/* We assume the packet carries an encapsulation, but if none was
2387	 * encountered during dissection of the outer flow, then there is no
2388	 * point in calling the flow dissector again.
2389	 */
2390	if (!has_inner)
2391		return 0;
2392
2393	if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_MASK))
2394		return 0;
2395
2396	memset(&hash_keys, 0, sizeof(hash_keys));
2397	skb_flow_dissect_flow_keys(skb, &keys, 0);
2398
2399	if (!(keys.control.flags & FLOW_DIS_ENCAPSULATION))
2400		return 0;
2401
2402	if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
2403		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
2404		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP)
2405			hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
2406		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP)
2407			hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
2408	} else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
2409		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2410		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP)
2411			hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src;
2412		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP)
2413			hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst;
2414		if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL)
2415			hash_keys.tags.flow_label = keys.tags.flow_label;
2416	}
2417
2418	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO)
2419		hash_keys.basic.ip_proto = keys.basic.ip_proto;
2420	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT)
2421		hash_keys.ports.src = keys.ports.src;
2422	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT)
2423		hash_keys.ports.dst = keys.ports.dst;
2424
2425	return flow_hash_from_keys(&hash_keys);
2426}
2427
2428static u32 rt6_multipath_custom_hash_skb(const struct net *net,
2429					 const struct sk_buff *skb)
2430{
2431	u32 mhash, mhash_inner;
2432	bool has_inner = true;
2433
2434	mhash = rt6_multipath_custom_hash_outer(net, skb, &has_inner);
2435	mhash_inner = rt6_multipath_custom_hash_inner(net, skb, has_inner);
2436
2437	return jhash_2words(mhash, mhash_inner, 0);
2438}
2439
2440static u32 rt6_multipath_custom_hash_fl6(const struct net *net,
2441					 const struct flowi6 *fl6)
2442{
2443	u32 hash_fields = ip6_multipath_hash_fields(net);
2444	struct flow_keys hash_keys;
2445
2446	if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK))
2447		return 0;
2448
2449	memset(&hash_keys, 0, sizeof(hash_keys));
2450	hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2451	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP)
2452		hash_keys.addrs.v6addrs.src = fl6->saddr;
2453	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP)
2454		hash_keys.addrs.v6addrs.dst = fl6->daddr;
2455	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
2456		hash_keys.basic.ip_proto = fl6->flowi6_proto;
2457	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL)
2458		hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
2459	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
2460		hash_keys.ports.src = fl6->fl6_sport;
2461	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
2462		hash_keys.ports.dst = fl6->fl6_dport;
2463
2464	return flow_hash_from_keys(&hash_keys);
2465}
2466
2467/* if skb is set it will be used and fl6 can be NULL */
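/* Hash policy is net.ipv6.fib_multipath_hash_policy:
 *   0 - L3 (src/dst address, flow label, protocol)
 *   1 - L4 five-tuple
 *   2 - L3, or inner L3 for encapsulated packets
 *   3 - custom field set from net.ipv6.fib_multipath_hash_fields
 * e.g. "sysctl -w net.ipv6.fib_multipath_hash_policy=1" selects L4
 * hashing.
 */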
2468u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
2469		       const struct sk_buff *skb, struct flow_keys *flkeys)
2470{
2471	struct flow_keys hash_keys;
2472	u32 mhash = 0;
2473
2474	switch (ip6_multipath_hash_policy(net)) {
2475	case 0:
2476		memset(&hash_keys, 0, sizeof(hash_keys));
2477		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2478		if (skb) {
2479			ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
2480		} else {
2481			hash_keys.addrs.v6addrs.src = fl6->saddr;
2482			hash_keys.addrs.v6addrs.dst = fl6->daddr;
2483			hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
2484			hash_keys.basic.ip_proto = fl6->flowi6_proto;
2485		}
2486		mhash = flow_hash_from_keys(&hash_keys);
2487		break;
2488	case 1:
2489		if (skb) {
2490			unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
2491			struct flow_keys keys;
2492
2493			/* short-circuit if an L4 hash is already present */
2494			if (skb->l4_hash)
2495				return skb_get_hash_raw(skb) >> 1;
2496
2497			memset(&hash_keys, 0, sizeof(hash_keys));
2498
2499			if (!flkeys) {
2500				skb_flow_dissect_flow_keys(skb, &keys, flag);
2501				flkeys = &keys;
2502			}
2503			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2504			hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2505			hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2506			hash_keys.ports.src = flkeys->ports.src;
2507			hash_keys.ports.dst = flkeys->ports.dst;
2508			hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2509		} else {
2510			memset(&hash_keys, 0, sizeof(hash_keys));
2511			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2512			hash_keys.addrs.v6addrs.src = fl6->saddr;
2513			hash_keys.addrs.v6addrs.dst = fl6->daddr;
2514			hash_keys.ports.src = fl6->fl6_sport;
2515			hash_keys.ports.dst = fl6->fl6_dport;
2516			hash_keys.basic.ip_proto = fl6->flowi6_proto;
2517		}
2518		mhash = flow_hash_from_keys(&hash_keys);
2519		break;
2520	case 2:
2521		memset(&hash_keys, 0, sizeof(hash_keys));
2522		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2523		if (skb) {
2524			struct flow_keys keys;
2525
2526			if (!flkeys) {
2527				skb_flow_dissect_flow_keys(skb, &keys, 0);
2528				flkeys = &keys;
2529			}
2530
2531			/* Inner can be v4 or v6 */
2532			if (flkeys->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
2533				hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
2534				hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
2535				hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
2536			} else if (flkeys->control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
2537				hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2538				hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2539				hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2540				hash_keys.tags.flow_label = flkeys->tags.flow_label;
2541				hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2542			} else {
2543				/* Same as case 0 */
2544				hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2545				ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
2546			}
2547		} else {
2548			/* Same as case 0 */
2549			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2550			hash_keys.addrs.v6addrs.src = fl6->saddr;
2551			hash_keys.addrs.v6addrs.dst = fl6->daddr;
2552			hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
2553			hash_keys.basic.ip_proto = fl6->flowi6_proto;
2554		}
2555		mhash = flow_hash_from_keys(&hash_keys);
2556		break;
2557	case 3:
2558		if (skb)
2559			mhash = rt6_multipath_custom_hash_skb(net, skb);
2560		else
2561			mhash = rt6_multipath_custom_hash_fl6(net, fl6);
2562		break;
2563	}
2564
2565	return mhash >> 1;
2566}
2567
2568/* Called with rcu held */
2569void ip6_route_input(struct sk_buff *skb)
2570{
2571	const struct ipv6hdr *iph = ipv6_hdr(skb);
2572	struct net *net = dev_net(skb->dev);
2573	int flags = RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_DST_NOREF;
2574	struct ip_tunnel_info *tun_info;
2575	struct flowi6 fl6 = {
2576		.flowi6_iif = skb->dev->ifindex,
2577		.daddr = iph->daddr,
2578		.saddr = iph->saddr,
2579		.flowlabel = ip6_flowinfo(iph),
2580		.flowi6_mark = skb->mark,
2581		.flowi6_proto = iph->nexthdr,
2582	};
2583	struct flow_keys *flkeys = NULL, _flkeys;
2584
2585	tun_info = skb_tunnel_info(skb);
2586	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
2587		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
2588
2589	if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2590		flkeys = &_flkeys;
2591
2592	if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
2593		fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
2594	skb_dst_drop(skb);
2595	skb_dst_set_noref(skb, ip6_route_input_lookup(net, skb->dev,
2596						      &fl6, skb, flags));
2597}
2598
2599INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_output(struct net *net,
2600					     struct fib6_table *table,
2601					     struct flowi6 *fl6,
2602					     const struct sk_buff *skb,
2603					     int flags)
2604{
2605	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
2606}
2607
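/* Output route lookup that does not take a reference on the returned
 * dst: the caller must hold rcu_read_lock() and grab its own reference
 * if the dst escapes the RCU section.
 */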
2608static struct dst_entry *ip6_route_output_flags_noref(struct net *net,
2609						      const struct sock *sk,
2610						      struct flowi6 *fl6,
2611						      int flags)
2612{
2613	bool any_src;
2614
2615	if (ipv6_addr_type(&fl6->daddr) &
2616	    (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
2617		struct dst_entry *dst;
2618
2619		/* This function does not take refcnt on the dst */
2620		dst = l3mdev_link_scope_lookup(net, fl6);
2621		if (dst)
2622			return dst;
2623	}
2624
2625	fl6->flowi6_iif = LOOPBACK_IFINDEX;
2626
2627	flags |= RT6_LOOKUP_F_DST_NOREF;
2628	any_src = ipv6_addr_any(&fl6->saddr);
2629	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
2630	    (fl6->flowi6_oif && any_src))
2631		flags |= RT6_LOOKUP_F_IFACE;
2632
2633	if (!any_src)
2634		flags |= RT6_LOOKUP_F_HAS_SADDR;
2635	else if (sk)
2636		flags |= rt6_srcprefs2flags(READ_ONCE(inet6_sk(sk)->srcprefs));
2637
2638	return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
2639}
2640
2641struct dst_entry *ip6_route_output_flags(struct net *net,
2642					 const struct sock *sk,
2643					 struct flowi6 *fl6,
2644					 int flags)
2645{
2646	struct dst_entry *dst;
2647	struct rt6_info *rt6;
2648
2649	rcu_read_lock();
2650	dst = ip6_route_output_flags_noref(net, sk, fl6, flags);
2651	rt6 = dst_rt6_info(dst);
2652	/* For dst cached in uncached_list, refcnt is already taken. */
2653	if (list_empty(&rt6->dst.rt_uncached) && !dst_hold_safe(dst)) {
2654		dst = &net->ipv6.ip6_null_entry->dst;
2655		dst_hold(dst);
2656	}
2657	rcu_read_unlock();
2658
2659	return dst;
2660}
2661EXPORT_SYMBOL_GPL(ip6_route_output_flags);
2662
2663struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
2664{
2665	struct rt6_info *rt, *ort = dst_rt6_info(dst_orig);
2666	struct net_device *loopback_dev = net->loopback_dev;
2667	struct dst_entry *new = NULL;
2668
2669	rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev,
2670		       DST_OBSOLETE_DEAD, 0);
2671	if (rt) {
2672		rt6_info_init(rt);
2673		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
2674
2675		new = &rt->dst;
2676		new->__use = 1;
2677		new->input = dst_discard;
2678		new->output = dst_discard_out;
2679
2680		dst_copy_metrics(new, &ort->dst);
2681
2682		rt->rt6i_idev = in6_dev_get(loopback_dev);
2683		rt->rt6i_gateway = ort->rt6i_gateway;
2684		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
2685
2686		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2687#ifdef CONFIG_IPV6_SUBTREES
2688		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2689#endif
2690	}
2691
2692	dst_release(dst_orig);
2693	return new ? new : ERR_PTR(-ENOMEM);
2694}
2695
2696/*
2697 *	Destination cache support functions
2698 */
2699
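/* Validate a fib6_info against the cookie captured when the dst was
 * created: the tree sernum must still match and the entry must not
 * have expired.
 */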
2700static bool fib6_check(struct fib6_info *f6i, u32 cookie)
2701{
2702	u32 rt_cookie = 0;
2703
2704	if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
2705		return false;
2706
2707	if (fib6_check_expired(f6i))
2708		return false;
2709
2710	return true;
2711}
2712
2713static struct dst_entry *rt6_check(struct rt6_info *rt,
2714				   struct fib6_info *from,
2715				   u32 cookie)
2716{
2717	u32 rt_cookie = 0;
2718
2719	if (!from || !fib6_get_cookie_safe(from, &rt_cookie) ||
2720	    rt_cookie != cookie)
2721		return NULL;
2722
2723	if (rt6_check_expired(rt))
2724		return NULL;
2725
2726	return &rt->dst;
2727}
2728
2729static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
2730					    struct fib6_info *from,
2731					    u32 cookie)
2732{
2733	if (!__rt6_check_expired(rt) &&
2734	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
2735	    fib6_check(from, cookie))
2736		return &rt->dst;
2737	else
2738		return NULL;
2739}
2740
2741INDIRECT_CALLABLE_SCOPE struct dst_entry *ip6_dst_check(struct dst_entry *dst,
2742							u32 cookie)
2743{
2744	struct dst_entry *dst_ret;
2745	struct fib6_info *from;
2746	struct rt6_info *rt;
2747
2748	rt = dst_rt6_info(dst);
2749
2750	if (rt->sernum)
2751		return rt6_is_valid(rt) ? dst : NULL;
2752
2753	rcu_read_lock();
2754
2755	/* All IPv6 dsts are created with ->obsolete set to
2756	 * DST_OBSOLETE_FORCE_CHK, which forces validation calls to always
2757	 * come down into this function.
2758	 */
2759
2760	from = rcu_dereference(rt->from);
2761
2762	if (from && (rt->rt6i_flags & RTF_PCPU ||
2763	    unlikely(!list_empty(&rt->dst.rt_uncached))))
2764		dst_ret = rt6_dst_from_check(rt, from, cookie);
2765	else
2766		dst_ret = rt6_check(rt, from, cookie);
2767
2768	rcu_read_unlock();
2769
2770	return dst_ret;
2771}
2772EXPORT_INDIRECT_CALLABLE(ip6_dst_check);
2773
2774static void ip6_negative_advice(struct sock *sk,
2775				struct dst_entry *dst)
2776{
2777	struct rt6_info *rt = dst_rt6_info(dst);
2778
2779	if (rt->rt6i_flags & RTF_CACHE) {
2780		rcu_read_lock();
2781		if (rt6_check_expired(rt)) {
2782			/* counteract the dst_release() in sk_dst_reset() */
2783			dst_hold(dst);
2784			sk_dst_reset(sk);
2785
2786			rt6_remove_exception_rt(rt);
2787		}
2788		rcu_read_unlock();
2789		return;
2790	}
2791	sk_dst_reset(sk);
2792}
2793
2794static void ip6_link_failure(struct sk_buff *skb)
2795{
2796	struct rt6_info *rt;
2797
2798	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
2799
2800	rt = dst_rt6_info(skb_dst(skb));
2801	if (rt) {
2802		rcu_read_lock();
2803		if (rt->rt6i_flags & RTF_CACHE) {
2804			rt6_remove_exception_rt(rt);
2805		} else {
2806			struct fib6_info *from;
2807			struct fib6_node *fn;
2808
2809			from = rcu_dereference(rt->from);
2810			if (from) {
2811				fn = rcu_dereference(from->fib6_node);
2812				if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2813					WRITE_ONCE(fn->fn_sernum, -1);
2814			}
2815		}
2816		rcu_read_unlock();
2817	}
2818}
2819
2820static void rt6_update_expires(struct rt6_info *rt0, int timeout)
2821{
2822	if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
2823		struct fib6_info *from;
2824
2825		rcu_read_lock();
2826		from = rcu_dereference(rt0->from);
2827		if (from)
2828			rt0->dst.expires = from->expires;
2829		rcu_read_unlock();
2830	}
2831
2832	dst_set_expires(&rt0->dst, timeout);
2833	rt0->rt6i_flags |= RTF_EXPIRES;
2834}
2835
2836static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2837{
2838	struct net *net = dev_net(rt->dst.dev);
2839
2840	dst_metric_set(&rt->dst, RTAX_MTU, mtu);
2841	rt->rt6i_flags |= RTF_MODIFIED;
2842	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2843}
2844
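/* A new PMTU exception may only be cached from a per-cpu copy or a dst
 * that still references its parent fib6_info; an existing RTF_CACHE
 * entry is instead updated in place by rt6_do_update_pmtu().
 */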
2845static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2846{
2847	return !(rt->rt6i_flags & RTF_CACHE) &&
2848		(rt->rt6i_flags & RTF_PCPU || rcu_access_pointer(rt->from));
2849}
2850
2851static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2852				 const struct ipv6hdr *iph, u32 mtu,
2853				 bool confirm_neigh)
2854{
2855	const struct in6_addr *daddr, *saddr;
2856	struct rt6_info *rt6 = dst_rt6_info(dst);
2857
2858	/* Note: do *NOT* check dst_metric_locked(dst, RTAX_MTU) here:
2859	 * IPv6 pmtu discovery isn't optional, so 'mtu lock' cannot disable it.
2860	 * [see also the comment in rt6_mtu_change_route()]
2861	 */
2862
2863	if (iph) {
2864		daddr = &iph->daddr;
2865		saddr = &iph->saddr;
2866	} else if (sk) {
2867		daddr = &sk->sk_v6_daddr;
2868		saddr = &inet6_sk(sk)->saddr;
2869	} else {
2870		daddr = NULL;
2871		saddr = NULL;
2872	}
2873
2874	if (confirm_neigh)
2875		dst_confirm_neigh(dst, daddr);
2876
2877	if (mtu < IPV6_MIN_MTU)
2878		return;
2879	if (mtu >= dst_mtu(dst))
2880		return;
2881
2882	if (!rt6_cache_allowed_for_pmtu(rt6)) {
2883		rt6_do_update_pmtu(rt6, mtu);
2884		/* update rt6_ex->stamp for cache */
2885		if (rt6->rt6i_flags & RTF_CACHE)
2886			rt6_update_exception_stamp_rt(rt6);
2887	} else if (daddr) {
2888		struct fib6_result res = {};
2889		struct rt6_info *nrt6;
2890
2891		rcu_read_lock();
2892		res.f6i = rcu_dereference(rt6->from);
2893		if (!res.f6i)
2894			goto out_unlock;
2895
2896		res.fib6_flags = res.f6i->fib6_flags;
2897		res.fib6_type = res.f6i->fib6_type;
2898
2899		if (res.f6i->nh) {
2900			struct fib6_nh_match_arg arg = {
2901				.dev = dst->dev,
2902				.gw = &rt6->rt6i_gateway,
2903			};
2904
2905			nexthop_for_each_fib6_nh(res.f6i->nh,
2906						 fib6_nh_find_match, &arg);
2907
2908			/* fib6_info uses a nexthop that has no fib6_nh
2909			 * matching the dst->dev + gw pair. Should be impossible.
2910			 */
2911			if (!arg.match)
2912				goto out_unlock;
2913
2914			res.nh = arg.match;
2915		} else {
2916			res.nh = res.f6i->fib6_nh;
2917		}
2918
2919		nrt6 = ip6_rt_cache_alloc(&res, daddr, saddr);
2920		if (nrt6) {
2921			rt6_do_update_pmtu(nrt6, mtu);
2922			if (rt6_insert_exception(nrt6, &res))
2923				dst_release_immediate(&nrt6->dst);
2924		}
2925out_unlock:
2926		rcu_read_unlock();
2927	}
2928}
2929
2930static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2931			       struct sk_buff *skb, u32 mtu,
2932			       bool confirm_neigh)
2933{
2934	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu,
2935			     confirm_neigh);
2936}
2937
2938void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
2939		     int oif, u32 mark, kuid_t uid)
2940{
2941	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2942	struct dst_entry *dst;
2943	struct flowi6 fl6 = {
2944		.flowi6_oif = oif,
2945		.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark),
2946		.daddr = iph->daddr,
2947		.saddr = iph->saddr,
2948		.flowlabel = ip6_flowinfo(iph),
2949		.flowi6_uid = uid,
2950	};
2951
2952	dst = ip6_route_output(net, NULL, &fl6);
2953	if (!dst->error)
2954		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu), true);
2955	dst_release(dst);
2956}
2957EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2958
2959void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2960{
2961	int oif = sk->sk_bound_dev_if;
2962	struct dst_entry *dst;
2963
2964	if (!oif && skb->dev)
2965		oif = l3mdev_master_ifindex(skb->dev);
2966
2967	ip6_update_pmtu(skb, sock_net(sk), mtu, oif, READ_ONCE(sk->sk_mark),
2968			sk->sk_uid);
2969
2970	dst = __sk_dst_get(sk);
2971	if (!dst || !dst->obsolete ||
2972	    dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2973		return;
2974
2975	bh_lock_sock(sk);
2976	if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2977		ip6_datagram_dst_update(sk, false);
2978	bh_unlock_sock(sk);
2979}
2980EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2981
2982void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2983			   const struct flowi6 *fl6)
2984{
2985#ifdef CONFIG_IPV6_SUBTREES
2986	struct ipv6_pinfo *np = inet6_sk(sk);
2987#endif
2988
2989	ip6_dst_store(sk, dst,
2990		      ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2991		      &sk->sk_v6_daddr : NULL,
2992#ifdef CONFIG_IPV6_SUBTREES
2993		      ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2994		      &np->saddr :
2995#endif
2996		      NULL);
2997}
2998
2999static bool ip6_redirect_nh_match(const struct fib6_result *res,
3000				  struct flowi6 *fl6,
3001				  const struct in6_addr *gw,
3002				  struct rt6_info **ret)
3003{
3004	const struct fib6_nh *nh = res->nh;
3005
3006	if (nh->fib_nh_flags & RTNH_F_DEAD || !nh->fib_nh_gw_family ||
3007	    fl6->flowi6_oif != nh->fib_nh_dev->ifindex)
3008		return false;
3009
3010	/* rt_cache's gateway might be different from its 'parent'
3011	 * in the case of an IP redirect.
3012	 * So we keep searching in the exception table if the gateway
3013	 * is different.
3014	 */
3015	if (!ipv6_addr_equal(gw, &nh->fib_nh_gw6)) {
3016		struct rt6_info *rt_cache;
3017
3018		rt_cache = rt6_find_cached_rt(res, &fl6->daddr, &fl6->saddr);
3019		if (rt_cache &&
3020		    ipv6_addr_equal(gw, &rt_cache->rt6i_gateway)) {
3021			*ret = rt_cache;
3022			return true;
3023		}
3024		return false;
3025	}
3026	return true;
3027}
3028
3029struct fib6_nh_rd_arg {
3030	struct fib6_result	*res;
3031	struct flowi6		*fl6;
3032	const struct in6_addr	*gw;
3033	struct rt6_info		**ret;
3034};
3035
3036static int fib6_nh_redirect_match(struct fib6_nh *nh, void *_arg)
3037{
3038	struct fib6_nh_rd_arg *arg = _arg;
3039
3040	arg->res->nh = nh;
3041	return ip6_redirect_nh_match(arg->res, arg->fl6, arg->gw, arg->ret);
3042}
3043
3044/* Handle redirects */
3045struct ip6rd_flowi {
3046	struct flowi6 fl6;
3047	struct in6_addr gateway;
3048};
3049
3050INDIRECT_CALLABLE_SCOPE struct rt6_info *__ip6_route_redirect(struct net *net,
3051					     struct fib6_table *table,
3052					     struct flowi6 *fl6,
3053					     const struct sk_buff *skb,
3054					     int flags)
3055{
3056	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
3057	struct rt6_info *ret = NULL;
3058	struct fib6_result res = {};
3059	struct fib6_nh_rd_arg arg = {
3060		.res = &res,
3061		.fl6 = fl6,
3062		.gw  = &rdfl->gateway,
3063		.ret = &ret
3064	};
3065	struct fib6_info *rt;
3066	struct fib6_node *fn;
3067
3068	/* Get the "current" route for this destination and
3069	 * check if the redirect has come from an appropriate router.
3070	 *
3071	 * RFC 4861 specifies that redirects should only be
3072	 * accepted if they come from the nexthop to the target.
3073	 * Due to the way the routes are chosen, this notion
3074	 * is a bit fuzzy and one might need to check all possible
3075	 * routes.
3076	 */
3077
3078	rcu_read_lock();
3079	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
3080restart:
3081	for_each_fib6_node_rt_rcu(fn) {
3082		res.f6i = rt;
3083		if (fib6_check_expired(rt))
3084			continue;
3085		if (rt->fib6_flags & RTF_REJECT)
3086			break;
3087		if (unlikely(rt->nh)) {
3088			if (nexthop_is_blackhole(rt->nh))
3089				continue;
3090			/* on match, res->nh is filled in and potentially ret */
3091			if (nexthop_for_each_fib6_nh(rt->nh,
3092						     fib6_nh_redirect_match,
3093						     &arg))
3094				goto out;
3095		} else {
3096			res.nh = rt->fib6_nh;
3097			if (ip6_redirect_nh_match(&res, fl6, &rdfl->gateway,
3098						  &ret))
3099				goto out;
3100		}
3101	}
3102
3103	if (!rt)
3104		rt = net->ipv6.fib6_null_entry;
3105	else if (rt->fib6_flags & RTF_REJECT) {
3106		ret = net->ipv6.ip6_null_entry;
3107		goto out;
3108	}
3109
3110	if (rt == net->ipv6.fib6_null_entry) {
3111		fn = fib6_backtrack(fn, &fl6->saddr);
3112		if (fn)
3113			goto restart;
3114	}
3115
3116	res.f6i = rt;
3117	res.nh = rt->fib6_nh;
3118out:
3119	if (ret) {
3120		ip6_hold_safe(net, &ret);
3121	} else {
3122		res.fib6_flags = res.f6i->fib6_flags;
3123		res.fib6_type = res.f6i->fib6_type;
3124		ret = ip6_create_rt_rcu(&res);
3125	}
3126
3127	rcu_read_unlock();
3128
3129	trace_fib6_table_lookup(net, &res, table, fl6);
3130	return ret;
3131};
3132
3133static struct dst_entry *ip6_route_redirect(struct net *net,
3134					    const struct flowi6 *fl6,
3135					    const struct sk_buff *skb,
3136					    const struct in6_addr *gateway)
3137{
3138	int flags = RT6_LOOKUP_F_HAS_SADDR;
3139	struct ip6rd_flowi rdfl;
3140
3141	rdfl.fl6 = *fl6;
3142	rdfl.gateway = *gateway;
3143
3144	return fib6_rule_lookup(net, &rdfl.fl6, skb,
3145				flags, __ip6_route_redirect);
3146}
3147
3148void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
3149		  kuid_t uid)
3150{
3151	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
3152	struct dst_entry *dst;
3153	struct flowi6 fl6 = {
3154		.flowi6_iif = LOOPBACK_IFINDEX,
3155		.flowi6_oif = oif,
3156		.flowi6_mark = mark,
3157		.daddr = iph->daddr,
3158		.saddr = iph->saddr,
3159		.flowlabel = ip6_flowinfo(iph),
3160		.flowi6_uid = uid,
3161	};
3162
3163	dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
3164	rt6_do_redirect(dst, NULL, skb);
3165	dst_release(dst);
3166}
3167EXPORT_SYMBOL_GPL(ip6_redirect);
3168
3169void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)
3170{
3171	const struct ipv6hdr *iph = ipv6_hdr(skb);
3172	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
3173	struct dst_entry *dst;
3174	struct flowi6 fl6 = {
3175		.flowi6_iif = LOOPBACK_IFINDEX,
3176		.flowi6_oif = oif,
3177		.daddr = msg->dest,
3178		.saddr = iph->daddr,
3179		.flowi6_uid = sock_net_uid(net, NULL),
3180	};
3181
3182	dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
3183	rt6_do_redirect(dst, NULL, skb);
3184	dst_release(dst);
3185}
3186
3187void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
3188{
3189	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if,
3190		     READ_ONCE(sk->sk_mark), sk->sk_uid);
3191}
3192EXPORT_SYMBOL_GPL(ip6_sk_redirect);
3193
3194static unsigned int ip6_default_advmss(const struct dst_entry *dst)
3195{
3196	struct net_device *dev = dst->dev;
3197	unsigned int mtu = dst_mtu(dst);
3198	struct net *net = dev_net(dev);
3199
3200	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
3201
3202	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
3203		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
3204
3205	/*
3206	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
3207	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
3208	 * IPV6_MAXPLEN is also valid and means: "any MSS,
3209	 * rely only on pmtu discovery"
3210	 */
3211	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
3212		mtu = IPV6_MAXPLEN;
3213	return mtu;
3214}
3215
3216INDIRECT_CALLABLE_SCOPE unsigned int ip6_mtu(const struct dst_entry *dst)
3217{
3218	return ip6_dst_mtu_maybe_forward(dst, false);
3219}
3220EXPORT_INDIRECT_CALLABLE(ip6_mtu);
3221
3222/* MTU selection:
3223 * 1. mtu on route is locked - use it
3224 * 2. mtu from nexthop exception
3225 * 3. mtu from egress device
3226 *
3227 * based on ip6_dst_mtu_forward and exception logic of
3228 * rt6_find_cached_rt; called with rcu_read_lock
3229 */
3230u32 ip6_mtu_from_fib6(const struct fib6_result *res,
3231		      const struct in6_addr *daddr,
3232		      const struct in6_addr *saddr)
3233{
3234	const struct fib6_nh *nh = res->nh;
3235	struct fib6_info *f6i = res->f6i;
3236	struct inet6_dev *idev;
3237	struct rt6_info *rt;
3238	u32 mtu = 0;
3239
3240	if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
3241		mtu = f6i->fib6_pmtu;
3242		if (mtu)
3243			goto out;
3244	}
3245
3246	rt = rt6_find_cached_rt(res, daddr, saddr);
3247	if (unlikely(rt)) {
3248		mtu = dst_metric_raw(&rt->dst, RTAX_MTU);
3249	} else {
3250		struct net_device *dev = nh->fib_nh_dev;
3251
3252		mtu = IPV6_MIN_MTU;
3253		idev = __in6_dev_get(dev);
3254		if (idev)
3255			mtu = max_t(u32, mtu, READ_ONCE(idev->cnf.mtu6));
3256	}
3257
3258	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
3259out:
3260	return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
3261}
3262
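/* Build a standalone dst for an ICMPv6/NDISC reply. The entry is only
 * placed on the uncached list and is never inserted into the FIB.
 */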
3263struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
3264				  struct flowi6 *fl6)
3265{
3266	struct dst_entry *dst;
3267	struct rt6_info *rt;
3268	struct inet6_dev *idev = in6_dev_get(dev);
3269	struct net *net = dev_net(dev);
3270
3271	if (unlikely(!idev))
3272		return ERR_PTR(-ENODEV);
3273
3274	rt = ip6_dst_alloc(net, dev, 0);
3275	if (unlikely(!rt)) {
3276		in6_dev_put(idev);
3277		dst = ERR_PTR(-ENOMEM);
3278		goto out;
3279	}
3280
3281	rt->dst.input = ip6_input;
3282	rt->dst.output  = ip6_output;
3283	rt->rt6i_gateway  = fl6->daddr;
3284	rt->rt6i_dst.addr = fl6->daddr;
3285	rt->rt6i_dst.plen = 128;
3286	rt->rt6i_idev     = idev;
3287	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
3288
3289	/* Add this dst into uncached_list so that rt6_disable_ip() can
3290	 * properly release the net_device.
3291	 */
3292	rt6_uncached_list_add(rt);
3293
3294	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
3295
3296out:
3297	return dst;
3298}
3299
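/* Rate-limited GC entry point. ip6_rt_gc_expire is passed to
 * fib6_run_gc() as the aging timeout for this pass; it is reset to
 * half of ip6_rt_gc_timeout once the entry count drops below
 * gc_thresh, and decays by 1/2^ip6_rt_gc_elasticity after every pass.
 */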
3300static void ip6_dst_gc(struct dst_ops *ops)
3301{
3302	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
3303	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
3304	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
3305	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
3306	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
3307	unsigned int val;
3308	int entries;
3309
3310	if (time_after(rt_last_gc + rt_min_interval, jiffies))
3311		goto out;
3312
3313	fib6_run_gc(atomic_inc_return(&net->ipv6.ip6_rt_gc_expire), net, true);
3314	entries = dst_entries_get_slow(ops);
3315	if (entries < ops->gc_thresh)
3316		atomic_set(&net->ipv6.ip6_rt_gc_expire, rt_gc_timeout >> 1);
3317out:
3318	val = atomic_read(&net->ipv6.ip6_rt_gc_expire);
3319	atomic_set(&net->ipv6.ip6_rt_gc_expire, val - (val >> rt_elasticity));
3320}
3321
3322static int ip6_nh_lookup_table(struct net *net, struct fib6_config *cfg,
3323			       const struct in6_addr *gw_addr, u32 tbid,
3324			       int flags, struct fib6_result *res)
3325{
3326	struct flowi6 fl6 = {
3327		.flowi6_oif = cfg->fc_ifindex,
3328		.daddr = *gw_addr,
3329		.saddr = cfg->fc_prefsrc,
3330	};
3331	struct fib6_table *table;
3332	int err;
3333
3334	table = fib6_get_table(net, tbid);
3335	if (!table)
3336		return -EINVAL;
3337
3338	if (!ipv6_addr_any(&cfg->fc_prefsrc))
3339		flags |= RT6_LOOKUP_F_HAS_SADDR;
3340
3341	flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
3342
3343	err = fib6_table_lookup(net, table, cfg->fc_ifindex, &fl6, res, flags);
3344	if (!err && res->f6i != net->ipv6.fib6_null_entry)
3345		fib6_select_path(net, res, &fl6, cfg->fc_ifindex,
3346				 cfg->fc_ifindex != 0, NULL, flags);
3347
3348	return err;
3349}
3350
3351static int ip6_route_check_nh_onlink(struct net *net,
3352				     struct fib6_config *cfg,
3353				     const struct net_device *dev,
3354				     struct netlink_ext_ack *extack)
3355{
3356	u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
3357	const struct in6_addr *gw_addr = &cfg->fc_gateway;
3358	struct fib6_result res = {};
3359	int err;
3360
3361	err = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0, &res);
3362	if (!err && !(res.fib6_flags & RTF_REJECT) &&
3363	    /* ignore match if it is the default route */
3364	    !ipv6_addr_any(&res.f6i->fib6_dst.addr) &&
3365	    (res.fib6_type != RTN_UNICAST || dev != res.nh->fib_nh_dev)) {
3366		NL_SET_ERR_MSG(extack,
3367			       "Nexthop has invalid gateway or device mismatch");
3368		err = -EINVAL;
3369	}
3370
3371	return err;
3372}
3373
3374static int ip6_route_check_nh(struct net *net,
3375			      struct fib6_config *cfg,
3376			      struct net_device **_dev,
3377			      netdevice_tracker *dev_tracker,
3378			      struct inet6_dev **idev)
3379{
3380	const struct in6_addr *gw_addr = &cfg->fc_gateway;
3381	struct net_device *dev = _dev ? *_dev : NULL;
3382	int flags = RT6_LOOKUP_F_IFACE;
3383	struct fib6_result res = {};
3384	int err = -EHOSTUNREACH;
3385
3386	if (cfg->fc_table) {
3387		err = ip6_nh_lookup_table(net, cfg, gw_addr,
3388					  cfg->fc_table, flags, &res);
3389		/* gw_addr can not require a gateway or resolve to a reject
3390		 * route. If a device is given, it must match the result.
3391		 */
3392		if (err || res.fib6_flags & RTF_REJECT ||
3393		    res.nh->fib_nh_gw_family ||
3394		    (dev && dev != res.nh->fib_nh_dev))
3395			err = -EHOSTUNREACH;
3396	}
3397
3398	if (err < 0) {
3399		struct flowi6 fl6 = {
3400			.flowi6_oif = cfg->fc_ifindex,
3401			.daddr = *gw_addr,
3402		};
3403
3404		err = fib6_lookup(net, cfg->fc_ifindex, &fl6, &res, flags);
3405		if (err || res.fib6_flags & RTF_REJECT ||
3406		    res.nh->fib_nh_gw_family)
3407			err = -EHOSTUNREACH;
3408
3409		if (err)
3410			return err;
3411
3412		fib6_select_path(net, &res, &fl6, cfg->fc_ifindex,
3413				 cfg->fc_ifindex != 0, NULL, flags);
3414	}
3415
3416	err = 0;
3417	if (dev) {
3418		if (dev != res.nh->fib_nh_dev)
3419			err = -EHOSTUNREACH;
3420	} else {
3421		*_dev = dev = res.nh->fib_nh_dev;
3422		netdev_hold(dev, dev_tracker, GFP_ATOMIC);
3423		*idev = in6_dev_get(dev);
3424	}
3425
3426	return err;
3427}
3428
3429static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
3430			   struct net_device **_dev,
3431			   netdevice_tracker *dev_tracker,
3432			   struct inet6_dev **idev,
3433			   struct netlink_ext_ack *extack)
3434{
3435	const struct in6_addr *gw_addr = &cfg->fc_gateway;
3436	int gwa_type = ipv6_addr_type(gw_addr);
3437	bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
3438	const struct net_device *dev = *_dev;
3439	bool need_addr_check = !dev;
3440	int err = -EINVAL;
3441
3442	/* If gw_addr is local we will fail to detect this in case the
3443	 * address is still TENTATIVE (DAD in progress). rt6_lookup() will
3444	 * return the already-added prefix route via the interface that the
3445	 * prefix route was assigned to, which might be non-loopback.
3446	 */
3447	if (dev &&
3448	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
3449		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
3450		goto out;
3451	}
3452
3453	if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
3454		/* IPv6 strictly inhibits using non-link-local
3455		 * addresses as nexthop addresses.
3456		 * Otherwise, the router will not be able to send redirects.
3457		 * It is very good, but in some (rare!) circumstances
3458		 * (SIT, PtP, NBMA NOARP links) it is handy to allow
3459		 * some exceptions. --ANK
3460		 * We allow IPv4-mapped nexthops to support RFC4798-type
3461		 * addressing.
3462		 */
3463		if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
3464			NL_SET_ERR_MSG(extack, "Invalid gateway address");
3465			goto out;
3466		}
3467
3468		rcu_read_lock();
3469
3470		if (cfg->fc_flags & RTNH_F_ONLINK)
3471			err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
3472		else
3473			err = ip6_route_check_nh(net, cfg, _dev, dev_tracker,
3474						 idev);
3475
3476		rcu_read_unlock();
3477
3478		if (err)
3479			goto out;
3480	}
3481
3482	/* reload in case device was changed */
3483	dev = *_dev;
3484
3485	err = -EINVAL;
3486	if (!dev) {
3487		NL_SET_ERR_MSG(extack, "Egress device not specified");
3488		goto out;
3489	} else if (dev->flags & IFF_LOOPBACK) {
3490		NL_SET_ERR_MSG(extack,
3491			       "Egress device can not be loopback device for this route");
3492		goto out;
3493	}
3494
3495	/* if we did not check gw_addr above, do so now that the
3496	 * egress device has been resolved.
3497	 */
3498	if (need_addr_check &&
3499	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
3500		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
3501		goto out;
3502	}
3503
3504	err = 0;
3505out:
3506	return err;
3507}
3508
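/* A route is demoted to a reject route when it is explicitly flagged
 * RTF_REJECT, or when a non-loopback prefix is bound to the loopback
 * device without being a local or anycast address.
 */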
3509static bool fib6_is_reject(u32 flags, struct net_device *dev, int addr_type)
3510{
3511	if ((flags & RTF_REJECT) ||
3512	    (dev && (dev->flags & IFF_LOOPBACK) &&
3513	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
3514	     !(flags & (RTF_ANYCAST | RTF_LOCAL))))
3515		return true;
3516
3517	return false;
3518}
3519
3520int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
3521		 struct fib6_config *cfg, gfp_t gfp_flags,
3522		 struct netlink_ext_ack *extack)
3523{
3524	netdevice_tracker *dev_tracker = &fib6_nh->fib_nh_dev_tracker;
3525	struct net_device *dev = NULL;
3526	struct inet6_dev *idev = NULL;
3527	int addr_type;
3528	int err;
3529
3530	fib6_nh->fib_nh_family = AF_INET6;
3531#ifdef CONFIG_IPV6_ROUTER_PREF
3532	fib6_nh->last_probe = jiffies;
3533#endif
3534	if (cfg->fc_is_fdb) {
3535		fib6_nh->fib_nh_gw6 = cfg->fc_gateway;
3536		fib6_nh->fib_nh_gw_family = AF_INET6;
3537		return 0;
3538	}
3539
3540	err = -ENODEV;
3541	if (cfg->fc_ifindex) {
3542		dev = netdev_get_by_index(net, cfg->fc_ifindex,
3543					  dev_tracker, gfp_flags);
3544		if (!dev)
3545			goto out;
3546		idev = in6_dev_get(dev);
3547		if (!idev)
3548			goto out;
3549	}
3550
3551	if (cfg->fc_flags & RTNH_F_ONLINK) {
3552		if (!dev) {
3553			NL_SET_ERR_MSG(extack,
3554				       "Nexthop device required for onlink");
3555			goto out;
3556		}
3557
3558		if (!(dev->flags & IFF_UP)) {
3559			NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3560			err = -ENETDOWN;
3561			goto out;
3562		}
3563
3564		fib6_nh->fib_nh_flags |= RTNH_F_ONLINK;
3565	}
3566
3567	fib6_nh->fib_nh_weight = 1;
3568
3569	/* We cannot add true routes via loopback here;
3570	 * they would result in kernel looping. Promote them to reject routes.
3571	 */
3572	addr_type = ipv6_addr_type(&cfg->fc_dst);
3573	if (fib6_is_reject(cfg->fc_flags, dev, addr_type)) {
3574		/* hold loopback dev/idev if we haven't done so. */
3575		if (dev != net->loopback_dev) {
3576			if (dev) {
3577				netdev_put(dev, dev_tracker);
3578				in6_dev_put(idev);
3579			}
3580			dev = net->loopback_dev;
3581			netdev_hold(dev, dev_tracker, gfp_flags);
3582			idev = in6_dev_get(dev);
3583			if (!idev) {
3584				err = -ENODEV;
3585				goto out;
3586			}
3587		}
3588		goto pcpu_alloc;
3589	}
3590
3591	if (cfg->fc_flags & RTF_GATEWAY) {
3592		err = ip6_validate_gw(net, cfg, &dev, dev_tracker,
3593				      &idev, extack);
3594		if (err)
3595			goto out;
3596
3597		fib6_nh->fib_nh_gw6 = cfg->fc_gateway;
3598		fib6_nh->fib_nh_gw_family = AF_INET6;
3599	}
3600
3601	err = -ENODEV;
3602	if (!dev)
3603		goto out;
3604
3605	if (idev->cnf.disable_ipv6) {
3606		NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
3607		err = -EACCES;
3608		goto out;
3609	}
3610
3611	if (!(dev->flags & IFF_UP) && !cfg->fc_ignore_dev_down) {
3612		NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3613		err = -ENETDOWN;
3614		goto out;
3615	}
3616
3617	if (!(cfg->fc_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
3618	    !netif_carrier_ok(dev))
3619		fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
3620
3621	err = fib_nh_common_init(net, &fib6_nh->nh_common, cfg->fc_encap,
3622				 cfg->fc_encap_type, cfg, gfp_flags, extack);
3623	if (err)
3624		goto out;
3625
3626pcpu_alloc:
3627	fib6_nh->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags);
3628	if (!fib6_nh->rt6i_pcpu) {
3629		err = -ENOMEM;
3630		goto out;
3631	}
3632
3633	fib6_nh->fib_nh_dev = dev;
3634	fib6_nh->fib_nh_oif = dev->ifindex;
3635	err = 0;
3636out:
3637	if (idev)
3638		in6_dev_put(idev);
3639
3640	if (err) {
3641		lwtstate_put(fib6_nh->fib_nh_lws);
3642		fib6_nh->fib_nh_lws = NULL;
3643		netdev_put(dev, dev_tracker);
3644	}
3645
3646	return err;
3647}
3648
3649void fib6_nh_release(struct fib6_nh *fib6_nh)
3650{
3651	struct rt6_exception_bucket *bucket;
3652
3653	rcu_read_lock();
3654
3655	fib6_nh_flush_exceptions(fib6_nh, NULL);
3656	bucket = fib6_nh_get_excptn_bucket(fib6_nh, NULL);
3657	if (bucket) {
3658		rcu_assign_pointer(fib6_nh->rt6i_exception_bucket, NULL);
3659		kfree(bucket);
3660	}
3661
3662	rcu_read_unlock();
3663
3664	fib6_nh_release_dsts(fib6_nh);
3665	free_percpu(fib6_nh->rt6i_pcpu);
3666
3667	fib_nh_common_release(&fib6_nh->nh_common);
3668}
3669
3670void fib6_nh_release_dsts(struct fib6_nh *fib6_nh)
3671{
3672	int cpu;
3673
3674	if (!fib6_nh->rt6i_pcpu)
3675		return;
3676
3677	for_each_possible_cpu(cpu) {
3678		struct rt6_info *pcpu_rt, **ppcpu_rt;
3679
3680		ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
3681		pcpu_rt = xchg(ppcpu_rt, NULL);
3682		if (pcpu_rt) {
3683			dst_dev_put(&pcpu_rt->dst);
3684			dst_release(&pcpu_rt->dst);
3685		}
3686	}
3687}
3688
3689static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
3690					      gfp_t gfp_flags,
3691					      struct netlink_ext_ack *extack)
3692{
3693	struct net *net = cfg->fc_nlinfo.nl_net;
3694	struct fib6_info *rt = NULL;
3695	struct nexthop *nh = NULL;
3696	struct fib6_table *table;
3697	struct fib6_nh *fib6_nh;
3698	int err = -EINVAL;
3699	int addr_type;
3700
3701	/* RTF_PCPU is an internal flag; it cannot be set by userspace */
3702	if (cfg->fc_flags & RTF_PCPU) {
3703		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
3704		goto out;
3705	}
3706
3707	/* RTF_CACHE is an internal flag; it cannot be set by userspace */
3708	if (cfg->fc_flags & RTF_CACHE) {
3709		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
3710		goto out;
3711	}
3712
3713	if (cfg->fc_type > RTN_MAX) {
3714		NL_SET_ERR_MSG(extack, "Invalid route type");
3715		goto out;
3716	}
3717
3718	if (cfg->fc_dst_len > 128) {
3719		NL_SET_ERR_MSG(extack, "Invalid prefix length");
3720		goto out;
3721	}
3722	if (cfg->fc_src_len > 128) {
3723		NL_SET_ERR_MSG(extack, "Invalid source address length");
3724		goto out;
3725	}
3726#ifndef CONFIG_IPV6_SUBTREES
3727	if (cfg->fc_src_len) {
3728		NL_SET_ERR_MSG(extack,
3729			       "Specifying source address requires IPV6_SUBTREES to be enabled");
3730		goto out;
3731	}
3732#endif
3733	if (cfg->fc_nh_id) {
3734		nh = nexthop_find_by_id(net, cfg->fc_nh_id);
3735		if (!nh) {
3736			NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
3737			goto out;
3738		}
3739		err = fib6_check_nexthop(nh, cfg, extack);
3740		if (err)
3741			goto out;
3742	}
3743
3744	err = -ENOBUFS;
3745	if (cfg->fc_nlinfo.nlh &&
3746	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
3747		table = fib6_get_table(net, cfg->fc_table);
3748		if (!table) {
3749			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
3750			table = fib6_new_table(net, cfg->fc_table);
3751		}
3752	} else {
3753		table = fib6_new_table(net, cfg->fc_table);
3754	}
3755
3756	if (!table)
3757		goto out;
3758
3759	err = -ENOMEM;
3760	rt = fib6_info_alloc(gfp_flags, !nh);
3761	if (!rt)
3762		goto out;
3763
3764	rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len,
3765					       extack);
3766	if (IS_ERR(rt->fib6_metrics)) {
3767		err = PTR_ERR(rt->fib6_metrics);
3768		/* Do not leave garbage there. */
3769		rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
3770		goto out_free;
3771	}
3772
3773	if (cfg->fc_flags & RTF_ADDRCONF)
3774		rt->dst_nocount = true;
3775
3776	if (cfg->fc_flags & RTF_EXPIRES)
3777		fib6_set_expires(rt, jiffies +
3778				clock_t_to_jiffies(cfg->fc_expires));
3779
3780	if (cfg->fc_protocol == RTPROT_UNSPEC)
3781		cfg->fc_protocol = RTPROT_BOOT;
3782	rt->fib6_protocol = cfg->fc_protocol;
3783
3784	rt->fib6_table = table;
3785	rt->fib6_metric = cfg->fc_metric;
3786	rt->fib6_type = cfg->fc_type ? : RTN_UNICAST;
3787	rt->fib6_flags = cfg->fc_flags & ~RTF_GATEWAY;
3788
3789	ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
3790	rt->fib6_dst.plen = cfg->fc_dst_len;
3791
3792#ifdef CONFIG_IPV6_SUBTREES
3793	ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
3794	rt->fib6_src.plen = cfg->fc_src_len;
3795#endif
3796	if (nh) {
3797		if (rt->fib6_src.plen) {
3798			NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing");
3799			goto out_free;
3800		}
3801		if (!nexthop_get(nh)) {
3802			NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
3803			goto out_free;
3804		}
3805		rt->nh = nh;
3806		fib6_nh = nexthop_fib6_nh(rt->nh);
3807	} else {
3808		err = fib6_nh_init(net, rt->fib6_nh, cfg, gfp_flags, extack);
3809		if (err)
3810			goto out;
3811
3812		fib6_nh = rt->fib6_nh;
3813
3814		/* We cannot add true routes via loopback here, they would
3815		 * result in kernel looping; promote them to reject routes
3816		 */
3817		addr_type = ipv6_addr_type(&cfg->fc_dst);
3818		if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh->fib_nh_dev,
3819				   addr_type))
3820			rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP;
3821	}
3822
3823	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
3824		struct net_device *dev = fib6_nh->fib_nh_dev;
3825
3826		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
3827			NL_SET_ERR_MSG(extack, "Invalid source address");
3828			err = -EINVAL;
3829			goto out;
3830		}
3831		rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
3832		rt->fib6_prefsrc.plen = 128;
3833	} else
3834		rt->fib6_prefsrc.plen = 0;
3835
3836	return rt;
3837out:
3838	fib6_info_release(rt);
3839	return ERR_PTR(err);
3840out_free:
3841	ip_fib_metrics_put(rt->fib6_metrics);
3842	kfree(rt);
3843	return ERR_PTR(err);
3844}
3845
3846int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
3847		  struct netlink_ext_ack *extack)
3848{
3849	struct fib6_info *rt;
3850	int err;
3851
3852	rt = ip6_route_info_create(cfg, gfp_flags, extack);
3853	if (IS_ERR(rt))
3854		return PTR_ERR(rt);
3855
3856	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
3857	fib6_info_release(rt);
3858
3859	return err;
3860}
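/* Illustrative sketch (not part of the original source): an in-kernel
 * caller adds a route by filling a struct fib6_config and handing it to
 * ip6_route_add(), much like rt6_add_route_info() further below.  The
 * prefix, prefix length and device below are hypothetical.
 *
 *	struct fib6_config cfg = {
 *		.fc_table	= RT6_TABLE_MAIN,
 *		.fc_ifindex	= dev->ifindex,
 *		.fc_dst_len	= 64,
 *		.fc_flags	= RTF_UP,
 *		.fc_protocol	= RTPROT_KERNEL,
 *		.fc_type	= RTN_UNICAST,
 *		.fc_nlinfo.nl_net = net,
 *	};
 *
 *	cfg.fc_dst = prefix;
 *	err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
 */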
3861
3862static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
3863{
3864	struct net *net = info->nl_net;
3865	struct fib6_table *table;
3866	int err;
3867
3868	if (rt == net->ipv6.fib6_null_entry) {
3869		err = -ENOENT;
3870		goto out;
3871	}
3872
3873	table = rt->fib6_table;
3874	spin_lock_bh(&table->tb6_lock);
3875	err = fib6_del(rt, info);
3876	spin_unlock_bh(&table->tb6_lock);
3877
3878out:
3879	fib6_info_release(rt);
3880	return err;
3881}
3882
3883int ip6_del_rt(struct net *net, struct fib6_info *rt, bool skip_notify)
3884{
3885	struct nl_info info = {
3886		.nl_net = net,
3887		.skip_notify = skip_notify
3888	};
3889
3890	return __ip6_del_rt(rt, &info);
3891}
3892
3893static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
3894{
3895	struct nl_info *info = &cfg->fc_nlinfo;
3896	struct net *net = info->nl_net;
3897	struct sk_buff *skb = NULL;
3898	struct fib6_table *table;
3899	int err = -ENOENT;
3900
3901	if (rt == net->ipv6.fib6_null_entry)
3902		goto out_put;
3903	table = rt->fib6_table;
3904	spin_lock_bh(&table->tb6_lock);
3905
3906	if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
3907		struct fib6_info *sibling, *next_sibling;
3908		struct fib6_node *fn;
3909
3910		/* prefer to send a single notification with all hops */
3911		skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3912		if (skb) {
3913			u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3914
3915			if (rt6_fill_node(net, skb, rt, NULL,
3916					  NULL, NULL, 0, RTM_DELROUTE,
3917					  info->portid, seq, 0) < 0) {
3918				kfree_skb(skb);
3919				skb = NULL;
3920			} else
3921				info->skip_notify = 1;
3922		}
3923
3924		/* 'rt' points to the first sibling route. If it is not the
3925		 * leaf, then we do not need to send a notification. Otherwise,
3926		 * we need to check if the last sibling has a next route or not
3927		 * and emit a replace or delete notification, respectively.
3928		 */
3929		info->skip_notify_kernel = 1;
3930		fn = rcu_dereference_protected(rt->fib6_node,
3931					    lockdep_is_held(&table->tb6_lock));
3932		if (rcu_access_pointer(fn->leaf) == rt) {
3933			struct fib6_info *last_sibling, *replace_rt;
3934
3935			last_sibling = list_last_entry(&rt->fib6_siblings,
3936						       struct fib6_info,
3937						       fib6_siblings);
3938			replace_rt = rcu_dereference_protected(
3939					    last_sibling->fib6_next,
3940					    lockdep_is_held(&table->tb6_lock));
3941			if (replace_rt)
3942				call_fib6_entry_notifiers_replace(net,
3943								  replace_rt);
3944			else
3945				call_fib6_multipath_entry_notifiers(net,
3946						       FIB_EVENT_ENTRY_DEL,
3947						       rt, rt->fib6_nsiblings,
3948						       NULL);
3949		}
3950		list_for_each_entry_safe(sibling, next_sibling,
3951					 &rt->fib6_siblings,
3952					 fib6_siblings) {
3953			err = fib6_del(sibling, info);
3954			if (err)
3955				goto out_unlock;
3956		}
3957	}
3958
3959	err = fib6_del(rt, info);
3960out_unlock:
3961	spin_unlock_bh(&table->tb6_lock);
3962out_put:
3963	fib6_info_release(rt);
3964
3965	if (skb) {
3966		rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3967			    info->nlh, gfp_any());
3968	}
3969	return err;
3970}
3971
3972static int __ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
3973{
3974	int rc = -ESRCH;
3975
3976	if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
3977		goto out;
3978
3979	if (cfg->fc_flags & RTF_GATEWAY &&
3980	    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
3981		goto out;
3982
3983	rc = rt6_remove_exception_rt(rt);
3984out:
3985	return rc;
3986}
3987
3988static int ip6_del_cached_rt(struct fib6_config *cfg, struct fib6_info *rt,
3989			     struct fib6_nh *nh)
3990{
3991	struct fib6_result res = {
3992		.f6i = rt,
3993		.nh = nh,
3994	};
3995	struct rt6_info *rt_cache;
3996
3997	rt_cache = rt6_find_cached_rt(&res, &cfg->fc_dst, &cfg->fc_src);
3998	if (rt_cache)
3999		return __ip6_del_cached_rt(rt_cache, cfg);
4000
4001	return 0;
4002}
4003
4004struct fib6_nh_del_cached_rt_arg {
4005	struct fib6_config *cfg;
4006	struct fib6_info *f6i;
4007};
4008
4009static int fib6_nh_del_cached_rt(struct fib6_nh *nh, void *_arg)
4010{
4011	struct fib6_nh_del_cached_rt_arg *arg = _arg;
4012	int rc;
4013
4014	rc = ip6_del_cached_rt(arg->cfg, arg->f6i, nh);
4015	return rc != -ESRCH ? rc : 0;
4016}
4017
4018static int ip6_del_cached_rt_nh(struct fib6_config *cfg, struct fib6_info *f6i)
4019{
4020	struct fib6_nh_del_cached_rt_arg arg = {
4021		.cfg = cfg,
4022		.f6i = f6i
4023	};
4024
4025	return nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_del_cached_rt, &arg);
4026}
4027
4028static int ip6_route_del(struct fib6_config *cfg,
4029			 struct netlink_ext_ack *extack)
4030{
4031	struct fib6_table *table;
4032	struct fib6_info *rt;
4033	struct fib6_node *fn;
4034	int err = -ESRCH;
4035
4036	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
4037	if (!table) {
4038		NL_SET_ERR_MSG(extack, "FIB table does not exist");
4039		return err;
4040	}
4041
4042	rcu_read_lock();
4043
4044	fn = fib6_locate(&table->tb6_root,
4045			 &cfg->fc_dst, cfg->fc_dst_len,
4046			 &cfg->fc_src, cfg->fc_src_len,
4047			 !(cfg->fc_flags & RTF_CACHE));
4048
4049	if (fn) {
4050		for_each_fib6_node_rt_rcu(fn) {
4051			struct fib6_nh *nh;
4052
4053			if (rt->nh && cfg->fc_nh_id &&
4054			    rt->nh->id != cfg->fc_nh_id)
4055				continue;
4056
4057			if (cfg->fc_flags & RTF_CACHE) {
4058				int rc = 0;
4059
4060				if (rt->nh) {
4061					rc = ip6_del_cached_rt_nh(cfg, rt);
4062				} else if (cfg->fc_nh_id) {
4063					continue;
4064				} else {
4065					nh = rt->fib6_nh;
4066					rc = ip6_del_cached_rt(cfg, rt, nh);
4067				}
4068				if (rc != -ESRCH) {
4069					rcu_read_unlock();
4070					return rc;
4071				}
4072				continue;
4073			}
4074
4075			if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
4076				continue;
4077			if (cfg->fc_protocol &&
4078			    cfg->fc_protocol != rt->fib6_protocol)
4079				continue;
4080
4081			if (rt->nh) {
4082				if (!fib6_info_hold_safe(rt))
4083					continue;
4084				rcu_read_unlock();
4085
4086				return __ip6_del_rt(rt, &cfg->fc_nlinfo);
4087			}
4088			if (cfg->fc_nh_id)
4089				continue;
4090
4091			nh = rt->fib6_nh;
4092			if (cfg->fc_ifindex &&
4093			    (!nh->fib_nh_dev ||
4094			     nh->fib_nh_dev->ifindex != cfg->fc_ifindex))
4095				continue;
4096			if (cfg->fc_flags & RTF_GATEWAY &&
4097			    !ipv6_addr_equal(&cfg->fc_gateway, &nh->fib_nh_gw6))
4098				continue;
4099			if (!fib6_info_hold_safe(rt))
4100				continue;
4101			rcu_read_unlock();
4102
4103			/* if a gateway was specified, only delete that one hop */
4104			if (cfg->fc_flags & RTF_GATEWAY)
4105				return __ip6_del_rt(rt, &cfg->fc_nlinfo);
4106
4107			return __ip6_del_rt_siblings(rt, cfg);
4108		}
4109	}
4110	rcu_read_unlock();
4111
4112	return err;
4113}
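/* Matching summary for the loop above: fc_metric, fc_protocol,
 * fc_ifindex and fc_gateway act as optional filters -- an unset (zero)
 * value matches any route.  RTF_CACHE restricts deletion to a cached
 * exception route, and RTF_GATEWAY deletes only the matching nexthop
 * rather than the whole sibling (ECMP) group.
 */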
4114
4115static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
4116{
4117	struct netevent_redirect netevent;
4118	struct rt6_info *rt, *nrt = NULL;
4119	struct fib6_result res = {};
4120	struct ndisc_options ndopts;
4121	struct inet6_dev *in6_dev;
4122	struct neighbour *neigh;
4123	struct rd_msg *msg;
4124	int optlen, on_link;
4125	u8 *lladdr;
4126
4127	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
4128	optlen -= sizeof(*msg);
4129
4130	if (optlen < 0) {
4131		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
4132		return;
4133	}
4134
4135	msg = (struct rd_msg *)icmp6_hdr(skb);
4136
4137	if (ipv6_addr_is_multicast(&msg->dest)) {
4138		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
4139		return;
4140	}
4141
4142	on_link = 0;
4143	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
4144		on_link = 1;
4145	} else if (ipv6_addr_type(&msg->target) !=
4146		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
4147		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
4148		return;
4149	}
4150
4151	in6_dev = __in6_dev_get(skb->dev);
4152	if (!in6_dev)
4153		return;
4154	if (READ_ONCE(in6_dev->cnf.forwarding) ||
4155	    !READ_ONCE(in6_dev->cnf.accept_redirects))
4156		return;
4157
4158	/* RFC2461 8.1:
4159	 *	The IP source address of the Redirect MUST be the same as the current
4160	 *	first-hop router for the specified ICMP Destination Address.
4161	 */
4162
4163	if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
4164		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
4165		return;
4166	}
4167
4168	lladdr = NULL;
4169	if (ndopts.nd_opts_tgt_lladdr) {
4170		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
4171					     skb->dev);
4172		if (!lladdr) {
4173			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
4174			return;
4175		}
4176	}
4177
4178	rt = dst_rt6_info(dst);
4179	if (rt->rt6i_flags & RTF_REJECT) {
4180		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
4181		return;
4182	}
4183
4184	/* Redirect received -> path was valid.
4185	 * Redirects are sent only in response to data packets,
4186	 * so this nexthop is apparently reachable. --ANK
4187	 */
4188	dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
4189
4190	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
4191	if (!neigh)
4192		return;
4193
4194	/*
4195	 *	We have finally decided to accept it.
4196	 */
4197
4198	ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
4199		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
4200		     NEIGH_UPDATE_F_OVERRIDE|
4201		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
4202				     NEIGH_UPDATE_F_ISROUTER)),
4203		     NDISC_REDIRECT, &ndopts);
4204
4205	rcu_read_lock();
4206	res.f6i = rcu_dereference(rt->from);
4207	if (!res.f6i)
4208		goto out;
4209
4210	if (res.f6i->nh) {
4211		struct fib6_nh_match_arg arg = {
4212			.dev = dst->dev,
4213			.gw = &rt->rt6i_gateway,
4214		};
4215
4216		nexthop_for_each_fib6_nh(res.f6i->nh,
4217					 fib6_nh_find_match, &arg);
4218
4219		/* The fib6_info uses a nexthop that has no fib6_nh on
4220		 * dst->dev; this should be impossible.
4221		 */
4222		if (!arg.match)
4223			goto out;
4224		res.nh = arg.match;
4225	} else {
4226		res.nh = res.f6i->fib6_nh;
4227	}
4228
4229	res.fib6_flags = res.f6i->fib6_flags;
4230	res.fib6_type = res.f6i->fib6_type;
4231	nrt = ip6_rt_cache_alloc(&res, &msg->dest, NULL);
4232	if (!nrt)
4233		goto out;
4234
4235	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
4236	if (on_link)
4237		nrt->rt6i_flags &= ~RTF_GATEWAY;
4238
4239	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
4240
4241	/* rt6_insert_exception() will take care of duplicated exceptions */
4242	if (rt6_insert_exception(nrt, &res)) {
4243		dst_release_immediate(&nrt->dst);
4244		goto out;
4245	}
4246
4247	netevent.old = &rt->dst;
4248	netevent.new = &nrt->dst;
4249	netevent.daddr = &msg->dest;
4250	netevent.neigh = neigh;
4251	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
4252
4253out:
4254	rcu_read_unlock();
4255	neigh_release(neigh);
4256}
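/* Worked example (illustrative): a host reaches destination D via router
 * R1.  R1 sends a Redirect with ICMP destination D and target R2, where
 * R2 is another router's link-local address.  Since target != dest, this
 * is a better-router redirect, and the code above inserts a cached
 * exception (RTF_CACHE|RTF_GATEWAY|RTF_DYNAMIC) for D via R2.  If
 * instead target == dest, D itself is on-link: RTF_GATEWAY is cleared
 * and traffic is sent directly to D.
 */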
4257
4258#ifdef CONFIG_IPV6_ROUTE_INFO
4259static struct fib6_info *rt6_get_route_info(struct net *net,
4260					   const struct in6_addr *prefix, int prefixlen,
4261					   const struct in6_addr *gwaddr,
4262					   struct net_device *dev)
4263{
4264	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
4265	int ifindex = dev->ifindex;
4266	struct fib6_node *fn;
4267	struct fib6_info *rt = NULL;
4268	struct fib6_table *table;
4269
4270	table = fib6_get_table(net, tb_id);
4271	if (!table)
4272		return NULL;
4273
4274	rcu_read_lock();
4275	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
4276	if (!fn)
4277		goto out;
4278
4279	for_each_fib6_node_rt_rcu(fn) {
4280		/* these routes do not use nexthops */
4281		if (rt->nh)
4282			continue;
4283		if (rt->fib6_nh->fib_nh_dev->ifindex != ifindex)
4284			continue;
4285		if (!(rt->fib6_flags & RTF_ROUTEINFO) ||
4286		    !rt->fib6_nh->fib_nh_gw_family)
4287			continue;
4288		if (!ipv6_addr_equal(&rt->fib6_nh->fib_nh_gw6, gwaddr))
4289			continue;
4290		if (!fib6_info_hold_safe(rt))
4291			continue;
4292		break;
4293	}
4294out:
4295	rcu_read_unlock();
4296	return rt;
4297}
4298
4299static struct fib6_info *rt6_add_route_info(struct net *net,
4300					   const struct in6_addr *prefix, int prefixlen,
4301					   const struct in6_addr *gwaddr,
4302					   struct net_device *dev,
4303					   unsigned int pref)
4304{
4305	struct fib6_config cfg = {
4306		.fc_metric	= IP6_RT_PRIO_USER,
4307		.fc_ifindex	= dev->ifindex,
4308		.fc_dst_len	= prefixlen,
4309		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
4310				  RTF_UP | RTF_PREF(pref),
4311		.fc_protocol = RTPROT_RA,
4312		.fc_type = RTN_UNICAST,
4313		.fc_nlinfo.portid = 0,
4314		.fc_nlinfo.nlh = NULL,
4315		.fc_nlinfo.nl_net = net,
4316	};
4317
4318	cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
4319	cfg.fc_dst = *prefix;
4320	cfg.fc_gateway = *gwaddr;
4321
4322	/* We should treat it as a default route if prefix length is 0. */
4323	if (!prefixlen)
4324		cfg.fc_flags |= RTF_DEFAULT;
4325
4326	ip6_route_add(&cfg, GFP_ATOMIC, NULL);
4327
4328	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
4329}
4330#endif
4331
4332struct fib6_info *rt6_get_dflt_router(struct net *net,
4333				     const struct in6_addr *addr,
4334				     struct net_device *dev)
4335{
4336	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
4337	struct fib6_info *rt;
4338	struct fib6_table *table;
4339
4340	table = fib6_get_table(net, tb_id);
4341	if (!table)
4342		return NULL;
4343
4344	rcu_read_lock();
4345	for_each_fib6_node_rt_rcu(&table->tb6_root) {
4346		struct fib6_nh *nh;
4347
4348		/* RA routes do not use nexthops */
4349		if (rt->nh)
4350			continue;
4351
4352		nh = rt->fib6_nh;
4353		if (dev == nh->fib_nh_dev &&
4354		    ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
4355		    ipv6_addr_equal(&nh->fib_nh_gw6, addr))
4356			break;
4357	}
4358	if (rt && !fib6_info_hold_safe(rt))
4359		rt = NULL;
4360	rcu_read_unlock();
4361	return rt;
4362}
4363
4364struct fib6_info *rt6_add_dflt_router(struct net *net,
4365				     const struct in6_addr *gwaddr,
4366				     struct net_device *dev,
4367				     unsigned int pref,
4368				     u32 defrtr_usr_metric,
4369				     int lifetime)
4370{
4371	struct fib6_config cfg = {
4372		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
4373		.fc_metric	= defrtr_usr_metric,
4374		.fc_ifindex	= dev->ifindex,
4375		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
4376				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
4377		.fc_protocol = RTPROT_RA,
4378		.fc_type = RTN_UNICAST,
4379		.fc_nlinfo.portid = 0,
4380		.fc_nlinfo.nlh = NULL,
4381		.fc_nlinfo.nl_net = net,
4382		.fc_expires = jiffies_to_clock_t(lifetime * HZ),
4383	};
4384
4385	cfg.fc_gateway = *gwaddr;
4386
4387	if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
4388		struct fib6_table *table;
4389
4390		table = fib6_get_table(dev_net(dev), cfg.fc_table);
4391		if (table)
4392			table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
4393	}
4394
4395	return rt6_get_dflt_router(net, gwaddr, dev);
4396}
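/* Illustrative call (the usual caller is the ndisc Router Advertisement
 * path; the argument names here are assumptions): on receipt of an RA
 * with a non-zero router lifetime, the default route is installed or
 * refreshed roughly as
 *
 *	rt = rt6_add_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev,
 *				 pref, defrtr_usr_metric, lifetime);
 */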
4397
4398static void __rt6_purge_dflt_routers(struct net *net,
4399				     struct fib6_table *table)
4400{
4401	struct fib6_info *rt;
4402
4403restart:
4404	rcu_read_lock();
4405	for_each_fib6_node_rt_rcu(&table->tb6_root) {
4406		struct net_device *dev = fib6_info_nh_dev(rt);
4407		struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
4408
4409		if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
4410		    (!idev || idev->cnf.accept_ra != 2) &&
4411		    fib6_info_hold_safe(rt)) {
4412			rcu_read_unlock();
4413			ip6_del_rt(net, rt, false);
4414			goto restart;
4415		}
4416	}
4417	rcu_read_unlock();
4418
4419	table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
4420}
4421
4422void rt6_purge_dflt_routers(struct net *net)
4423{
4424	struct fib6_table *table;
4425	struct hlist_head *head;
4426	unsigned int h;
4427
4428	rcu_read_lock();
4429
4430	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
4431		head = &net->ipv6.fib_table_hash[h];
4432		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
4433			if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
4434				__rt6_purge_dflt_routers(net, table);
4435		}
4436	}
4437
4438	rcu_read_unlock();
4439}
4440
4441static void rtmsg_to_fib6_config(struct net *net,
4442				 struct in6_rtmsg *rtmsg,
4443				 struct fib6_config *cfg)
4444{
4445	*cfg = (struct fib6_config){
4446		.fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
4447			 : RT6_TABLE_MAIN,
4448		.fc_ifindex = rtmsg->rtmsg_ifindex,
4449		.fc_metric = rtmsg->rtmsg_metric ? : IP6_RT_PRIO_USER,
4450		.fc_expires = rtmsg->rtmsg_info,
4451		.fc_dst_len = rtmsg->rtmsg_dst_len,
4452		.fc_src_len = rtmsg->rtmsg_src_len,
4453		.fc_flags = rtmsg->rtmsg_flags,
4454		.fc_type = rtmsg->rtmsg_type,
4455
4456		.fc_nlinfo.nl_net = net,
4457
4458		.fc_dst = rtmsg->rtmsg_dst,
4459		.fc_src = rtmsg->rtmsg_src,
4460		.fc_gateway = rtmsg->rtmsg_gateway,
4461	};
4462}
4463
4464int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg)
4465{
4466	struct fib6_config cfg;
4467	int err;
4468
4469	if (cmd != SIOCADDRT && cmd != SIOCDELRT)
4470		return -EINVAL;
4471	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
4472		return -EPERM;
4473
4474	rtmsg_to_fib6_config(net, rtmsg, &cfg);
4475
4476	rtnl_lock();
4477	switch (cmd) {
4478	case SIOCADDRT:
4479		err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
4480		break;
4481	case SIOCDELRT:
4482		err = ip6_route_del(&cfg, NULL);
4483		break;
4484	}
4485	rtnl_unlock();
4486	return err;
4487}
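/* Userspace sketch (illustrative, not part of this file): the legacy
 * route(8)-style interface reaches ipv6_route_ioctl() through an
 * AF_INET6 socket ioctl.  Addresses and device name are made up.
 *
 *	struct in6_rtmsg rt = { 0 };
 *	int fd;
 *
 *	inet_pton(AF_INET6, "2001:db8::", &rt.rtmsg_dst);
 *	rt.rtmsg_dst_len = 64;
 *	inet_pton(AF_INET6, "fe80::1", &rt.rtmsg_gateway);
 *	rt.rtmsg_flags = RTF_UP | RTF_GATEWAY;
 *	rt.rtmsg_metric = 1;
 *	rt.rtmsg_ifindex = if_nametoindex("eth0");
 *
 *	fd = socket(AF_INET6, SOCK_DGRAM, 0);
 *	ioctl(fd, SIOCADDRT, &rt);
 */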
4488
4489/*
4490 *	Drop the packet on the floor
4491 */
4492
4493static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
4494{
4495	struct dst_entry *dst = skb_dst(skb);
4496	struct net *net = dev_net(dst->dev);
4497	struct inet6_dev *idev;
4498	SKB_DR(reason);
4499	int type;
4500
4501	if (netif_is_l3_master(skb->dev) ||
4502	    dst->dev == net->loopback_dev)
4503		idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
4504	else
4505		idev = ip6_dst_idev(dst);
4506
4507	switch (ipstats_mib_noroutes) {
4508	case IPSTATS_MIB_INNOROUTES:
4509		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
4510		if (type == IPV6_ADDR_ANY) {
4511			SKB_DR_SET(reason, IP_INADDRERRORS);
4512			IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
4513			break;
4514		}
4515		SKB_DR_SET(reason, IP_INNOROUTES);
4516		fallthrough;
4517	case IPSTATS_MIB_OUTNOROUTES:
4518		SKB_DR_OR(reason, IP_OUTNOROUTES);
4519		IP6_INC_STATS(net, idev, ipstats_mib_noroutes);
4520		break;
4521	}
4522
4523	/* Start over by dropping the dst for l3mdev case */
4524	if (netif_is_l3_master(skb->dev))
4525		skb_dst_drop(skb);
4526
4527	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
4528	kfree_skb_reason(skb, reason);
4529	return 0;
4530}
4531
4532static int ip6_pkt_discard(struct sk_buff *skb)
4533{
4534	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
4535}
4536
4537static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
4538{
4539	skb->dev = skb_dst(skb)->dev;
4540	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
4541}
4542
4543static int ip6_pkt_prohibit(struct sk_buff *skb)
4544{
4545	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
4546}
4547
4548static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
4549{
4550	skb->dev = skb_dst(skb)->dev;
4551	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
4552}
4553
4554/*
4555 *	Allocate a dst for local (unicast / anycast) address.
4556 */
4557
4558struct fib6_info *addrconf_f6i_alloc(struct net *net,
4559				     struct inet6_dev *idev,
4560				     const struct in6_addr *addr,
4561				     bool anycast, gfp_t gfp_flags,
4562				     struct netlink_ext_ack *extack)
4563{
4564	struct fib6_config cfg = {
4565		.fc_table = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL,
4566		.fc_ifindex = idev->dev->ifindex,
4567		.fc_flags = RTF_UP | RTF_NONEXTHOP,
4568		.fc_dst = *addr,
4569		.fc_dst_len = 128,
4570		.fc_protocol = RTPROT_KERNEL,
4571		.fc_nlinfo.nl_net = net,
4572		.fc_ignore_dev_down = true,
4573	};
4574	struct fib6_info *f6i;
4575
4576	if (anycast) {
4577		cfg.fc_type = RTN_ANYCAST;
4578		cfg.fc_flags |= RTF_ANYCAST;
4579	} else {
4580		cfg.fc_type = RTN_LOCAL;
4581		cfg.fc_flags |= RTF_LOCAL;
4582	}
4583
4584	f6i = ip6_route_info_create(&cfg, gfp_flags, extack);
4585	if (!IS_ERR(f6i)) {
4586		f6i->dst_nocount = true;
4587
4588		if (!anycast &&
4589		    (READ_ONCE(net->ipv6.devconf_all->disable_policy) ||
4590		     READ_ONCE(idev->cnf.disable_policy)))
4591			f6i->dst_nopolicy = true;
4592	}
4593
4594	return f6i;
4595}
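/* Example: when addrconf assigns 2001:db8::1/64 to an interface, it
 * uses this helper to create the /128 host route shown as
 * "local 2001:db8::1 proto kernel" in the local table (RTN_LOCAL); for
 * an anycast address the route is created as RTN_ANYCAST instead.
 */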
4596
4597/* remove a deleted IP from prefsrc entries */
4598struct arg_dev_net_ip {
4599	struct net *net;
4600	struct in6_addr *addr;
4601};
4602
4603static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
4604{
4605	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
4606	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
4607
4608	if (!rt->nh &&
4609	    rt != net->ipv6.fib6_null_entry &&
4610	    ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr) &&
4611	    !ipv6_chk_addr(net, addr, rt->fib6_nh->fib_nh_dev, 0)) {
4612		spin_lock_bh(&rt6_exception_lock);
4613		/* remove prefsrc entry */
4614		rt->fib6_prefsrc.plen = 0;
4615		spin_unlock_bh(&rt6_exception_lock);
4616	}
4617	return 0;
4618}
4619
4620void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
4621{
4622	struct net *net = dev_net(ifp->idev->dev);
4623	struct arg_dev_net_ip adni = {
4624		.net = net,
4625		.addr = &ifp->addr,
4626	};
4627	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
4628}
4629
4630#define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT)
4631
4632/* Remove routers and update dst entries when a gateway turns into a host. */
4633static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
4634{
4635	struct in6_addr *gateway = (struct in6_addr *)arg;
4636	struct fib6_nh *nh;
4637
4638	/* RA routes do not use nexthops */
4639	if (rt->nh)
4640		return 0;
4641
4642	nh = rt->fib6_nh;
4643	if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
4644	    nh->fib_nh_gw_family && ipv6_addr_equal(gateway, &nh->fib_nh_gw6))
4645		return -1;
4646
4647	/* Further clean up cached routes in the exception table.
4648	 * This is needed because a cached route may have a different
4649	 * gateway than its 'parent' in the case of an IP redirect.
4650	 */
4651	fib6_nh_exceptions_clean_tohost(nh, gateway);
4652
4653	return 0;
4654}
4655
4656void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
4657{
4658	fib6_clean_all(net, fib6_clean_tohost, gateway);
4659}
4660
4661struct arg_netdev_event {
4662	const struct net_device *dev;
4663	union {
4664		unsigned char nh_flags;
4665		unsigned long event;
4666	};
4667};
4668
4669static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
4670{
4671	struct fib6_info *iter;
4672	struct fib6_node *fn;
4673
4674	fn = rcu_dereference_protected(rt->fib6_node,
4675			lockdep_is_held(&rt->fib6_table->tb6_lock));
4676	iter = rcu_dereference_protected(fn->leaf,
4677			lockdep_is_held(&rt->fib6_table->tb6_lock));
4678	while (iter) {
4679		if (iter->fib6_metric == rt->fib6_metric &&
4680		    rt6_qualify_for_ecmp(iter))
4681			return iter;
4682		iter = rcu_dereference_protected(iter->fib6_next,
4683				lockdep_is_held(&rt->fib6_table->tb6_lock));
4684	}
4685
4686	return NULL;
4687}
4688
4689/* only called for fib entries with builtin fib6_nh */
4690static bool rt6_is_dead(const struct fib6_info *rt)
4691{
4692	if (rt->fib6_nh->fib_nh_flags & RTNH_F_DEAD ||
4693	    (rt->fib6_nh->fib_nh_flags & RTNH_F_LINKDOWN &&
4694	     ip6_ignore_linkdown(rt->fib6_nh->fib_nh_dev)))
4695		return true;
4696
4697	return false;
4698}
4699
4700static int rt6_multipath_total_weight(const struct fib6_info *rt)
4701{
4702	struct fib6_info *iter;
4703	int total = 0;
4704
4705	if (!rt6_is_dead(rt))
4706		total += rt->fib6_nh->fib_nh_weight;
4707
4708	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
4709		if (!rt6_is_dead(iter))
4710			total += iter->fib6_nh->fib_nh_weight;
4711	}
4712
4713	return total;
4714}
4715
4716static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
4717{
4718	int upper_bound = -1;
4719
4720	if (!rt6_is_dead(rt)) {
4721		*weight += rt->fib6_nh->fib_nh_weight;
4722		upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
4723						    total) - 1;
4724	}
4725	atomic_set(&rt->fib6_nh->fib_nh_upper_bound, upper_bound);
4726}
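/* Worked example (illustrative): two sibling nexthops with weights 1
 * and 2 give cumulative weights 1 and 3 (total = 3), so
 *
 *	bound0 = round((1ULL << 31) * 1 / 3) - 1 = 0x2AAAAAAA
 *	bound1 = round((1ULL << 31) * 3 / 3) - 1 = 0x7FFFFFFF
 *
 * A 31-bit flow hash h then selects nexthop 0 when h <= bound0 (about
 * one third of flows) and nexthop 1 otherwise, matching the configured
 * weights.  A dead nexthop keeps the bound -1 and is never selected.
 */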
4727
4728static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
4729{
4730	struct fib6_info *iter;
4731	int weight = 0;
4732
4733	rt6_upper_bound_set(rt, &weight, total);
4734
4735	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4736		rt6_upper_bound_set(iter, &weight, total);
4737}
4738
4739void rt6_multipath_rebalance(struct fib6_info *rt)
4740{
4741	struct fib6_info *first;
4742	int total;
4743
4744	/* If the entire multipath route was marked for flushing, there
4745	 * is no need to rebalance upon the removal of every sibling
4746	 * route.
4747	 */
4748	if (!rt->fib6_nsiblings || rt->should_flush)
4749		return;
4750
4751	/* During lookup routes are evaluated in order, so we need to
4752	 * make sure upper bounds are assigned from the first sibling
4753	 * onwards.
4754	 */
4755	first = rt6_multipath_first_sibling(rt);
4756	if (WARN_ON_ONCE(!first))
4757		return;
4758
4759	total = rt6_multipath_total_weight(first);
4760	rt6_multipath_upper_bound_set(first, total);
4761}
4762
4763static int fib6_ifup(struct fib6_info *rt, void *p_arg)
4764{
4765	const struct arg_netdev_event *arg = p_arg;
4766	struct net *net = dev_net(arg->dev);
4767
4768	if (rt != net->ipv6.fib6_null_entry && !rt->nh &&
4769	    rt->fib6_nh->fib_nh_dev == arg->dev) {
4770		rt->fib6_nh->fib_nh_flags &= ~arg->nh_flags;
4771		fib6_update_sernum_upto_root(net, rt);
4772		rt6_multipath_rebalance(rt);
4773	}
4774
4775	return 0;
4776}
4777
4778void rt6_sync_up(struct net_device *dev, unsigned char nh_flags)
4779{
4780	struct arg_netdev_event arg = {
4781		.dev = dev,
4782		{
4783			.nh_flags = nh_flags,
4784		},
4785	};
4786
4787	if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
4788		arg.nh_flags |= RTNH_F_LINKDOWN;
4789
4790	fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
4791}
4792
4793/* only called for fib entries with inline fib6_nh */
4794static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
4795				   const struct net_device *dev)
4796{
4797	struct fib6_info *iter;
4798
4799	if (rt->fib6_nh->fib_nh_dev == dev)
4800		return true;
4801	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4802		if (iter->fib6_nh->fib_nh_dev == dev)
4803			return true;
4804
4805	return false;
4806}
4807
4808static void rt6_multipath_flush(struct fib6_info *rt)
4809{
4810	struct fib6_info *iter;
4811
4812	rt->should_flush = 1;
4813	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4814		iter->should_flush = 1;
4815}
4816
4817static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
4818					     const struct net_device *down_dev)
4819{
4820	struct fib6_info *iter;
4821	unsigned int dead = 0;
4822
4823	if (rt->fib6_nh->fib_nh_dev == down_dev ||
4824	    rt->fib6_nh->fib_nh_flags & RTNH_F_DEAD)
4825		dead++;
4826	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4827		if (iter->fib6_nh->fib_nh_dev == down_dev ||
4828		    iter->fib6_nh->fib_nh_flags & RTNH_F_DEAD)
4829			dead++;
4830
4831	return dead;
4832}
4833
4834static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
4835				       const struct net_device *dev,
4836				       unsigned char nh_flags)
4837{
4838	struct fib6_info *iter;
4839
4840	if (rt->fib6_nh->fib_nh_dev == dev)
4841		rt->fib6_nh->fib_nh_flags |= nh_flags;
4842	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4843		if (iter->fib6_nh->fib_nh_dev == dev)
4844			iter->fib6_nh->fib_nh_flags |= nh_flags;
4845}
4846
4847/* called with write lock held for table with rt */
4848static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
4849{
4850	const struct arg_netdev_event *arg = p_arg;
4851	const struct net_device *dev = arg->dev;
4852	struct net *net = dev_net(dev);
4853
4854	if (rt == net->ipv6.fib6_null_entry || rt->nh)
4855		return 0;
4856
4857	switch (arg->event) {
4858	case NETDEV_UNREGISTER:
4859		return rt->fib6_nh->fib_nh_dev == dev ? -1 : 0;
4860	case NETDEV_DOWN:
4861		if (rt->should_flush)
4862			return -1;
4863		if (!rt->fib6_nsiblings)
4864			return rt->fib6_nh->fib_nh_dev == dev ? -1 : 0;
4865		if (rt6_multipath_uses_dev(rt, dev)) {
4866			unsigned int count;
4867
4868			count = rt6_multipath_dead_count(rt, dev);
4869			if (rt->fib6_nsiblings + 1 == count) {
4870				rt6_multipath_flush(rt);
4871				return -1;
4872			}
4873			rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
4874						   RTNH_F_LINKDOWN);
4875			fib6_update_sernum(net, rt);
4876			rt6_multipath_rebalance(rt);
4877		}
4878		return -2;
4879	case NETDEV_CHANGE:
4880		if (rt->fib6_nh->fib_nh_dev != dev ||
4881		    rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
4882			break;
4883		rt->fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
4884		rt6_multipath_rebalance(rt);
4885		break;
4886	}
4887
4888	return 0;
4889}
4890
4891void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
4892{
4893	struct arg_netdev_event arg = {
4894		.dev = dev,
4895		{
4896			.event = event,
4897		},
4898	};
4899	struct net *net = dev_net(dev);
4900
4901	if (net->ipv6.sysctl.skip_notify_on_dev_down)
4902		fib6_clean_all_skip_notify(net, fib6_ifdown, &arg);
4903	else
4904		fib6_clean_all(net, fib6_ifdown, &arg);
4905}
4906
4907void rt6_disable_ip(struct net_device *dev, unsigned long event)
4908{
4909	rt6_sync_down_dev(dev, event);
4910	rt6_uncached_list_flush_dev(dev);
4911	neigh_ifdown(&nd_tbl, dev);
4912}
4913
4914struct rt6_mtu_change_arg {
4915	struct net_device *dev;
4916	unsigned int mtu;
4917	struct fib6_info *f6i;
4918};
4919
4920static int fib6_nh_mtu_change(struct fib6_nh *nh, void *_arg)
4921{
4922	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *)_arg;
4923	struct fib6_info *f6i = arg->f6i;
4924
4925	/* An administrative MTU increase cannot be discovered by IPv6
4926	 * PMTU discovery, so the PMTU must be raised here as well.
4927	 * RFC 1981 does not cover administrative MTU increases (e.g.
4928	 * enabling jumbo frames), so updating on increase is a MUST.
4929	 */
4930	if (nh->fib_nh_dev == arg->dev) {
4931		struct inet6_dev *idev = __in6_dev_get(arg->dev);
4932		u32 mtu = f6i->fib6_pmtu;
4933
4934		if (mtu >= arg->mtu ||
4935		    (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4936			fib6_metric_set(f6i, RTAX_MTU, arg->mtu);
4937
4938		spin_lock_bh(&rt6_exception_lock);
4939		rt6_exceptions_update_pmtu(idev, nh, arg->mtu);
4940		spin_unlock_bh(&rt6_exception_lock);
4941	}
4942
4943	return 0;
4944}
4945
4946static int rt6_mtu_change_route(struct fib6_info *f6i, void *p_arg)
4947{
4948	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
4949	struct inet6_dev *idev;
4950
4951	/* In IPv6, PMTU discovery is not optional,
4952	 * so the RTAX_MTU lock cannot disable it.
4953	 * We still use this lock to block changes
4954	 * caused by addrconf/ndisc.
4955	 */
4956
4957	idev = __in6_dev_get(arg->dev);
4958	if (!idev)
4959		return 0;
4960
4961	if (fib6_metric_locked(f6i, RTAX_MTU))
4962		return 0;
4963
4964	arg->f6i = f6i;
4965	if (f6i->nh) {
4966		/* fib6_nh_mtu_change only returns 0, so this is safe */
4967		return nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_mtu_change,
4968						arg);
4969	}
4970
4971	return fib6_nh_mtu_change(f6i->fib6_nh, arg);
4972}
4973
4974void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
4975{
4976	struct rt6_mtu_change_arg arg = {
4977		.dev = dev,
4978		.mtu = mtu,
4979	};
4980
4981	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
4982}
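/* This is typically reached from addrconf's netdevice notifier when the
 * link MTU changes (e.g. after "ip link set dev eth0 mtu 9000"), so the
 * per-route and per-exception PMTU state tracks the device MTU.
 */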
4983
4984static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
4985	[RTA_UNSPEC]		= { .strict_start_type = RTA_DPORT + 1 },
4986	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
4987	[RTA_PREFSRC]		= { .len = sizeof(struct in6_addr) },
4988	[RTA_OIF]               = { .type = NLA_U32 },
4989	[RTA_IIF]		= { .type = NLA_U32 },
4990	[RTA_PRIORITY]          = { .type = NLA_U32 },
4991	[RTA_METRICS]           = { .type = NLA_NESTED },
4992	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
4993	[RTA_PREF]              = { .type = NLA_U8 },
4994	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
4995	[RTA_ENCAP]		= { .type = NLA_NESTED },
4996	[RTA_EXPIRES]		= { .type = NLA_U32 },
4997	[RTA_UID]		= { .type = NLA_U32 },
4998	[RTA_MARK]		= { .type = NLA_U32 },
4999	[RTA_TABLE]		= { .type = NLA_U32 },
5000	[RTA_IP_PROTO]		= { .type = NLA_U8 },
5001	[RTA_SPORT]		= { .type = NLA_U16 },
5002	[RTA_DPORT]		= { .type = NLA_U16 },
5003	[RTA_NH_ID]		= { .type = NLA_U32 },
5004};
5005
5006static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
5007			      struct fib6_config *cfg,
5008			      struct netlink_ext_ack *extack)
5009{
5010	struct rtmsg *rtm;
5011	struct nlattr *tb[RTA_MAX+1];
5012	unsigned int pref;
5013	int err;
5014
5015	err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
5016				     rtm_ipv6_policy, extack);
5017	if (err < 0)
5018		goto errout;
5019
5020	err = -EINVAL;
5021	rtm = nlmsg_data(nlh);
5022
5023	if (rtm->rtm_tos) {
5024		NL_SET_ERR_MSG(extack,
5025			       "Invalid dsfield (tos): option not available for IPv6");
5026		goto errout;
5027	}
5028
5029	*cfg = (struct fib6_config){
5030		.fc_table = rtm->rtm_table,
5031		.fc_dst_len = rtm->rtm_dst_len,
5032		.fc_src_len = rtm->rtm_src_len,
5033		.fc_flags = RTF_UP,
5034		.fc_protocol = rtm->rtm_protocol,
5035		.fc_type = rtm->rtm_type,
5036
5037		.fc_nlinfo.portid = NETLINK_CB(skb).portid,
5038		.fc_nlinfo.nlh = nlh,
5039		.fc_nlinfo.nl_net = sock_net(skb->sk),
5040	};
5041
5042	if (rtm->rtm_type == RTN_UNREACHABLE ||
5043	    rtm->rtm_type == RTN_BLACKHOLE ||
5044	    rtm->rtm_type == RTN_PROHIBIT ||
5045	    rtm->rtm_type == RTN_THROW)
5046		cfg->fc_flags |= RTF_REJECT;
5047
5048	if (rtm->rtm_type == RTN_LOCAL)
5049		cfg->fc_flags |= RTF_LOCAL;
5050
5051	if (rtm->rtm_flags & RTM_F_CLONED)
5052		cfg->fc_flags |= RTF_CACHE;
5053
5054	cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
5055
5056	if (tb[RTA_NH_ID]) {
5057		if (tb[RTA_GATEWAY]   || tb[RTA_OIF] ||
5058		    tb[RTA_MULTIPATH] || tb[RTA_ENCAP]) {
5059			NL_SET_ERR_MSG(extack,
5060				       "Nexthop specification and nexthop id are mutually exclusive");
5061			goto errout;
5062		}
5063		cfg->fc_nh_id = nla_get_u32(tb[RTA_NH_ID]);
5064	}
5065
5066	if (tb[RTA_GATEWAY]) {
5067		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
5068		cfg->fc_flags |= RTF_GATEWAY;
5069	}
5070	if (tb[RTA_VIA]) {
5071		NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute");
5072		goto errout;
5073	}
5074
5075	if (tb[RTA_DST]) {
5076		int plen = (rtm->rtm_dst_len + 7) >> 3;
5077
5078		if (nla_len(tb[RTA_DST]) < plen)
5079			goto errout;
5080
5081		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
5082	}
5083
5084	if (tb[RTA_SRC]) {
5085		int plen = (rtm->rtm_src_len + 7) >> 3;
5086
5087		if (nla_len(tb[RTA_SRC]) < plen)
5088			goto errout;
5089
5090		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
5091	}
5092
5093	if (tb[RTA_PREFSRC])
5094		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
5095
5096	if (tb[RTA_OIF])
5097		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
5098
5099	if (tb[RTA_PRIORITY])
5100		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
5101
5102	if (tb[RTA_METRICS]) {
5103		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
5104		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
5105	}
5106
5107	if (tb[RTA_TABLE])
5108		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
5109
5110	if (tb[RTA_MULTIPATH]) {
5111		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
5112		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
5113
5114		err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
5115						     cfg->fc_mp_len, extack);
5116		if (err < 0)
5117			goto errout;
5118	}
5119
5120	if (tb[RTA_PREF]) {
5121		pref = nla_get_u8(tb[RTA_PREF]);
5122		if (pref != ICMPV6_ROUTER_PREF_LOW &&
5123		    pref != ICMPV6_ROUTER_PREF_HIGH)
5124			pref = ICMPV6_ROUTER_PREF_MEDIUM;
5125		cfg->fc_flags |= RTF_PREF(pref);
5126	}
5127
5128	if (tb[RTA_ENCAP])
5129		cfg->fc_encap = tb[RTA_ENCAP];
5130
5131	if (tb[RTA_ENCAP_TYPE]) {
5132		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
5133
5134		err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
5135		if (err < 0)
5136			goto errout;
5137	}
5138
5139	if (tb[RTA_EXPIRES]) {
5140		unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
5141
5142		if (addrconf_finite_timeout(timeout)) {
5143			cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
5144			cfg->fc_flags |= RTF_EXPIRES;
5145		}
5146	}
5147
5148	err = 0;
5149errout:
5150	return err;
5151}
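/* Illustrative mapping: "ip -6 route add 2001:db8::/64 via fe80::1
 * dev eth0 metric 1024" arrives as an RTM_NEWROUTE message with
 * rtm_dst_len = 64 plus RTA_DST, RTA_GATEWAY, RTA_OIF and RTA_PRIORITY
 * attributes, which the function above converts to fc_dst/fc_dst_len,
 * fc_gateway (setting RTF_GATEWAY), fc_ifindex and fc_metric.
 */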
5152
5153struct rt6_nh {
5154	struct fib6_info *fib6_info;
5155	struct fib6_config r_cfg;
5156	struct list_head next;
5157};
5158
5159static int ip6_route_info_append(struct net *net,
5160				 struct list_head *rt6_nh_list,
5161				 struct fib6_info *rt,
5162				 struct fib6_config *r_cfg)
5163{
5164	struct rt6_nh *nh;
5165	int err = -EEXIST;
5166
5167	list_for_each_entry(nh, rt6_nh_list, next) {
5168		/* check if fib6_info already exists */
5169		if (rt6_duplicate_nexthop(nh->fib6_info, rt))
5170			return err;
5171	}
5172
5173	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
5174	if (!nh)
5175		return -ENOMEM;
5176	nh->fib6_info = rt;
5177	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
5178	list_add_tail(&nh->next, rt6_nh_list);
5179
5180	return 0;
5181}
5182
5183static void ip6_route_mpath_notify(struct fib6_info *rt,
5184				   struct fib6_info *rt_last,
5185				   struct nl_info *info,
5186				   __u16 nlflags)
5187{
5188	/* if this is an APPEND route, then rt points to the first route
5189	 * inserted and rt_last points to the last one. Userspace wants a
5190	 * consistent dump of the route which starts at the first nexthop.
5191	 * Since sibling routes are always added at the end of the list,
5192	 * find the first sibling of the last route appended.
5193	 */
5194	if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
5195		rt = list_first_entry(&rt_last->fib6_siblings,
5196				      struct fib6_info,
5197				      fib6_siblings);
5198	}
5199
5200	if (rt)
5201		inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
5202}
5203
5204static bool ip6_route_mpath_should_notify(const struct fib6_info *rt)
5205{
5206	bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
5207	bool should_notify = false;
5208	struct fib6_info *leaf;
5209	struct fib6_node *fn;
5210
5211	rcu_read_lock();
5212	fn = rcu_dereference(rt->fib6_node);
5213	if (!fn)
5214		goto out;
5215
5216	leaf = rcu_dereference(fn->leaf);
5217	if (!leaf)
5218		goto out;
5219
5220	if (rt == leaf ||
5221	    (rt_can_ecmp && rt->fib6_metric == leaf->fib6_metric &&
5222	     rt6_qualify_for_ecmp(leaf)))
5223		should_notify = true;
5224out:
5225	rcu_read_unlock();
5226
5227	return should_notify;
5228}
5229
5230static int fib6_gw_from_attr(struct in6_addr *gw, struct nlattr *nla,
5231			     struct netlink_ext_ack *extack)
5232{
5233	if (nla_len(nla) < sizeof(*gw)) {
5234		NL_SET_ERR_MSG(extack, "Invalid IPv6 address in RTA_GATEWAY");
5235		return -EINVAL;
5236	}
5237
5238	*gw = nla_get_in6_addr(nla);
5239
5240	return 0;
5241}
5242
5243static int ip6_route_multipath_add(struct fib6_config *cfg,
5244				   struct netlink_ext_ack *extack)
5245{
5246	struct fib6_info *rt_notif = NULL, *rt_last = NULL;
5247	struct nl_info *info = &cfg->fc_nlinfo;
5248	struct fib6_config r_cfg;
5249	struct rtnexthop *rtnh;
5250	struct fib6_info *rt;
5251	struct rt6_nh *err_nh;
5252	struct rt6_nh *nh, *nh_safe;
5253	__u16 nlflags;
5254	int remaining;
5255	int attrlen;
5256	int err = 1;
5257	int nhn = 0;
5258	int replace = (cfg->fc_nlinfo.nlh &&
5259		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
5260	LIST_HEAD(rt6_nh_list);
5261
5262	nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
5263	if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
5264		nlflags |= NLM_F_APPEND;
5265
5266	remaining = cfg->fc_mp_len;
5267	rtnh = (struct rtnexthop *)cfg->fc_mp;
5268
5269	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
5270	 * fib6_info structs per nexthop
5271	 */
5272	while (rtnh_ok(rtnh, remaining)) {
5273		memcpy(&r_cfg, cfg, sizeof(*cfg));
5274		if (rtnh->rtnh_ifindex)
5275			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
5276
5277		attrlen = rtnh_attrlen(rtnh);
5278		if (attrlen > 0) {
5279			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
5280
5281			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
5282			if (nla) {
5283				err = fib6_gw_from_attr(&r_cfg.fc_gateway, nla,
5284							extack);
5285				if (err)
5286					goto cleanup;
5287
5288				r_cfg.fc_flags |= RTF_GATEWAY;
5289			}
5290			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
5291
5292			/* RTA_ENCAP_TYPE length checked in
5293			 * lwtunnel_valid_encap_type_attr
5294			 */
5295			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
5296			if (nla)
5297				r_cfg.fc_encap_type = nla_get_u16(nla);
5298		}
5299
5300		r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
5301		rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
5302		if (IS_ERR(rt)) {
5303			err = PTR_ERR(rt);
5304			rt = NULL;
5305			goto cleanup;
5306		}
5307		if (!rt6_qualify_for_ecmp(rt)) {
5308			err = -EINVAL;
5309			NL_SET_ERR_MSG(extack,
5310				       "Device only routes can not be added for IPv6 using the multipath API.");
5311			fib6_info_release(rt);
5312			goto cleanup;
5313		}
5314
5315		rt->fib6_nh->fib_nh_weight = rtnh->rtnh_hops + 1;
5316
5317		err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
5318					    rt, &r_cfg);
5319		if (err) {
5320			fib6_info_release(rt);
5321			goto cleanup;
5322		}
5323
5324		rtnh = rtnh_next(rtnh, &remaining);
5325	}
5326
5327	if (list_empty(&rt6_nh_list)) {
5328		NL_SET_ERR_MSG(extack,
5329			       "Invalid nexthop configuration - no valid nexthops");
5330		return -EINVAL;
5331	}
5332
5333	/* for add and replace send one notification with all nexthops.
5334	 * Skip the notification in fib6_add_rt2node and send one with
5335	 * the full route when done
5336	 */
5337	info->skip_notify = 1;
5338
5339	/* For add and replace, send one notification with all nexthops. For
5340	 * append, send one notification with all appended nexthops.
5341	 */
5342	info->skip_notify_kernel = 1;
5343
5344	err_nh = NULL;
5345	list_for_each_entry(nh, &rt6_nh_list, next) {
5346		err = __ip6_ins_rt(nh->fib6_info, info, extack);
5347
5348		if (err) {
5349			if (replace && nhn)
5350				NL_SET_ERR_MSG_MOD(extack,
5351						   "multipath route replace failed (check consistency of installed routes)");
5352			err_nh = nh;
5353			goto add_errout;
5354		}
5355		/* save reference to last route successfully inserted */
5356		rt_last = nh->fib6_info;
5357
5358		/* save reference to first route for notification */
5359		if (!rt_notif)
5360			rt_notif = nh->fib6_info;
5361
5362		/* Because each route is added like a single route, we remove
5363		 * these flags after the first nexthop: if there is a collision,
5364		 * we have already failed to add the first nexthop, since
5365		 * fib6_add_rt2node() has rejected it; when replacing, the old
5366		 * nexthops have been replaced by the first new one, and the
5367		 * rest should be appended to it.
5368		 */
5369		if (cfg->fc_nlinfo.nlh) {
5370			cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
5371							     NLM_F_REPLACE);
5372			cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_CREATE;
5373		}
5374		nhn++;
5375	}
5376
5377	/* An in-kernel notification should only be sent in case the new
5378	 * multipath route is added as the first route in the node, or if
5379	 * it was appended to it. We pass 'rt_notif' since it is the first
5380	 * sibling and might allow us to skip some checks in the replace case.
5381	 */
5382	if (ip6_route_mpath_should_notify(rt_notif)) {
5383		enum fib_event_type fib_event;
5384
5385		if (rt_notif->fib6_nsiblings != nhn - 1)
5386			fib_event = FIB_EVENT_ENTRY_APPEND;
5387		else
5388			fib_event = FIB_EVENT_ENTRY_REPLACE;
5389
5390		err = call_fib6_multipath_entry_notifiers(info->nl_net,
5391							  fib_event, rt_notif,
5392							  nhn - 1, extack);
5393		if (err) {
5394			/* Delete all the siblings that were just added */
5395			err_nh = NULL;
5396			goto add_errout;
5397		}
5398	}
5399
5400	/* success ... tell user about new route */
5401	ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
5402	goto cleanup;
5403
5404add_errout:
5405	/* send notification for routes that were added so that
5406	 * the delete notifications sent by ip6_route_del are
5407	 * coherent
5408	 */
5409	if (rt_notif)
5410		ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
5411
5412	/* Delete routes that were already added */
5413	list_for_each_entry(nh, &rt6_nh_list, next) {
5414		if (err_nh == nh)
5415			break;
5416		ip6_route_del(&nh->r_cfg, extack);
5417	}
5418
5419cleanup:
5420	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
5421		fib6_info_release(nh->fib6_info);
5422		list_del(&nh->next);
5423		kfree(nh);
5424	}
5425
5426	return err;
5427}
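/* Illustrative input: "ip -6 route add 2001:db8::/64
 *	nexthop via fe80::1 dev eth0 weight 1
 *	nexthop via fe80::2 dev eth1 weight 2"
 * arrives as a single RTA_MULTIPATH attribute holding two struct
 * rtnexthop entries (rtnh_hops stores weight - 1, hence the "+ 1"
 * above).  Each entry becomes its own fib6_info; they are inserted one
 * by one, linked as siblings, and a single notification covering all
 * hops is sent at the end.
 */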
5428
5429static int ip6_route_multipath_del(struct fib6_config *cfg,
5430				   struct netlink_ext_ack *extack)
5431{
5432	struct fib6_config r_cfg;
5433	struct rtnexthop *rtnh;
5434	int last_err = 0;
5435	int remaining;
5436	int attrlen;
5437	int err;
5438
5439	remaining = cfg->fc_mp_len;
5440	rtnh = (struct rtnexthop *)cfg->fc_mp;
5441
5442	/* Parse a Multipath Entry */
5443	while (rtnh_ok(rtnh, remaining)) {
5444		memcpy(&r_cfg, cfg, sizeof(*cfg));
5445		if (rtnh->rtnh_ifindex)
5446			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
5447
5448		attrlen = rtnh_attrlen(rtnh);
5449		if (attrlen > 0) {
5450			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
5451
5452			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
5453			if (nla) {
5454				err = fib6_gw_from_attr(&r_cfg.fc_gateway, nla,
5455							extack);
5456				if (err) {
5457					last_err = err;
5458					goto next_rtnh;
5459				}
5460
5461				r_cfg.fc_flags |= RTF_GATEWAY;
5462			}
5463		}
5464		err = ip6_route_del(&r_cfg, extack);
5465		if (err)
5466			last_err = err;
5467
5468next_rtnh:
5469		rtnh = rtnh_next(rtnh, &remaining);
5470	}
5471
5472	return last_err;
5473}
5474
5475static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
5476			      struct netlink_ext_ack *extack)
5477{
5478	struct fib6_config cfg;
5479	int err;
5480
5481	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
5482	if (err < 0)
5483		return err;
5484
5485	if (cfg.fc_nh_id &&
5486	    !nexthop_find_by_id(sock_net(skb->sk), cfg.fc_nh_id)) {
5487		NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
5488		return -EINVAL;
5489	}
5490
5491	if (cfg.fc_mp)
5492		return ip6_route_multipath_del(&cfg, extack);
5493	else {
5494		cfg.fc_delete_all_nh = 1;
5495		return ip6_route_del(&cfg, extack);
5496	}
5497}
5498
5499static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
5500			      struct netlink_ext_ack *extack)
5501{
5502	struct fib6_config cfg;
5503	int err;
5504
5505	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
5506	if (err < 0)
5507		return err;
5508
5509	if (cfg.fc_metric == 0)
5510		cfg.fc_metric = IP6_RT_PRIO_USER;
5511
5512	if (cfg.fc_mp)
5513		return ip6_route_multipath_add(&cfg, extack);
5514	else
5515		return ip6_route_add(&cfg, GFP_KERNEL, extack);
5516}
5517
5518/* add the overhead of this fib6_nh to nexthop_len */
5519static int rt6_nh_nlmsg_size(struct fib6_nh *nh, void *arg)
5520{
5521	int *nexthop_len = arg;
5522
5523	*nexthop_len += nla_total_size(0)	 /* RTA_MULTIPATH */
5524		     + NLA_ALIGN(sizeof(struct rtnexthop))
5525		     + nla_total_size(16); /* RTA_GATEWAY */
5526
5527	if (nh->fib_nh_lws) {
5528		/* RTA_ENCAP_TYPE */
5529		*nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
5530		/* RTA_ENCAP */
5531		*nexthop_len += nla_total_size(2);
5532	}
5533
5534	return 0;
5535}
5536
5537static size_t rt6_nlmsg_size(struct fib6_info *f6i)
5538{
5539	int nexthop_len;
5540
5541	if (f6i->nh) {
5542		nexthop_len = nla_total_size(4); /* RTA_NH_ID */
5543		nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_nlmsg_size,
5544					 &nexthop_len);
5545	} else {
5546		struct fib6_info *sibling, *next_sibling;
5547		struct fib6_nh *nh = f6i->fib6_nh;
5548
5549		nexthop_len = 0;
5550		if (f6i->fib6_nsiblings) {
5551			rt6_nh_nlmsg_size(nh, &nexthop_len);
5552
5553			list_for_each_entry_safe(sibling, next_sibling,
5554						 &f6i->fib6_siblings, fib6_siblings) {
5555				rt6_nh_nlmsg_size(sibling->fib6_nh, &nexthop_len);
5556			}
5557		}
5558		nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
5559	}
5560
5561	return NLMSG_ALIGN(sizeof(struct rtmsg))
5562	       + nla_total_size(16) /* RTA_SRC */
5563	       + nla_total_size(16) /* RTA_DST */
5564	       + nla_total_size(16) /* RTA_GATEWAY */
5565	       + nla_total_size(16) /* RTA_PREFSRC */
5566	       + nla_total_size(4) /* RTA_TABLE */
5567	       + nla_total_size(4) /* RTA_IIF */
5568	       + nla_total_size(4) /* RTA_OIF */
5569	       + nla_total_size(4) /* RTA_PRIORITY */
5570	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
5571	       + nla_total_size(sizeof(struct rta_cacheinfo))
5572	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
5573	       + nla_total_size(1) /* RTA_PREF */
5574	       + nexthop_len;
5575}
5576
5577static int rt6_fill_node_nexthop(struct sk_buff *skb, struct nexthop *nh,
5578				 unsigned char *flags)
5579{
5580	if (nexthop_is_multipath(nh)) {
5581		struct nlattr *mp;
5582
5583		mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
5584		if (!mp)
5585			goto nla_put_failure;
5586
5587		if (nexthop_mpath_fill_node(skb, nh, AF_INET6))
5588			goto nla_put_failure;
5589
5590		nla_nest_end(skb, mp);
5591	} else {
5592		struct fib6_nh *fib6_nh;
5593
5594		fib6_nh = nexthop_fib6_nh(nh);
5595		if (fib_nexthop_info(skb, &fib6_nh->nh_common, AF_INET6,
5596				     flags, false) < 0)
5597			goto nla_put_failure;
5598	}
5599
5600	return 0;
5601
5602nla_put_failure:
5603	return -EMSGSIZE;
5604}
5605
5606static int rt6_fill_node(struct net *net, struct sk_buff *skb,
5607			 struct fib6_info *rt, struct dst_entry *dst,
5608			 struct in6_addr *dest, struct in6_addr *src,
5609			 int iif, int type, u32 portid, u32 seq,
5610			 unsigned int flags)
5611{
5612	struct rt6_info *rt6 = dst_rt6_info(dst);
5613	struct rt6key *rt6_dst, *rt6_src;
5614	u32 *pmetrics, table, rt6_flags;
5615	unsigned char nh_flags = 0;
5616	struct nlmsghdr *nlh;
5617	struct rtmsg *rtm;
5618	long expires = 0;
5619
5620	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
5621	if (!nlh)
5622		return -EMSGSIZE;
5623
5624	if (rt6) {
5625		rt6_dst = &rt6->rt6i_dst;
5626		rt6_src = &rt6->rt6i_src;
5627		rt6_flags = rt6->rt6i_flags;
5628	} else {
5629		rt6_dst = &rt->fib6_dst;
5630		rt6_src = &rt->fib6_src;
5631		rt6_flags = rt->fib6_flags;
5632	}
5633
5634	rtm = nlmsg_data(nlh);
5635	rtm->rtm_family = AF_INET6;
5636	rtm->rtm_dst_len = rt6_dst->plen;
5637	rtm->rtm_src_len = rt6_src->plen;
5638	rtm->rtm_tos = 0;
5639	if (rt->fib6_table)
5640		table = rt->fib6_table->tb6_id;
5641	else
5642		table = RT6_TABLE_UNSPEC;
5643	rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT;
5644	if (nla_put_u32(skb, RTA_TABLE, table))
5645		goto nla_put_failure;
5646
5647	rtm->rtm_type = rt->fib6_type;
5648	rtm->rtm_flags = 0;
5649	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
5650	rtm->rtm_protocol = rt->fib6_protocol;
5651
5652	if (rt6_flags & RTF_CACHE)
5653		rtm->rtm_flags |= RTM_F_CLONED;
5654
5655	if (dest) {
5656		if (nla_put_in6_addr(skb, RTA_DST, dest))
5657			goto nla_put_failure;
5658		rtm->rtm_dst_len = 128;
5659	} else if (rtm->rtm_dst_len)
5660		if (nla_put_in6_addr(skb, RTA_DST, &rt6_dst->addr))
5661			goto nla_put_failure;
5662#ifdef CONFIG_IPV6_SUBTREES
5663	if (src) {
5664		if (nla_put_in6_addr(skb, RTA_SRC, src))
5665			goto nla_put_failure;
5666		rtm->rtm_src_len = 128;
5667	} else if (rtm->rtm_src_len &&
5668		   nla_put_in6_addr(skb, RTA_SRC, &rt6_src->addr))
5669		goto nla_put_failure;
5670#endif
5671	if (iif) {
5672#ifdef CONFIG_IPV6_MROUTE
5673		if (ipv6_addr_is_multicast(&rt6_dst->addr)) {
5674			int err = ip6mr_get_route(net, skb, rtm, portid);
5675
5676			if (err == 0)
5677				return 0;
5678			if (err < 0)
5679				goto nla_put_failure;
5680		} else
5681#endif
5682			if (nla_put_u32(skb, RTA_IIF, iif))
5683				goto nla_put_failure;
5684	} else if (dest) {
5685		struct in6_addr saddr_buf;
5686		if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
5687		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
5688			goto nla_put_failure;
5689	}
5690
5691	if (rt->fib6_prefsrc.plen) {
5692		struct in6_addr saddr_buf;
5693		saddr_buf = rt->fib6_prefsrc.addr;
5694		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
5695			goto nla_put_failure;
5696	}
5697
5698	pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
5699	if (rtnetlink_put_metrics(skb, pmetrics) < 0)
		goto nla_put_failure;

	if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
		goto nla_put_failure;

	/* For multipath routes, walk the siblings list and add
	 * each as a nexthop within RTA_MULTIPATH.
	 */
	if (rt6) {
		if (rt6_flags & RTF_GATEWAY &&
		    nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway))
			goto nla_put_failure;

		if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex))
			goto nla_put_failure;

		if (dst->lwtstate &&
		    lwtunnel_fill_encap(skb, dst->lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0)
			goto nla_put_failure;
	} else if (rt->fib6_nsiblings) {
		struct fib6_info *sibling, *next_sibling;
		struct nlattr *mp;

		mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
		if (!mp)
			goto nla_put_failure;

		if (fib_add_nexthop(skb, &rt->fib6_nh->nh_common,
				    rt->fib6_nh->fib_nh_weight, AF_INET6,
				    0) < 0)
			goto nla_put_failure;

		list_for_each_entry_safe(sibling, next_sibling,
					 &rt->fib6_siblings, fib6_siblings) {
			if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common,
					    sibling->fib6_nh->fib_nh_weight,
					    AF_INET6, 0) < 0)
				goto nla_put_failure;
		}

		nla_nest_end(skb, mp);
	} else if (rt->nh) {
		if (nla_put_u32(skb, RTA_NH_ID, rt->nh->id))
			goto nla_put_failure;

		if (nexthop_is_blackhole(rt->nh))
			rtm->rtm_type = RTN_BLACKHOLE;

		if (READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode) &&
		    rt6_fill_node_nexthop(skb, rt->nh, &nh_flags) < 0)
			goto nla_put_failure;

		rtm->rtm_flags |= nh_flags;
	} else {
		if (fib_nexthop_info(skb, &rt->fib6_nh->nh_common, AF_INET6,
				     &nh_flags, false) < 0)
			goto nla_put_failure;

		rtm->rtm_flags |= nh_flags;
	}

	if (rt6_flags & RTF_EXPIRES) {
		expires = dst ? dst->expires : rt->expires;
		expires -= jiffies;
	}

	if (!dst) {
		if (READ_ONCE(rt->offload))
			rtm->rtm_flags |= RTM_F_OFFLOAD;
		if (READ_ONCE(rt->trap))
			rtm->rtm_flags |= RTM_F_TRAP;
		if (READ_ONCE(rt->offload_failed))
			rtm->rtm_flags |= RTM_F_OFFLOAD_FAILED;
	}

	if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
		goto nla_put_failure;

	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt6_flags)))
		goto nla_put_failure;
	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

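/* Callback for nexthop_for_each_fib6_nh(): the walk stops as soon as a
 * callback returns non-zero, so returning 1 reports the first nexthop bound
 * to the device passed in @arg. A minimal sketch of the calling pattern
 * (fib6_info_uses_dev() below is the real caller):
 *
 *	found = nexthop_for_each_fib6_nh(f6i->nh, fib6_info_nh_uses_dev,
 *					 (void *)dev);
 */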
static int fib6_info_nh_uses_dev(struct fib6_nh *nh, void *arg)
{
	const struct net_device *dev = arg;

	if (nh->fib_nh_dev == dev)
		return 1;

	return 0;
}

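/* Report whether any nexthop of @f6i egresses through @dev: via a shared
 * nexthop object, the route's own fib6_nh, or one of its multipath
 * siblings. rt6_dump_route() uses this to honour device-based dump filters.
 */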
static bool fib6_info_uses_dev(const struct fib6_info *f6i,
			       const struct net_device *dev)
{
	if (f6i->nh) {
		struct net_device *_dev = (struct net_device *)dev;

		return !!nexthop_for_each_fib6_nh(f6i->nh,
						  fib6_info_nh_uses_dev,
						  _dev);
	}

	if (f6i->fib6_nh->fib_nh_dev == dev)
		return true;

	if (f6i->fib6_nsiblings) {
		struct fib6_info *sibling, *next_sibling;

		list_for_each_entry_safe(sibling, next_sibling,
					 &f6i->fib6_siblings, fib6_siblings) {
			if (sibling->fib6_nh->fib_nh_dev == dev)
				return true;
		}
	}

	return false;
}

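/* State carried across rt6_nh_dump_exceptions() calls for one route:
 * @skip is the number of entries already dumped in a previous partial
 * pass, @count the number handled in this pass so the caller can resume.
 */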
struct fib6_nh_exception_dump_walker {
	struct rt6_rtnl_dump_arg *dump;
	struct fib6_info *rt;
	unsigned int flags;
	unsigned int skip;
	unsigned int count;
};

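/* Dump the exception (cached) routes hanging off one fib6_nh. Returns 0
 * once the buckets are fully walked, or the rt6_fill_node() error when the
 * dump skb fills up; the walker counters then tell the caller where to
 * resume on the next partial dump.
 */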
static int rt6_nh_dump_exceptions(struct fib6_nh *nh, void *arg)
{
	struct fib6_nh_exception_dump_walker *w = arg;
	struct rt6_rtnl_dump_arg *dump = w->dump;
	struct rt6_exception_bucket *bucket;
	struct rt6_exception *rt6_ex;
	int i, err;

	bucket = fib6_nh_get_excptn_bucket(nh, NULL);
	if (!bucket)
		return 0;

	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
		hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
			if (w->skip) {
				w->skip--;
				continue;
			}

			/* Expiration of entries doesn't bump sernum, insertion
			 * does. Removal is triggered by insertion, so we can
			 * rely on the fact that if entries change between two
			 * partial dumps, this node is scanned again completely,
			 * see rt6_insert_exception() and fib6_dump_table().
			 *
			 * Count expired entries we go through as handled
			 * entries that we'll skip next time, in case of partial
			 * node dump. Otherwise, if entries expire meanwhile,
			 * we'll skip the wrong amount.
			 */
			if (rt6_check_expired(rt6_ex->rt6i)) {
				w->count++;
				continue;
			}

			err = rt6_fill_node(dump->net, dump->skb, w->rt,
					    &rt6_ex->rt6i->dst, NULL, NULL, 0,
					    RTM_NEWROUTE,
					    NETLINK_CB(dump->cb->skb).portid,
					    dump->cb->nlh->nlmsg_seq, w->flags);
			if (err)
				return err;

			w->count++;
		}
		bucket++;
	}

	return 0;
}

/* Return -1 if done with node, or number of handled routes on partial dump */
int rt6_dump_route(struct fib6_info *rt, void *p_arg, unsigned int skip)
{
	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
	struct fib_dump_filter *filter = &arg->filter;
	unsigned int flags = NLM_F_MULTI;
	struct net *net = arg->net;
	int count = 0;

	if (rt == net->ipv6.fib6_null_entry)
		return -1;

	if ((filter->flags & RTM_F_PREFIX) &&
	    !(rt->fib6_flags & RTF_PREFIX_RT)) {
		/* success since this is not a prefix route */
		return -1;
	}
	if (filter->filter_set &&
	    ((filter->rt_type  && rt->fib6_type != filter->rt_type) ||
	     (filter->dev      && !fib6_info_uses_dev(rt, filter->dev)) ||
	     (filter->protocol && rt->fib6_protocol != filter->protocol))) {
		return -1;
	}

	if (filter->filter_set ||
	    !filter->dump_routes || !filter->dump_exceptions) {
		flags |= NLM_F_DUMP_FILTERED;
	}

	if (filter->dump_routes) {
		if (skip) {
			skip--;
		} else {
			if (rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL,
					  0, RTM_NEWROUTE,
					  NETLINK_CB(arg->cb->skb).portid,
					  arg->cb->nlh->nlmsg_seq, flags)) {
				return 0;
			}
			count++;
		}
	}

	if (filter->dump_exceptions) {
		struct fib6_nh_exception_dump_walker w = { .dump = arg,
							   .rt = rt,
							   .flags = flags,
							   .skip = skip,
							   .count = 0 };
		int err;

		rcu_read_lock();
		if (rt->nh) {
			err = nexthop_for_each_fib6_nh(rt->nh,
						       rt6_nh_dump_exceptions,
						       &w);
		} else {
			err = rt6_nh_dump_exceptions(rt->fib6_nh, &w);
		}
		rcu_read_unlock();

		if (err)
			return count + w.count;
	}

	return -1;
}

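/* Validate an RTM_GETROUTE request. Under strict checking, header fields
 * this handler does not interpret must be zero and only attributes actually
 * consumed by inet6_rtm_getroute() are accepted; legacy requests fall back
 * to the permissive nlmsg_parse_deprecated(). An illustrative sketch of the
 * header a strict userspace request would carry:
 *
 *	struct rtmsg rtm = {
 *		.rtm_family  = AF_INET6,
 *		.rtm_dst_len = 128,
 *	};
 *
 * where rtm_dst_len must be 128 (or 0 without RTA_DST), as checked below.
 */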
static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
					const struct nlmsghdr *nlh,
					struct nlattr **tb,
					struct netlink_ext_ack *extack)
{
	struct rtmsg *rtm;
	int i, err;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Invalid header for get route request");
		return -EINVAL;
	}

	if (!netlink_strict_get_check(skb))
		return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
					      rtm_ipv6_policy, extack);

	rtm = nlmsg_data(nlh);
	if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
	    (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
	    rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope ||
	    rtm->rtm_type) {
		NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request");
		return -EINVAL;
	}
	if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Invalid flags for get route request");
		return -EINVAL;
	}

	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
					    rtm_ipv6_policy, extack);
	if (err)
		return err;

	if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
	    (tb[RTA_DST] && !rtm->rtm_dst_len)) {
		NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
		return -EINVAL;
	}

	for (i = 0; i <= RTA_MAX; i++) {
		if (!tb[i])
			continue;

		switch (i) {
		case RTA_SRC:
		case RTA_DST:
		case RTA_IIF:
		case RTA_OIF:
		case RTA_MARK:
		case RTA_UID:
		case RTA_SPORT:
		case RTA_DPORT:
		case RTA_IP_PROTO:
			break;
		default:
			NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
			return -EINVAL;
		}
	}

	return 0;
}

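/* RTM_GETROUTE doit handler: build a flowi6 from the request attributes,
 * resolve it through the input path when RTA_IIF is given or through the
 * output path otherwise, and unicast the result back to the requester.
 * With RTM_F_FIB_MATCH, the matching FIB entry is returned instead of the
 * fully resolved dst.
 */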
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
			      struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[RTA_MAX+1];
	int err, iif = 0, oif = 0;
	struct fib6_info *from;
	struct dst_entry *dst;
	struct rt6_info *rt;
	struct sk_buff *skb;
	struct rtmsg *rtm;
	struct flowi6 fl6 = {};
	bool fibmatch;

	err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	rtm = nlmsg_data(nlh);
	fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
	fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);

	if (tb[RTA_SRC]) {
		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
			goto errout;

		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
	}

	if (tb[RTA_DST]) {
		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
			goto errout;

		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
	}

	if (tb[RTA_IIF])
		iif = nla_get_u32(tb[RTA_IIF]);

	if (tb[RTA_OIF])
		oif = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_MARK])
		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);

	if (tb[RTA_UID])
		fl6.flowi6_uid = make_kuid(current_user_ns(),
					   nla_get_u32(tb[RTA_UID]));
	else
		fl6.flowi6_uid = iif ? INVALID_UID : current_uid();

	if (tb[RTA_SPORT])
		fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]);

	if (tb[RTA_DPORT])
		fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]);

	if (tb[RTA_IP_PROTO]) {
		err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
						  &fl6.flowi6_proto, AF_INET6,
						  extack);
		if (err)
			goto errout;
	}

	if (iif) {
		struct net_device *dev;
		int flags = 0;

		rcu_read_lock();

		dev = dev_get_by_index_rcu(net, iif);
		if (!dev) {
			rcu_read_unlock();
			err = -ENODEV;
			goto errout;
		}

		fl6.flowi6_iif = iif;

		if (!ipv6_addr_any(&fl6.saddr))
			flags |= RT6_LOOKUP_F_HAS_SADDR;

		dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);

		rcu_read_unlock();
	} else {
		fl6.flowi6_oif = oif;

		dst = ip6_route_output(net, NULL, &fl6);
	}

	rt = dst_rt6_info(dst);
	if (rt->dst.error) {
		err = rt->dst.error;
		ip6_rt_put(rt);
		goto errout;
	}

	if (rt == net->ipv6.ip6_null_entry) {
		err = rt->dst.error;
		ip6_rt_put(rt);
		goto errout;
	}

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb) {
		ip6_rt_put(rt);
		err = -ENOBUFS;
		goto errout;
	}

	skb_dst_set(skb, &rt->dst);

	rcu_read_lock();
	from = rcu_dereference(rt->from);
	if (from) {
		if (fibmatch)
			err = rt6_fill_node(net, skb, from, NULL, NULL, NULL,
					    iif, RTM_NEWROUTE,
					    NETLINK_CB(in_skb).portid,
					    nlh->nlmsg_seq, 0);
		else
			err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
					    &fl6.saddr, iif, RTM_NEWROUTE,
					    NETLINK_CB(in_skb).portid,
					    nlh->nlmsg_seq, 0);
	} else {
		err = -ENETUNREACH;
	}
	rcu_read_unlock();

	if (err < 0) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	return err;
}

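/* Send an RTM_NEWROUTE/RTM_DELROUTE notification for @rt to the
 * RTNLGRP_IPV6_ROUTE group. The skb is sized up front via rt6_nlmsg_size(),
 * so -EMSGSIZE from rt6_fill_node() can only mean the size estimate is
 * wrong, hence the WARN_ON() below.
 */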
void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
		     unsigned int nlm_flags)
{
	struct sk_buff *skb;
	struct net *net = info->nl_net;
	u32 seq;
	int err;

	err = -ENOBUFS;
	seq = info->nlh ? info->nlh->nlmsg_seq : 0;

	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
	if (!skb)
		goto errout;

	err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
			    event, info->portid, seq, nlm_flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
		    info->nlh, gfp_any());
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}

void fib6_rt_update(struct net *net, struct fib6_info *rt,
		    struct nl_info *info)
{
	u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
	if (!skb)
		goto errout;

	err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
			    RTM_NEWROUTE, info->portid, seq, NLM_F_REPLACE);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
		    info->nlh, gfp_any());
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}

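/* Record the hardware offload state of @f6i and, depending on the
 * fib_notify_on_flag_change sysctl (0: never notify, 1: notify on any flag
 * change, 2: notify only when offload_failed changes), tell userspace
 * about it.
 */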
void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i,
			    bool offload, bool trap, bool offload_failed)
{
	struct sk_buff *skb;
	int err;

	if (READ_ONCE(f6i->offload) == offload &&
	    READ_ONCE(f6i->trap) == trap &&
	    READ_ONCE(f6i->offload_failed) == offload_failed)
		return;

	WRITE_ONCE(f6i->offload, offload);
	WRITE_ONCE(f6i->trap, trap);

	/* 2 means send notifications only if offload_failed was changed. */
	if (net->ipv6.sysctl.fib_notify_on_flag_change == 2 &&
	    READ_ONCE(f6i->offload_failed) == offload_failed)
		return;

	WRITE_ONCE(f6i->offload_failed, offload_failed);

	if (!rcu_access_pointer(f6i->fib6_node))
		/* The route was removed from the tree, do not send
		 * notification.
		 */
		return;

	if (!net->ipv6.sysctl.fib_notify_on_flag_change)
		return;

	skb = nlmsg_new(rt6_nlmsg_size(f6i), GFP_KERNEL);
	if (!skb) {
		err = -ENOBUFS;
		goto errout;
	}

	err = rt6_fill_node(net, skb, f6i, NULL, NULL, NULL, 0, RTM_NEWROUTE, 0,
			    0, 0);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}

	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_ROUTE, NULL, GFP_KERNEL);
	return;

errout:
	rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}
EXPORT_SYMBOL(fib6_info_hw_flags_set);

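/* Device notifier: the special null/prohibit/blackhole entries have no
 * device of their own, so bind them to the loopback device of the netns
 * when it registers, and drop those references again on unregister.
 */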
static int ip6_route_dev_notify(struct notifier_block *this,
				unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);

	if (!(dev->flags & IFF_LOOPBACK))
		return NOTIFY_OK;

	if (event == NETDEV_REGISTER) {
		net->ipv6.fib6_null_entry->fib6_nh->fib_nh_dev = dev;
		net->ipv6.ip6_null_entry->dst.dev = dev;
		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
#endif
	} else if (event == NETDEV_UNREGISTER &&
		   dev->reg_state != NETREG_UNREGISTERED) {
		/* NETDEV_UNREGISTER can be fired multiple times by
		 * netdev_wait_allrefs(). Make sure we only call this once.
		 */
		in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
		in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
		in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
#endif
	}

	return NOTIFY_OK;
}

/*
 *	/proc
 */

#ifdef CONFIG_PROC_FS
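/* Backs /proc/net/rt6_stats: one line of seven hex counters (FIB nodes,
 * route nodes, route allocations, route entries, cached routes, dst
 * entries in use, discarded routes).
 */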
static int rt6_stats_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = (struct net *)seq->private;

	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
		   net->ipv6.rt6_stats->fib_nodes,
		   net->ipv6.rt6_stats->fib_route_nodes,
		   atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
		   net->ipv6.rt6_stats->fib_rt_entries,
		   net->ipv6.rt6_stats->fib_rt_cache,
		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
		   net->ipv6.rt6_stats->fib_discarded_routes);

	return 0;
}
#endif	/* CONFIG_PROC_FS */

#ifdef CONFIG_SYSCTL

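/* Handler for the write-only net.ipv6.route.flush sysctl: any write
 * triggers a garbage collection pass over cached routes. Note that the GC
 * delay is taken from flush_delay as it was *before* the new value is
 * stored by proc_dointvec() below.
 */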
static int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
			      void *buffer, size_t *lenp, loff_t *ppos)
{
	struct net *net;
	int delay;
	int ret;

	if (!write)
		return -EINVAL;

	net = (struct net *)ctl->extra1;
	delay = net->ipv6.sysctl.flush_delay;
	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
	if (ret)
		return ret;

	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
	return 0;
}

static struct ctl_table ipv6_route_table_template[] = {
	{
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{
		.procname	=	"skip_notify_on_dev_down",
		.data		=	&init_net.ipv6.sysctl.skip_notify_on_dev_down,
		.maxlen		=	sizeof(u8),
		.mode		=	0644,
		.proc_handler	=	proc_dou8vec_minmax,
		.extra1		=	SYSCTL_ZERO,
		.extra2		=	SYSCTL_ONE,
	},
	{ }
};

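/* Clone the template for one netns and repoint each .data member at the
 * per-netns copy of the variable. The fixups below are positional, so they
 * must stay in sync with the order of ipv6_route_table_template[]; clearing
 * table[1].procname truncates the visible table for non-initial user
 * namespaces (see ipv6_route_sysctl_table_size() below).
 */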
struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_route_table_template,
			sizeof(ipv6_route_table_template),
			GFP_KERNEL);

	if (table) {
		table[0].data = &net->ipv6.sysctl.ip6_rt_max_size;
		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
		table[2].data = &net->ipv6.sysctl.flush_delay;
		table[2].extra1 = net;
		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
		table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;

		/* Don't export sysctls to unprivileged users */
		if (net->user_ns != &init_user_ns)
			table[1].procname = NULL;
	}

	return table;
}

size_t ipv6_route_sysctl_table_size(struct net *net)
{
	/* Don't export sysctls to unprivileged users */
	if (net->user_ns != &init_user_ns)
		return 1;

	return ARRAY_SIZE(ipv6_route_table_template);
}
#endif

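/* Per-netns setup: each namespace gets its own dst accounting and its own
 * copies of the special FIB entries (null, plus prohibit and blackhole
 * with policy routing), along with the sysctl defaults below. Errors
 * unwind in reverse order of allocation.
 */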
static int __net_init ip6_route_net_init(struct net *net)
{
	int ret = -ENOMEM;

	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
	       sizeof(net->ipv6.ip6_dst_ops));

	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
		goto out_ip6_dst_ops;

	net->ipv6.fib6_null_entry = fib6_info_alloc(GFP_KERNEL, true);
	if (!net->ipv6.fib6_null_entry)
		goto out_ip6_dst_entries;
	memcpy(net->ipv6.fib6_null_entry, &fib6_null_entry_template,
	       sizeof(*net->ipv6.fib6_null_entry));

	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
					   sizeof(*net->ipv6.ip6_null_entry),
					   GFP_KERNEL);
	if (!net->ipv6.ip6_null_entry)
		goto out_fib6_null_entry;
	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
			 ip6_template_metrics, true);
	INIT_LIST_HEAD(&net->ipv6.ip6_null_entry->dst.rt_uncached);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	net->ipv6.fib6_has_custom_rules = false;
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto out_ip6_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
			 ip6_template_metrics, true);
	INIT_LIST_HEAD(&net->ipv6.ip6_prohibit_entry->dst.rt_uncached);

	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto out_ip6_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
			 ip6_template_metrics, true);
	INIT_LIST_HEAD(&net->ipv6.ip6_blk_hole_entry->dst.rt_uncached);
#ifdef CONFIG_IPV6_SUBTREES
	net->ipv6.fib6_routes_require_src = 0;
#endif
#endif

	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = INT_MAX;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
	net->ipv6.sysctl.skip_notify_on_dev_down = 0;

	atomic_set(&net->ipv6.ip6_rt_gc_expire, 30*HZ);

	ret = 0;
out:
	return ret;

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_ip6_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
out_ip6_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
out_fib6_null_entry:
	kfree(net->ipv6.fib6_null_entry);
out_ip6_dst_entries:
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
out_ip6_dst_ops:
	goto out;
}

static void __net_exit ip6_route_net_exit(struct net *net)
{
	kfree(net->ipv6.fib6_null_entry);
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}

static int __net_init ip6_route_net_init_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	if (!proc_create_net("ipv6_route", 0, net->proc_net,
			     &ipv6_route_seq_ops,
			     sizeof(struct ipv6_route_iter)))
		return -ENOMEM;

	if (!proc_create_net_single("rt6_stats", 0444, net->proc_net,
				    rt6_stats_seq_show, NULL)) {
		remove_proc_entry("ipv6_route", net->proc_net);
		return -ENOMEM;
	}
#endif
	return 0;
}

static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ipv6_route", net->proc_net);
	remove_proc_entry("rt6_stats", net->proc_net);
#endif
}

static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};

static int __net_init ipv6_inetpeer_init(struct net *net)
{
	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);

	if (!bp)
		return -ENOMEM;
	inet_peer_base_init(bp);
	net->ipv6.peers = bp;
	return 0;
}

static void __net_exit ipv6_inetpeer_exit(struct net *net)
{
	struct inet_peer_base *bp = net->ipv6.peers;

	net->ipv6.peers = NULL;
	inetpeer_invalidate_tree(bp);
	kfree(bp);
}

static struct pernet_operations ipv6_inetpeer_ops = {
	.init	=	ipv6_inetpeer_init,
	.exit	=	ipv6_inetpeer_exit,
};

static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};

static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = ADDRCONF_NOTIFY_PRIORITY - 10,
};

void __init ip6_route_init_special_entries(void)
{
	/* The loopback device is registered before this code runs, so the
	 * loopback reference in rt6_info has not been taken yet; take it
	 * manually for init_net.
	 */
	init_net.ipv6.fib6_null_entry->fib6_nh->fib_nh_dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#endif
}

#if IS_BUILTIN(CONFIG_IPV6)
#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
DEFINE_BPF_ITER_FUNC(ipv6_route, struct bpf_iter_meta *meta, struct fib6_info *rt)

BTF_ID_LIST(btf_fib6_info_id)
BTF_ID(struct, fib6_info)

static const struct bpf_iter_seq_info ipv6_route_seq_info = {
	.seq_ops		= &ipv6_route_seq_ops,
	.init_seq_private	= bpf_iter_init_seq_net,
	.fini_seq_private	= bpf_iter_fini_seq_net,
	.seq_priv_size		= sizeof(struct ipv6_route_iter),
};

static struct bpf_iter_reg ipv6_route_reg_info = {
	.target			= "ipv6_route",
	.ctx_arg_info_size	= 1,
	.ctx_arg_info		= {
		{ offsetof(struct bpf_iter__ipv6_route, rt),
		  PTR_TO_BTF_ID_OR_NULL },
	},
	.seq_info		= &ipv6_route_seq_info,
};

static int __init bpf_iter_register(void)
{
	ipv6_route_reg_info.ctx_arg_info[0].btf_id = *btf_fib6_info_id;
	return bpf_iter_reg_target(&ipv6_route_reg_info);
}

static void bpf_iter_unregister(void)
{
	bpf_iter_unreg_target(&ipv6_route_reg_info);
}
#endif
#endif

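/* Module init. Ordering matters here, and the error labels unwind it in
 * reverse; a condensed sketch of the sequence:
 *
 *	dst cache + blackhole dst accounting
 *	pernet subsystems (inetpeer, route, then the late ops)
 *	fib6 core, xfrm6, fib6 policy rules
 *	rtnetlink doit handlers and the netdev notifier
 */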
int __init ip6_route_init(void)
{
	int ret;
	int cpu;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
	if (ret)
		goto out_dst_entries;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_register_inetpeer;

	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	ret = register_pernet_subsys(&ip6_route_net_late_ops);
	if (ret)
		goto fib6_rules_init;

	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
				   inet6_rtm_newroute, NULL, 0);
	if (ret < 0)
		goto out_register_late_subsys;

	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
				   inet6_rtm_delroute, NULL, 0);
	if (ret < 0)
		goto out_register_late_subsys;

	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
				   inet6_rtm_getroute, NULL,
				   RTNL_FLAG_DOIT_UNLOCKED);
	if (ret < 0)
		goto out_register_late_subsys;

	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto out_register_late_subsys;

#if IS_BUILTIN(CONFIG_IPV6)
#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
	ret = bpf_iter_register();
	if (ret)
		goto out_register_late_subsys;
#endif
#endif

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);

		INIT_LIST_HEAD(&ul->head);
		INIT_LIST_HEAD(&ul->quarantine);
		spin_lock_init(&ul->lock);
	}

out:
	return ret;

out_register_late_subsys:
	rtnl_unregister_all(PF_INET6);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_register_inetpeer:
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}

void ip6_route_cleanup(void)
{
#if IS_BUILTIN(CONFIG_IPV6)
#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
	bpf_iter_unregister();
#endif
#endif
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}