   1/*
   2 *	Linux INET6 implementation
   3 *	FIB front-end.
   4 *
   5 *	Authors:
   6 *	Pedro Roque		<roque@di.fc.ul.pt>
   7 *
   8 *	This program is free software; you can redistribute it and/or
   9 *      modify it under the terms of the GNU General Public License
  10 *      as published by the Free Software Foundation; either version
  11 *      2 of the License, or (at your option) any later version.
  12 */
  13
  14/*	Changes:
  15 *
  16 *	YOSHIFUJI Hideaki @USAGI
  17 *		reworked default router selection.
  18 *		- respect outgoing interface
  19 *		- select from (probably) reachable routers (i.e.
  20 *		routers in REACHABLE, STALE, DELAY or PROBE states).
  21 *		- always select the same router if it is (probably)
  22 *		reachable.  otherwise, round-robin the list.
  23 *	Ville Nuorvala
  24 *		Fixed routing subtrees.
  25 */
  26
  27#define pr_fmt(fmt) "IPv6: " fmt
  28
  29#include <linux/capability.h>
  30#include <linux/errno.h>
  31#include <linux/export.h>
  32#include <linux/types.h>
  33#include <linux/times.h>
  34#include <linux/socket.h>
  35#include <linux/sockios.h>
  36#include <linux/net.h>
  37#include <linux/route.h>
  38#include <linux/netdevice.h>
  39#include <linux/in6.h>
  40#include <linux/mroute6.h>
  41#include <linux/init.h>
  42#include <linux/if_arp.h>
  43#include <linux/proc_fs.h>
  44#include <linux/seq_file.h>
  45#include <linux/nsproxy.h>
  46#include <linux/slab.h>
  47#include <net/net_namespace.h>
  48#include <net/snmp.h>
  49#include <net/ipv6.h>
  50#include <net/ip6_fib.h>
  51#include <net/ip6_route.h>
  52#include <net/ndisc.h>
  53#include <net/addrconf.h>
  54#include <net/tcp.h>
  55#include <linux/rtnetlink.h>
  56#include <net/dst.h>
  57#include <net/dst_metadata.h>
  58#include <net/xfrm.h>
  59#include <net/netevent.h>
  60#include <net/netlink.h>
  61#include <net/nexthop.h>
  62#include <net/lwtunnel.h>
  63#include <net/ip_tunnels.h>
  64#include <net/l3mdev.h>
  65#include <trace/events/fib6.h>
  66
  67#include <asm/uaccess.h>
  68
  69#ifdef CONFIG_SYSCTL
  70#include <linux/sysctl.h>
  71#endif
  72
  73enum rt6_nud_state {
  74	RT6_NUD_FAIL_HARD = -3,
  75	RT6_NUD_FAIL_PROBE = -2,
  76	RT6_NUD_FAIL_DO_RR = -1,
  77	RT6_NUD_SUCCEED = 1
  78};
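    /* These scores are produced by rt6_check_neigh() and consumed by
     * rt6_score_route()/find_match() below: RT6_NUD_FAIL_HARD rules a
     * route out entirely, RT6_NUD_FAIL_DO_RR triggers round-robin to
     * the next sibling, RT6_NUD_FAIL_PROBE merely deprioritizes the
     * route, and RT6_NUD_SUCCEED keeps it as a normal candidate.
     */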
  79
  80static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
  81static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
  82static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
  83static unsigned int	 ip6_mtu(const struct dst_entry *dst);
  84static struct dst_entry *ip6_negative_advice(struct dst_entry *);
  85static void		ip6_dst_destroy(struct dst_entry *);
  86static void		ip6_dst_ifdown(struct dst_entry *,
  87				       struct net_device *dev, int how);
  88static int		 ip6_dst_gc(struct dst_ops *ops);
  89
  90static int		ip6_pkt_discard(struct sk_buff *skb);
  91static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
  92static int		ip6_pkt_prohibit(struct sk_buff *skb);
  93static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
  94static void		ip6_link_failure(struct sk_buff *skb);
  95static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
  96					   struct sk_buff *skb, u32 mtu);
  97static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
  98					struct sk_buff *skb);
  99static void		rt6_dst_from_metrics_check(struct rt6_info *rt);
 100static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
 101
 102#ifdef CONFIG_IPV6_ROUTE_INFO
 103static struct rt6_info *rt6_add_route_info(struct net *net,
 104					   const struct in6_addr *prefix, int prefixlen,
 105					   const struct in6_addr *gwaddr, int ifindex,
 106					   unsigned int pref);
 107static struct rt6_info *rt6_get_route_info(struct net *net,
 108					   const struct in6_addr *prefix, int prefixlen,
 109					   const struct in6_addr *gwaddr, int ifindex);
 110#endif
 111
 112struct uncached_list {
 113	spinlock_t		lock;
 114	struct list_head	head;
 115};
 116
 117static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
 118
 119static void rt6_uncached_list_add(struct rt6_info *rt)
 120{
 121	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
 122
 123	rt->dst.flags |= DST_NOCACHE;
 124	rt->rt6i_uncached_list = ul;
 125
 126	spin_lock_bh(&ul->lock);
 127	list_add_tail(&rt->rt6i_uncached, &ul->head);
 128	spin_unlock_bh(&ul->lock);
 129}
 130
 131static void rt6_uncached_list_del(struct rt6_info *rt)
 132{
 133	if (!list_empty(&rt->rt6i_uncached)) {
 134		struct uncached_list *ul = rt->rt6i_uncached_list;
 135
 136		spin_lock_bh(&ul->lock);
 137		list_del(&rt->rt6i_uncached);
 138		spin_unlock_bh(&ul->lock);
 139	}
 140}
 141
 142static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
 143{
 144	struct net_device *loopback_dev = net->loopback_dev;
 145	int cpu;
 146
 147	if (dev == loopback_dev)
 148		return;
 149
 150	for_each_possible_cpu(cpu) {
 151		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
 152		struct rt6_info *rt;
 153
 154		spin_lock_bh(&ul->lock);
 155		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
 156			struct inet6_dev *rt_idev = rt->rt6i_idev;
 157			struct net_device *rt_dev = rt->dst.dev;
 158
 159			if (rt_idev->dev == dev) {
 160				rt->rt6i_idev = in6_dev_get(loopback_dev);
 161				in6_dev_put(rt_idev);
 162			}
 163
 164			if (rt_dev == dev) {
 165				rt->dst.dev = loopback_dev;
 166				dev_hold(rt->dst.dev);
 167				dev_put(rt_dev);
 168			}
 169		}
 170		spin_unlock_bh(&ul->lock);
 171	}
 172}
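    /* The uncached list tracks DST_NOCACHE clones that live outside the
     * fib6 tree; the walk above re-points them at the loopback device
     * when their device goes away, so sockets can keep holding the dst
     * while the original netdev is unregistered.
     */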
 173
 174static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
 175{
 176	return dst_metrics_write_ptr(rt->dst.from);
 177}
 178
 179static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
 180{
 181	struct rt6_info *rt = (struct rt6_info *)dst;
 182
 183	if (rt->rt6i_flags & RTF_PCPU)
 184		return rt6_pcpu_cow_metrics(rt);
 185	else if (rt->rt6i_flags & RTF_CACHE)
 186		return NULL;
 187	else
 188		return dst_cow_metrics_generic(dst, old);
 189}
 190
 191static inline const void *choose_neigh_daddr(struct rt6_info *rt,
 192					     struct sk_buff *skb,
 193					     const void *daddr)
 194{
 195	struct in6_addr *p = &rt->rt6i_gateway;
 196
 197	if (!ipv6_addr_any(p))
 198		return (const void *) p;
 199	else if (skb)
 200		return &ipv6_hdr(skb)->daddr;
 201	return daddr;
 202}
 203
 204static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
 205					  struct sk_buff *skb,
 206					  const void *daddr)
 207{
 208	struct rt6_info *rt = (struct rt6_info *) dst;
 209	struct neighbour *n;
 210
 211	daddr = choose_neigh_daddr(rt, skb, daddr);
 212	n = __ipv6_neigh_lookup(dst->dev, daddr);
 213	if (n)
 214		return n;
 215	return neigh_create(&nd_tbl, daddr, dst->dev);
 216}
 217
 218static struct dst_ops ip6_dst_ops_template = {
 219	.family			=	AF_INET6,
 220	.gc			=	ip6_dst_gc,
 221	.gc_thresh		=	1024,
 222	.check			=	ip6_dst_check,
 223	.default_advmss		=	ip6_default_advmss,
 224	.mtu			=	ip6_mtu,
 225	.cow_metrics		=	ipv6_cow_metrics,
 226	.destroy		=	ip6_dst_destroy,
 227	.ifdown			=	ip6_dst_ifdown,
 228	.negative_advice	=	ip6_negative_advice,
 229	.link_failure		=	ip6_link_failure,
 230	.update_pmtu		=	ip6_rt_update_pmtu,
 231	.redirect		=	rt6_do_redirect,
 232	.local_out		=	__ip6_local_out,
 233	.neigh_lookup		=	ip6_neigh_lookup,
 234};
 235
 236static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
 237{
 238	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
 239
 240	return mtu ? : dst->dev->mtu;
 241}
 242
 243static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
 244					 struct sk_buff *skb, u32 mtu)
 245{
 246}
 247
 248static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
 249				      struct sk_buff *skb)
 250{
 251}
 252
 253static struct dst_ops ip6_dst_blackhole_ops = {
 254	.family			=	AF_INET6,
 255	.destroy		=	ip6_dst_destroy,
 256	.check			=	ip6_dst_check,
 257	.mtu			=	ip6_blackhole_mtu,
 258	.default_advmss		=	ip6_default_advmss,
 259	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
 260	.redirect		=	ip6_rt_blackhole_redirect,
 261	.cow_metrics		=	dst_cow_metrics_generic,
 262	.neigh_lookup		=	ip6_neigh_lookup,
 263};
 264
 265static const u32 ip6_template_metrics[RTAX_MAX] = {
 266	[RTAX_HOPLIMIT - 1] = 0,
 267};
 268
 269static const struct rt6_info ip6_null_entry_template = {
 270	.dst = {
 271		.__refcnt	= ATOMIC_INIT(1),
 272		.__use		= 1,
 273		.obsolete	= DST_OBSOLETE_FORCE_CHK,
 274		.error		= -ENETUNREACH,
 275		.input		= ip6_pkt_discard,
 276		.output		= ip6_pkt_discard_out,
 277	},
 278	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 279	.rt6i_protocol  = RTPROT_KERNEL,
 280	.rt6i_metric	= ~(u32) 0,
 281	.rt6i_ref	= ATOMIC_INIT(1),
 282};
 283
 284#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 285
 286static const struct rt6_info ip6_prohibit_entry_template = {
 287	.dst = {
 288		.__refcnt	= ATOMIC_INIT(1),
 289		.__use		= 1,
 290		.obsolete	= DST_OBSOLETE_FORCE_CHK,
 291		.error		= -EACCES,
 292		.input		= ip6_pkt_prohibit,
 293		.output		= ip6_pkt_prohibit_out,
 294	},
 295	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 296	.rt6i_protocol  = RTPROT_KERNEL,
 297	.rt6i_metric	= ~(u32) 0,
 298	.rt6i_ref	= ATOMIC_INIT(1),
 299};
 300
 301static const struct rt6_info ip6_blk_hole_entry_template = {
 302	.dst = {
 303		.__refcnt	= ATOMIC_INIT(1),
 304		.__use		= 1,
 305		.obsolete	= DST_OBSOLETE_FORCE_CHK,
 306		.error		= -EINVAL,
 307		.input		= dst_discard,
 308		.output		= dst_discard_out,
 309	},
 310	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 311	.rt6i_protocol  = RTPROT_KERNEL,
 312	.rt6i_metric	= ~(u32) 0,
 313	.rt6i_ref	= ATOMIC_INIT(1),
 314};
 315
 316#endif
 317
 318static void rt6_info_init(struct rt6_info *rt)
 319{
 320	struct dst_entry *dst = &rt->dst;
 321
 322	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
 323	INIT_LIST_HEAD(&rt->rt6i_siblings);
 324	INIT_LIST_HEAD(&rt->rt6i_uncached);
 325}
 326
 327/* allocate dst with ip6_dst_ops */
 328static struct rt6_info *__ip6_dst_alloc(struct net *net,
 329					struct net_device *dev,
 330					int flags)
 331{
 332	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
 333					0, DST_OBSOLETE_FORCE_CHK, flags);
 334
 335	if (rt)
 336		rt6_info_init(rt);
 337
 338	return rt;
 339}
 340
 341struct rt6_info *ip6_dst_alloc(struct net *net,
 342			       struct net_device *dev,
 343			       int flags)
 344{
 345	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
 346
 347	if (rt) {
 348		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
 349		if (rt->rt6i_pcpu) {
 350			int cpu;
 351
 352			for_each_possible_cpu(cpu) {
 353				struct rt6_info **p;
 354
 355				p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
 356				/* no one shares rt */
 357				*p =  NULL;
 358			}
 359		} else {
 360			dst_destroy((struct dst_entry *)rt);
 361			return NULL;
 362		}
 363	}
 364
 365	return rt;
 366}
 367EXPORT_SYMBOL(ip6_dst_alloc);
 368
 369static void ip6_dst_destroy(struct dst_entry *dst)
 370{
 371	struct rt6_info *rt = (struct rt6_info *)dst;
 372	struct dst_entry *from = dst->from;
 373	struct inet6_dev *idev;
 374
 375	dst_destroy_metrics_generic(dst);
 376	free_percpu(rt->rt6i_pcpu);
 377	rt6_uncached_list_del(rt);
 378
 379	idev = rt->rt6i_idev;
 380	if (idev) {
 381		rt->rt6i_idev = NULL;
 382		in6_dev_put(idev);
 383	}
 384
 385	dst->from = NULL;
 386	dst_release(from);
 387}
 388
 389static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 390			   int how)
 391{
 392	struct rt6_info *rt = (struct rt6_info *)dst;
 393	struct inet6_dev *idev = rt->rt6i_idev;
 394	struct net_device *loopback_dev =
 395		dev_net(dev)->loopback_dev;
 396
 397	if (dev != loopback_dev) {
 398		if (idev && idev->dev == dev) {
 399			struct inet6_dev *loopback_idev =
 400				in6_dev_get(loopback_dev);
 401			if (loopback_idev) {
 402				rt->rt6i_idev = loopback_idev;
 403				in6_dev_put(idev);
 404			}
 405		}
 406	}
 407}
 408
 409static bool __rt6_check_expired(const struct rt6_info *rt)
 410{
 411	if (rt->rt6i_flags & RTF_EXPIRES)
 412		return time_after(jiffies, rt->dst.expires);
 413	else
 414		return false;
 415}
 416
 417static bool rt6_check_expired(const struct rt6_info *rt)
 418{
 419	if (rt->rt6i_flags & RTF_EXPIRES) {
 420		if (time_after(jiffies, rt->dst.expires))
 421			return true;
 422	} else if (rt->dst.from) {
 423		return rt6_check_expired((struct rt6_info *) rt->dst.from);
 424	}
 425	return false;
 426}
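    /* An RTF_CACHE/RTF_PCPU clone that carries no RTF_EXPIRES of its
     * own inherits the expiry of its parent route through dst.from,
     * hence the recursion above.
     */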
 427
 428/* Multipath route selection:
 429 *   Hash based function using packet header and flowlabel.
 430 * Adapted from fib_info_hashfn()
 431 */
 432static int rt6_info_hash_nhsfn(unsigned int candidate_count,
 433			       const struct flowi6 *fl6)
 434{
 435	return get_hash_from_flowi6(fl6) % candidate_count;
 436}
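    /* Worked example (hash value illustrative only): with three
     * siblings plus the matched route itself, candidate_count == 4;
     * a flow whose get_hash_from_flowi6() value is 10855 yields
     * 10855 % 4 == 3, so the third sibling is picked, and the same
     * flow keeps mapping to the same nexthop while the sibling count
     * stays stable.
     */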
 437
 438static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
 439					     struct flowi6 *fl6, int oif,
 440					     int strict)
 441{
 442	struct rt6_info *sibling, *next_sibling;
 443	int route_chosen;
 444
 445	route_chosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
 446	/* Don't change the route if route_chosen == 0
 447	 * (the siblings list does not include ourselves)
 448	 */
 449	if (route_chosen)
 450		list_for_each_entry_safe(sibling, next_sibling,
 451				&match->rt6i_siblings, rt6i_siblings) {
 452			route_chosen--;
 453			if (route_chosen == 0) {
 454				if (rt6_score_route(sibling, oif, strict) < 0)
 455					break;
 456				match = sibling;
 457				break;
 458			}
 459		}
 460	return match;
 461}
 462
 463/*
 464 *	Route lookup. Any table->tb6_lock is implied.
 465 */
 466
 467static inline struct rt6_info *rt6_device_match(struct net *net,
 468						    struct rt6_info *rt,
 469						    const struct in6_addr *saddr,
 470						    int oif,
 471						    int flags)
 472{
 473	struct rt6_info *local = NULL;
 474	struct rt6_info *sprt;
 475
 476	if (!oif && ipv6_addr_any(saddr))
 477		goto out;
 478
 479	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
 480		struct net_device *dev = sprt->dst.dev;
 481
 482		if (oif) {
 483			if (dev->ifindex == oif)
 484				return sprt;
 485			if (dev->flags & IFF_LOOPBACK) {
 486				if (!sprt->rt6i_idev ||
 487				    sprt->rt6i_idev->dev->ifindex != oif) {
 488					if (flags & RT6_LOOKUP_F_IFACE)
 489						continue;
 490					if (local &&
 491					    local->rt6i_idev->dev->ifindex == oif)
 492						continue;
 493				}
 494				local = sprt;
 495			}
 496		} else {
 497			if (ipv6_chk_addr(net, saddr, dev,
 498					  flags & RT6_LOOKUP_F_IFACE))
 499				return sprt;
 500		}
 501	}
 502
 503	if (oif) {
 504		if (local)
 505			return local;
 506
 507		if (flags & RT6_LOOKUP_F_IFACE)
 508			return net->ipv6.ip6_null_entry;
 509	}
 510out:
 511	return rt;
 512}
 513
 514#ifdef CONFIG_IPV6_ROUTER_PREF
 515struct __rt6_probe_work {
 516	struct work_struct work;
 517	struct in6_addr target;
 518	struct net_device *dev;
 519};
 520
 521static void rt6_probe_deferred(struct work_struct *w)
 522{
 523	struct in6_addr mcaddr;
 524	struct __rt6_probe_work *work =
 525		container_of(w, struct __rt6_probe_work, work);
 526
 527	addrconf_addr_solict_mult(&work->target, &mcaddr);
 528	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL);
 529	dev_put(work->dev);
 530	kfree(work);
 531}
 532
 533static void rt6_probe(struct rt6_info *rt)
 534{
 535	struct __rt6_probe_work *work;
 536	struct neighbour *neigh;
 537	/*
 538	 * Okay, this does not seem to be appropriate
 539	 * for now, however, we need to check if it
 540	 * is really so; aka Router Reachability Probing.
 541	 *
 542	 * Router Reachability Probe MUST be rate-limited
 543	 * to no more than one per minute.
 544	 */
 545	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
 546		return;
 547	rcu_read_lock_bh();
 548	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
 549	if (neigh) {
 550		if (neigh->nud_state & NUD_VALID)
 551			goto out;
 552
 553		work = NULL;
 554		write_lock(&neigh->lock);
 555		if (!(neigh->nud_state & NUD_VALID) &&
 556		    time_after(jiffies,
 557			       neigh->updated +
 558			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
 559			work = kmalloc(sizeof(*work), GFP_ATOMIC);
 560			if (work)
 561				__neigh_set_probe_once(neigh);
 562		}
 563		write_unlock(&neigh->lock);
 564	} else {
 565		work = kmalloc(sizeof(*work), GFP_ATOMIC);
 566	}
 567
 568	if (work) {
 569		INIT_WORK(&work->work, rt6_probe_deferred);
 570		work->target = rt->rt6i_gateway;
 571		dev_hold(rt->dst.dev);
 572		work->dev = rt->dst.dev;
 573		schedule_work(&work->work);
 574	}
 575
 576out:
 577	rcu_read_unlock_bh();
 578}
 579#else
 580static inline void rt6_probe(struct rt6_info *rt)
 581{
 582}
 583#endif
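    /* Note: the solicitation itself is sent from rt6_probe_deferred()
     * via a workqueue rather than inline, presumably because rt6_probe()
     * runs under rcu_read_lock_bh() and neigh->lock, a context in which
     * building and transmitting the NS packet would be problematic.
     */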
 584
 585/*
 586 * Default Router Selection (RFC 2461 6.3.6)
 587 */
 588static inline int rt6_check_dev(struct rt6_info *rt, int oif)
 589{
 590	struct net_device *dev = rt->dst.dev;
 591	if (!oif || dev->ifindex == oif)
 592		return 2;
 593	if ((dev->flags & IFF_LOOPBACK) &&
 594	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
 595		return 1;
 596	return 0;
 597}
 598
 599static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
 600{
 601	struct neighbour *neigh;
 602	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
 603
 604	if (rt->rt6i_flags & RTF_NONEXTHOP ||
 605	    !(rt->rt6i_flags & RTF_GATEWAY))
 606		return RT6_NUD_SUCCEED;
 607
 608	rcu_read_lock_bh();
 609	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
 610	if (neigh) {
 611		read_lock(&neigh->lock);
 612		if (neigh->nud_state & NUD_VALID)
 613			ret = RT6_NUD_SUCCEED;
 614#ifdef CONFIG_IPV6_ROUTER_PREF
 615		else if (!(neigh->nud_state & NUD_FAILED))
 616			ret = RT6_NUD_SUCCEED;
 617		else
 618			ret = RT6_NUD_FAIL_PROBE;
 619#endif
 620		read_unlock(&neigh->lock);
 621	} else {
 622		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
 623		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
 624	}
 625	rcu_read_unlock_bh();
 626
 627	return ret;
 628}
 629
 630static int rt6_score_route(struct rt6_info *rt, int oif,
 631			   int strict)
 632{
 633	int m;
 634
 635	m = rt6_check_dev(rt, oif);
 636	if (!m && (strict & RT6_LOOKUP_F_IFACE))
 637		return RT6_NUD_FAIL_HARD;
 638#ifdef CONFIG_IPV6_ROUTER_PREF
 639	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
 640#endif
 641	if (strict & RT6_LOOKUP_F_REACHABLE) {
 642		int n = rt6_check_neigh(rt);
 643		if (n < 0)
 644			return n;
 645	}
 646	return m;
 647}
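    /* Score arithmetic (illustrative, assuming IPV6_DECODE_PREF maps a
     * "high" router preference to 3): an exact oif match contributes
     * m == 2, and the decoded preference is shifted into bits 2+, so
     * the score becomes 2 | (3 << 2) == 14 -- router preference
     * outweighs the device match in candidate ordering.
     */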
 648
 649static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
 650				   int *mpri, struct rt6_info *match,
 651				   bool *do_rr)
 652{
 653	int m;
 654	bool match_do_rr = false;
 655	struct inet6_dev *idev = rt->rt6i_idev;
 656	struct net_device *dev = rt->dst.dev;
 657
 658	if (dev && !netif_carrier_ok(dev) &&
 659	    idev->cnf.ignore_routes_with_linkdown)
 660		goto out;
 661
 662	if (rt6_check_expired(rt))
 663		goto out;
 664
 665	m = rt6_score_route(rt, oif, strict);
 666	if (m == RT6_NUD_FAIL_DO_RR) {
 667		match_do_rr = true;
 668		m = 0; /* lowest valid score */
 669	} else if (m == RT6_NUD_FAIL_HARD) {
 670		goto out;
 671	}
 672
 673	if (strict & RT6_LOOKUP_F_REACHABLE)
 674		rt6_probe(rt);
 675
 676	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
 677	if (m > *mpri) {
 678		*do_rr = match_do_rr;
 679		*mpri = m;
 680		match = rt;
 681	}
 682out:
 683	return match;
 684}
 685
 686static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
 687				     struct rt6_info *rr_head,
 688				     u32 metric, int oif, int strict,
 689				     bool *do_rr)
 690{
 691	struct rt6_info *rt, *match, *cont;
 692	int mpri = -1;
 693
 694	match = NULL;
 695	cont = NULL;
 696	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
 697		if (rt->rt6i_metric != metric) {
 698			cont = rt;
 699			break;
 700		}
 701
 702		match = find_match(rt, oif, strict, &mpri, match, do_rr);
 703	}
 704
 705	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
 706		if (rt->rt6i_metric != metric) {
 707			cont = rt;
 708			break;
 709		}
 710
 711		match = find_match(rt, oif, strict, &mpri, match, do_rr);
 712	}
 713
 714	if (match || !cont)
 715		return match;
 716
 717	for (rt = cont; rt; rt = rt->dst.rt6_next)
 718		match = find_match(rt, oif, strict, &mpri, match, do_rr);
 719
 720	return match;
 721}
 722
 723static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
 724{
 725	struct rt6_info *match, *rt0;
 726	struct net *net;
 727	bool do_rr = false;
 728
 729	rt0 = fn->rr_ptr;
 730	if (!rt0)
 731		fn->rr_ptr = rt0 = fn->leaf;
 732
 733	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
 734			     &do_rr);
 735
 736	if (do_rr) {
 737		struct rt6_info *next = rt0->dst.rt6_next;
 738
 739		/* no entries matched; do round-robin */
 740		if (!next || next->rt6i_metric != rt0->rt6i_metric)
 741			next = fn->leaf;
 742
 743		if (next != rt0)
 744			fn->rr_ptr = next;
 745	}
 746
 747	net = dev_net(rt0->dst.dev);
 748	return match ? match : net->ipv6.ip6_null_entry;
 749}
 750
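    /* Round-robin detail: when find_rr_leaf() sets do_rr (the best
     * candidate only scored RT6_NUD_FAIL_DO_RR), rr_ptr is advanced to
     * the next route of the same metric, so successive lookups rotate
     * through the default routers per the RFC 2461 6.3.6 selection
     * rules cited above.
     */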
 751static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
 752{
 753	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
 754}
 755
 756#ifdef CONFIG_IPV6_ROUTE_INFO
 757int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 758		  const struct in6_addr *gwaddr)
 759{
 760	struct net *net = dev_net(dev);
 761	struct route_info *rinfo = (struct route_info *) opt;
 762	struct in6_addr prefix_buf, *prefix;
 763	unsigned int pref;
 764	unsigned long lifetime;
 765	struct rt6_info *rt;
 766
 767	if (len < sizeof(struct route_info)) {
 768		return -EINVAL;
 769	}
 770
 771	/* Sanity check for prefix_len and length */
 772	if (rinfo->length > 3) {
 773		return -EINVAL;
 774	} else if (rinfo->prefix_len > 128) {
 775		return -EINVAL;
 776	} else if (rinfo->prefix_len > 64) {
 777		if (rinfo->length < 2) {
 778			return -EINVAL;
 779		}
 780	} else if (rinfo->prefix_len > 0) {
 781		if (rinfo->length < 1) {
 782			return -EINVAL;
 783		}
 784	}
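    /* Per RFC 4191 the option length is in units of 8 octets: a length
     * of 3 carries the full 16-byte prefix, a length of 2 carries only
     * the first 8 prefix bytes (sufficient for prefix_len <= 64), and
     * prefix_len can never exceed 128 -- which is what the sanity
     * checks above enforce.
     */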
 785
 786	pref = rinfo->route_pref;
 787	if (pref == ICMPV6_ROUTER_PREF_INVALID)
 788		return -EINVAL;
 789
 790	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
 791
 792	if (rinfo->length == 3)
 793		prefix = (struct in6_addr *)rinfo->prefix;
 794	else {
 795		/* this function is safe */
 796		ipv6_addr_prefix(&prefix_buf,
 797				 (struct in6_addr *)rinfo->prefix,
 798				 rinfo->prefix_len);
 799		prefix = &prefix_buf;
 800	}
 801
 802	if (rinfo->prefix_len == 0)
 803		rt = rt6_get_dflt_router(gwaddr, dev);
 804	else
 805		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
 806					gwaddr, dev->ifindex);
 807
 808	if (rt && !lifetime) {
 809		ip6_del_rt(rt);
 810		rt = NULL;
 811	}
 812
 813	if (!rt && lifetime)
 814		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
 815					pref);
 816	else if (rt)
 817		rt->rt6i_flags = RTF_ROUTEINFO |
 818				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
 819
 820	if (rt) {
 821		if (!addrconf_finite_timeout(lifetime))
 822			rt6_clean_expires(rt);
 823		else
 824			rt6_set_expires(rt, jiffies + HZ * lifetime);
 825
 826		ip6_rt_put(rt);
 827	}
 828	return 0;
 829}
 830#endif
 831
 832static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
 833					struct in6_addr *saddr)
 834{
 835	struct fib6_node *pn;
 836	while (1) {
 837		if (fn->fn_flags & RTN_TL_ROOT)
 838			return NULL;
 839		pn = fn->parent;
 840		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
 841			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
 842		else
 843			fn = pn;
 844		if (fn->fn_flags & RTN_RTINFO)
 845			return fn;
 846	}
 847}
 848
 849static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 850					     struct fib6_table *table,
 851					     struct flowi6 *fl6, int flags)
 852{
 853	struct fib6_node *fn;
 854	struct rt6_info *rt;
 855
 856	read_lock_bh(&table->tb6_lock);
 857	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 858restart:
 859	rt = fn->leaf;
 860	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
 861	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
 862		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
 863	if (rt == net->ipv6.ip6_null_entry) {
 864		fn = fib6_backtrack(fn, &fl6->saddr);
 865		if (fn)
 866			goto restart;
 867	}
 868	dst_use(&rt->dst, jiffies);
 869	read_unlock_bh(&table->tb6_lock);
 870
 871	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
 872
 873	return rt;
 874
 875}
 876
 877struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
 878				    int flags)
 879{
 880	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
 881}
 882EXPORT_SYMBOL_GPL(ip6_route_lookup);
 883
 884struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
 885			    const struct in6_addr *saddr, int oif, int strict)
 886{
 887	struct flowi6 fl6 = {
 888		.flowi6_oif = oif,
 889		.daddr = *daddr,
 890	};
 891	struct dst_entry *dst;
 892	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
 893
 894	if (saddr) {
 895		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
 896		flags |= RT6_LOOKUP_F_HAS_SADDR;
 897	}
 898
 899	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
 900	if (dst->error == 0)
 901		return (struct rt6_info *) dst;
 902
 903	dst_release(dst);
 904
 905	return NULL;
 906}
 907EXPORT_SYMBOL(rt6_lookup);
 908
 909/* ip6_ins_rt is called without holding table->tb6_lock.
 910   It takes a new route entry; if the addition fails for any
 911   reason, the route is freed.  In any case, if the caller
 912   does not hold a reference, it may be destroyed.
 913 */
 914
 915static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
 916			struct mx6_config *mxc)
 917{
 918	int err;
 919	struct fib6_table *table;
 920
 921	table = rt->rt6i_table;
 922	write_lock_bh(&table->tb6_lock);
 923	err = fib6_add(&table->tb6_root, rt, info, mxc);
 924	write_unlock_bh(&table->tb6_lock);
 925
 926	return err;
 927}
 928
 929int ip6_ins_rt(struct rt6_info *rt)
 930{
 931	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
 932	struct mx6_config mxc = { .mx = NULL, };
 933
 934	return __ip6_ins_rt(rt, &info, &mxc);
 935}
 936
 937static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
 938					   const struct in6_addr *daddr,
 939					   const struct in6_addr *saddr)
 940{
 941	struct rt6_info *rt;
 942
 943	/*
 944	 *	Clone the route.
 945	 */
 946
 947	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
 948		ort = (struct rt6_info *)ort->dst.from;
 949
 950	rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
 951
 952	if (!rt)
 953		return NULL;
 954
 955	ip6_rt_copy_init(rt, ort);
 956	rt->rt6i_flags |= RTF_CACHE;
 957	rt->rt6i_metric = 0;
 958	rt->dst.flags |= DST_HOST;
 959	rt->rt6i_dst.addr = *daddr;
 960	rt->rt6i_dst.plen = 128;
 961
 962	if (!rt6_is_gw_or_nonexthop(ort)) {
 963		if (ort->rt6i_dst.plen != 128 &&
 964		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
 965			rt->rt6i_flags |= RTF_ANYCAST;
 966#ifdef CONFIG_IPV6_SUBTREES
 967		if (rt->rt6i_src.plen && saddr) {
 968			rt->rt6i_src.addr = *saddr;
 969			rt->rt6i_src.plen = 128;
 970		}
 971#endif
 972	}
 973
 974	return rt;
 975}
 976
 977static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
 978{
 979	struct rt6_info *pcpu_rt;
 980
 981	pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
 982				  rt->dst.dev, rt->dst.flags);
 983
 984	if (!pcpu_rt)
 985		return NULL;
 986	ip6_rt_copy_init(pcpu_rt, rt);
 987	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
 988	pcpu_rt->rt6i_flags |= RTF_PCPU;
 989	return pcpu_rt;
 990}
 991
 992/* It should be called with read_lock_bh(&tb6_lock) acquired */
 993static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
 994{
 995	struct rt6_info *pcpu_rt, **p;
 996
 997	p = this_cpu_ptr(rt->rt6i_pcpu);
 998	pcpu_rt = *p;
 999
1000	if (pcpu_rt) {
1001		dst_hold(&pcpu_rt->dst);
1002		rt6_dst_from_metrics_check(pcpu_rt);
1003	}
1004	return pcpu_rt;
1005}
1006
1007static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1008{
1009	struct fib6_table *table = rt->rt6i_table;
1010	struct rt6_info *pcpu_rt, *prev, **p;
1011
1012	pcpu_rt = ip6_rt_pcpu_alloc(rt);
1013	if (!pcpu_rt) {
1014		struct net *net = dev_net(rt->dst.dev);
1015
1016		dst_hold(&net->ipv6.ip6_null_entry->dst);
1017		return net->ipv6.ip6_null_entry;
1018	}
1019
1020	read_lock_bh(&table->tb6_lock);
1021	if (rt->rt6i_pcpu) {
1022		p = this_cpu_ptr(rt->rt6i_pcpu);
1023		prev = cmpxchg(p, NULL, pcpu_rt);
1024		if (prev) {
1025			/* If someone did it before us, return prev instead */
1026			dst_destroy(&pcpu_rt->dst);
1027			pcpu_rt = prev;
1028		}
1029	} else {
1030		/* rt has been removed from the fib6 tree
1031		 * before we have a chance to acquire the read_lock.
1032		 * In this case, don't bother to create a pcpu rt
1033		 * since rt is going away anyway.  The next
1034		 * dst_check() will trigger a re-lookup.
1035		 */
1036		dst_destroy(&pcpu_rt->dst);
1037		pcpu_rt = rt;
1038	}
1039	dst_hold(&pcpu_rt->dst);
1040	rt6_dst_from_metrics_check(pcpu_rt);
1041	read_unlock_bh(&table->tb6_lock);
1042	return pcpu_rt;
1043}
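    /* The cmpxchg() above installs the per-cpu clone locklessly: if two
     * contexts race on the same CPU slot, exactly one store wins, and
     * the loser frees its clone and adopts the winner's ("prev").
     */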
1044
1045static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
1046				      struct flowi6 *fl6, int flags)
1047{
1048	struct fib6_node *fn, *saved_fn;
1049	struct rt6_info *rt;
1050	int strict = 0;
1051
1052	strict |= flags & RT6_LOOKUP_F_IFACE;
1053	if (net->ipv6.devconf_all->forwarding == 0)
1054		strict |= RT6_LOOKUP_F_REACHABLE;
1055
1056	read_lock_bh(&table->tb6_lock);
1057
1058	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1059	saved_fn = fn;
1060
1061	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1062		oif = 0;
1063
1064redo_rt6_select:
1065	rt = rt6_select(fn, oif, strict);
1066	if (rt->rt6i_nsiblings)
1067		rt = rt6_multipath_select(rt, fl6, oif, strict);
1068	if (rt == net->ipv6.ip6_null_entry) {
1069		fn = fib6_backtrack(fn, &fl6->saddr);
1070		if (fn)
1071			goto redo_rt6_select;
1072		else if (strict & RT6_LOOKUP_F_REACHABLE) {
1073			/* also consider unreachable route */
1074			strict &= ~RT6_LOOKUP_F_REACHABLE;
1075			fn = saved_fn;
1076			goto redo_rt6_select;
1077		}
1078	}
1079
1080
1081	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
1082		dst_use(&rt->dst, jiffies);
1083		read_unlock_bh(&table->tb6_lock);
1084
1085		rt6_dst_from_metrics_check(rt);
1086
1087		trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
1088		return rt;
1089	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1090			    !(rt->rt6i_flags & RTF_GATEWAY))) {
1091		/* Create a RTF_CACHE clone which will not be
1092		 * owned by the fib6 tree.  It is for the special case where
1093		 * the daddr in the skb during the neighbor look-up is different
1094		 * from the fl6->daddr used to look-up route here.
1095		 */
1096
1097		struct rt6_info *uncached_rt;
1098
1099		dst_use(&rt->dst, jiffies);
1100		read_unlock_bh(&table->tb6_lock);
1101
1102		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1103		dst_release(&rt->dst);
1104
1105		if (uncached_rt)
1106			rt6_uncached_list_add(uncached_rt);
1107		else
1108			uncached_rt = net->ipv6.ip6_null_entry;
1109
1110		dst_hold(&uncached_rt->dst);
1111
1112		trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
1113		return uncached_rt;
1114
1115	} else {
1116		/* Get a percpu copy */
1117
1118		struct rt6_info *pcpu_rt;
1119
1120		rt->dst.lastuse = jiffies;
1121		rt->dst.__use++;
1122		pcpu_rt = rt6_get_pcpu_route(rt);
1123
1124		if (pcpu_rt) {
1125			read_unlock_bh(&table->tb6_lock);
1126		} else {
1127			/* We have to do the read_unlock first
1128			 * because rt6_make_pcpu_route() may trigger
1129			 * ip6_dst_gc() which will take the write_lock.
1130			 */
1131			dst_hold(&rt->dst);
1132			read_unlock_bh(&table->tb6_lock);
1133			pcpu_rt = rt6_make_pcpu_route(rt);
1134			dst_release(&rt->dst);
1135		}
1136
1137		trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
1138		return pcpu_rt;
1139
1140	}
1141}
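    /* Summary of the three exits above: an RTF_CACHE clone (or the null
     * entry) is returned as-is; a FLOWI_FLAG_KNOWN_NH lookup without a
     * gateway gets a DST_NOCACHE clone parked on the uncached list; and
     * every other lookup gets a per-cpu copy, created on first use.
     */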
1142
1143static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1144					    struct flowi6 *fl6, int flags)
1145{
1146	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1147}
1148
1149static struct dst_entry *ip6_route_input_lookup(struct net *net,
1150						struct net_device *dev,
1151						struct flowi6 *fl6, int flags)
1152{
1153	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1154		flags |= RT6_LOOKUP_F_IFACE;
1155
1156	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1157}
1158
1159void ip6_route_input(struct sk_buff *skb)
1160{
1161	const struct ipv6hdr *iph = ipv6_hdr(skb);
1162	struct net *net = dev_net(skb->dev);
1163	int flags = RT6_LOOKUP_F_HAS_SADDR;
1164	struct ip_tunnel_info *tun_info;
1165	struct flowi6 fl6 = {
1166		.flowi6_iif = l3mdev_fib_oif(skb->dev),
1167		.daddr = iph->daddr,
1168		.saddr = iph->saddr,
1169		.flowlabel = ip6_flowinfo(iph),
1170		.flowi6_mark = skb->mark,
1171		.flowi6_proto = iph->nexthdr,
1172	};
1173
1174	tun_info = skb_tunnel_info(skb);
1175	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
1176		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
1177	skb_dst_drop(skb);
1178	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1179}
1180
1181static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1182					     struct flowi6 *fl6, int flags)
1183{
1184	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1185}
1186
1187struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1188					 struct flowi6 *fl6, int flags)
1189{
1190	struct dst_entry *dst;
1191	bool any_src;
1192
1193	dst = l3mdev_rt6_dst_by_oif(net, fl6);
1194	if (dst)
1195		return dst;
1196
1197	fl6->flowi6_iif = LOOPBACK_IFINDEX;
1198
1199	any_src = ipv6_addr_any(&fl6->saddr);
1200	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
1201	    (fl6->flowi6_oif && any_src))
1202		flags |= RT6_LOOKUP_F_IFACE;
1203
1204	if (!any_src)
1205		flags |= RT6_LOOKUP_F_HAS_SADDR;
1206	else if (sk)
1207		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1208
1209	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1210}
1211EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1212
1213struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1214{
1215	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1216	struct dst_entry *new = NULL;
1217
1218	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1219	if (rt) {
1220		rt6_info_init(rt);
1221
1222		new = &rt->dst;
1223		new->__use = 1;
1224		new->input = dst_discard;
1225		new->output = dst_discard_out;
1226
1227		dst_copy_metrics(new, &ort->dst);
1228		rt->rt6i_idev = ort->rt6i_idev;
1229		if (rt->rt6i_idev)
1230			in6_dev_hold(rt->rt6i_idev);
1231
1232		rt->rt6i_gateway = ort->rt6i_gateway;
1233		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
1234		rt->rt6i_metric = 0;
1235
1236		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1237#ifdef CONFIG_IPV6_SUBTREES
1238		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1239#endif
1240
1241		dst_free(new);
1242	}
1243
1244	dst_release(dst_orig);
1245	return new ? new : ERR_PTR(-ENOMEM);
1246}
1247
1248/*
1249 *	Destination cache support functions
1250 */
1251
1252static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1253{
1254	if (rt->dst.from &&
1255	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1256		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1257}
1258
1259static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1260{
1261	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1262		return NULL;
1263
1264	if (rt6_check_expired(rt))
1265		return NULL;
1266
1267	return &rt->dst;
1268}
1269
1270static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1271{
1272	if (!__rt6_check_expired(rt) &&
1273	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1274	    rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1275		return &rt->dst;
1276	else
1277		return NULL;
1278}
1279
1280static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1281{
1282	struct rt6_info *rt;
1283
1284	rt = (struct rt6_info *) dst;
1285
1286	/* All IPV6 dsts are created with ->obsolete set to the value
1287	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1288	 * into this function always.
1289	 */
1290
1291	rt6_dst_from_metrics_check(rt);
1292
1293	if (rt->rt6i_flags & RTF_PCPU ||
1294	    (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
1295		return rt6_dst_from_check(rt, cookie);
1296	else
1297		return rt6_check(rt, cookie);
1298}
1299
1300static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1301{
1302	struct rt6_info *rt = (struct rt6_info *) dst;
1303
1304	if (rt) {
1305		if (rt->rt6i_flags & RTF_CACHE) {
1306			if (rt6_check_expired(rt)) {
1307				ip6_del_rt(rt);
1308				dst = NULL;
1309			}
1310		} else {
1311			dst_release(dst);
1312			dst = NULL;
1313		}
1314	}
1315	return dst;
1316}
1317
1318static void ip6_link_failure(struct sk_buff *skb)
1319{
1320	struct rt6_info *rt;
1321
1322	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1323
1324	rt = (struct rt6_info *) skb_dst(skb);
1325	if (rt) {
1326		if (rt->rt6i_flags & RTF_CACHE) {
1327			dst_hold(&rt->dst);
1328			ip6_del_rt(rt);
1329		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1330			rt->rt6i_node->fn_sernum = -1;
1331		}
1332	}
1333}
1334
1335static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1336{
1337	struct net *net = dev_net(rt->dst.dev);
1338
1339	rt->rt6i_flags |= RTF_MODIFIED;
1340	rt->rt6i_pmtu = mtu;
1341	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1342}
1343
1344static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1345{
1346	return !(rt->rt6i_flags & RTF_CACHE) &&
1347		(rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
1348}
1349
1350static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1351				 const struct ipv6hdr *iph, u32 mtu)
1352{
1353	struct rt6_info *rt6 = (struct rt6_info *)dst;
1354
1355	if (rt6->rt6i_flags & RTF_LOCAL)
1356		return;
1357
1358	dst_confirm(dst);
1359	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1360	if (mtu >= dst_mtu(dst))
1361		return;
1362
1363	if (!rt6_cache_allowed_for_pmtu(rt6)) {
1364		rt6_do_update_pmtu(rt6, mtu);
1365	} else {
1366		const struct in6_addr *daddr, *saddr;
1367		struct rt6_info *nrt6;
1368
1369		if (iph) {
1370			daddr = &iph->daddr;
1371			saddr = &iph->saddr;
1372		} else if (sk) {
1373			daddr = &sk->sk_v6_daddr;
1374			saddr = &inet6_sk(sk)->saddr;
1375		} else {
1376			return;
1377		}
1378		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1379		if (nrt6) {
1380			rt6_do_update_pmtu(nrt6, mtu);
1381
1382			/* ip6_ins_rt(nrt6) will bump the
1383			 * rt6->rt6i_node->fn_sernum
1384			 * which will fail the next rt6_check() and
1385			 * invalidate the sk->sk_dst_cache.
1386			 */
1387			ip6_ins_rt(nrt6);
1388		}
1389	}
1390}
1391
1392static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1393			       struct sk_buff *skb, u32 mtu)
1394{
1395	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1396}
1397
1398void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1399		     int oif, u32 mark)
1400{
1401	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1402	struct dst_entry *dst;
1403	struct flowi6 fl6;
1404
1405	memset(&fl6, 0, sizeof(fl6));
1406	fl6.flowi6_oif = oif;
1407	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1408	fl6.daddr = iph->daddr;
1409	fl6.saddr = iph->saddr;
1410	fl6.flowlabel = ip6_flowinfo(iph);
1411
1412	dst = ip6_route_output(net, NULL, &fl6);
1413	if (!dst->error)
1414		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1415	dst_release(dst);
1416}
1417EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1418
1419void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1420{
1421	struct dst_entry *dst;
1422
1423	ip6_update_pmtu(skb, sock_net(sk), mtu,
1424			sk->sk_bound_dev_if, sk->sk_mark);
1425
1426	dst = __sk_dst_get(sk);
1427	if (!dst || !dst->obsolete ||
1428	    dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
1429		return;
1430
1431	bh_lock_sock(sk);
1432	if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
1433		ip6_datagram_dst_update(sk, false);
1434	bh_unlock_sock(sk);
1435}
1436EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1437
1438/* Handle redirects */
1439struct ip6rd_flowi {
1440	struct flowi6 fl6;
1441	struct in6_addr gateway;
1442};
1443
1444static struct rt6_info *__ip6_route_redirect(struct net *net,
1445					     struct fib6_table *table,
1446					     struct flowi6 *fl6,
1447					     int flags)
1448{
1449	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1450	struct rt6_info *rt;
1451	struct fib6_node *fn;
1452
1453	/* Get the "current" route for this destination and
1454	 * check if the redirect has come from the appropriate router.
1455	 *
1456	 * RFC 4861 specifies that redirects should only be
1457	 * accepted if they come from the nexthop to the target.
1458	 * Due to the way the routes are chosen, this notion
1459	 * is a bit fuzzy and one might need to check all possible
1460	 * routes.
1461	 */
1462
1463	read_lock_bh(&table->tb6_lock);
1464	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1465restart:
1466	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1467		if (rt6_check_expired(rt))
1468			continue;
1469		if (rt->dst.error)
1470			break;
1471		if (!(rt->rt6i_flags & RTF_GATEWAY))
1472			continue;
1473		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1474			continue;
1475		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1476			continue;
1477		break;
1478	}
1479
1480	if (!rt)
1481		rt = net->ipv6.ip6_null_entry;
1482	else if (rt->dst.error) {
1483		rt = net->ipv6.ip6_null_entry;
1484		goto out;
1485	}
1486
1487	if (rt == net->ipv6.ip6_null_entry) {
1488		fn = fib6_backtrack(fn, &fl6->saddr);
1489		if (fn)
1490			goto restart;
1491	}
1492
1493out:
1494	dst_hold(&rt->dst);
1495
1496	read_unlock_bh(&table->tb6_lock);
1497
1498	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
1499	return rt;
1500};
1501
1502static struct dst_entry *ip6_route_redirect(struct net *net,
1503					const struct flowi6 *fl6,
1504					const struct in6_addr *gateway)
1505{
1506	int flags = RT6_LOOKUP_F_HAS_SADDR;
1507	struct ip6rd_flowi rdfl;
1508
1509	rdfl.fl6 = *fl6;
1510	rdfl.gateway = *gateway;
1511
1512	return fib6_rule_lookup(net, &rdfl.fl6,
1513				flags, __ip6_route_redirect);
1514}
1515
1516void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1517{
1518	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1519	struct dst_entry *dst;
1520	struct flowi6 fl6;
1521
1522	memset(&fl6, 0, sizeof(fl6));
1523	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1524	fl6.flowi6_oif = oif;
1525	fl6.flowi6_mark = mark;
1526	fl6.daddr = iph->daddr;
1527	fl6.saddr = iph->saddr;
1528	fl6.flowlabel = ip6_flowinfo(iph);
1529
1530	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1531	rt6_do_redirect(dst, NULL, skb);
1532	dst_release(dst);
1533}
1534EXPORT_SYMBOL_GPL(ip6_redirect);
1535
1536void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1537			    u32 mark)
1538{
1539	const struct ipv6hdr *iph = ipv6_hdr(skb);
1540	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1541	struct dst_entry *dst;
1542	struct flowi6 fl6;
1543
1544	memset(&fl6, 0, sizeof(fl6));
1545	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1546	fl6.flowi6_oif = oif;
1547	fl6.flowi6_mark = mark;
1548	fl6.daddr = msg->dest;
1549	fl6.saddr = iph->daddr;
1550
1551	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1552	rt6_do_redirect(dst, NULL, skb);
1553	dst_release(dst);
1554}
1555
1556void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1557{
1558	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1559}
1560EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1561
1562static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1563{
1564	struct net_device *dev = dst->dev;
1565	unsigned int mtu = dst_mtu(dst);
1566	struct net *net = dev_net(dev);
1567
1568	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1569
1570	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1571		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1572
1573	/*
1574	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1575	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1576	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1577	 * rely only on pmtu discovery"
1578	 */
1579	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1580		mtu = IPV6_MAXPLEN;
1581	return mtu;
1582}
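    /* Worked example: with a path MTU of 1500, advmss is
     * 1500 - sizeof(struct ipv6hdr) - sizeof(struct tcphdr)
     * = 1500 - 40 - 20 = 1440 bytes, unless raised to
     * ip6_rt_min_advmss or clamped by the IPV6_MAXPLEN rule above.
     */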
1583
1584static unsigned int ip6_mtu(const struct dst_entry *dst)
1585{
1586	const struct rt6_info *rt = (const struct rt6_info *)dst;
1587	unsigned int mtu = rt->rt6i_pmtu;
1588	struct inet6_dev *idev;
1589
1590	if (mtu)
1591		goto out;
1592
1593	mtu = dst_metric_raw(dst, RTAX_MTU);
1594	if (mtu)
1595		goto out;
1596
1597	mtu = IPV6_MIN_MTU;
1598
1599	rcu_read_lock();
1600	idev = __in6_dev_get(dst->dev);
1601	if (idev)
1602		mtu = idev->cnf.mtu6;
1603	rcu_read_unlock();
1604
1605out:
1606	return min_t(unsigned int, mtu, IP6_MAX_MTU);
1607}
1608
1609static struct dst_entry *icmp6_dst_gc_list;
1610static DEFINE_SPINLOCK(icmp6_dst_lock);
1611
1612struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1613				  struct flowi6 *fl6)
1614{
1615	struct dst_entry *dst;
1616	struct rt6_info *rt;
1617	struct inet6_dev *idev = in6_dev_get(dev);
1618	struct net *net = dev_net(dev);
1619
1620	if (unlikely(!idev))
1621		return ERR_PTR(-ENODEV);
1622
1623	rt = ip6_dst_alloc(net, dev, 0);
1624	if (unlikely(!rt)) {
1625		in6_dev_put(idev);
1626		dst = ERR_PTR(-ENOMEM);
1627		goto out;
1628	}
1629
1630	rt->dst.flags |= DST_HOST;
1631	rt->dst.output  = ip6_output;
1632	atomic_set(&rt->dst.__refcnt, 1);
1633	rt->rt6i_gateway  = fl6->daddr;
1634	rt->rt6i_dst.addr = fl6->daddr;
1635	rt->rt6i_dst.plen = 128;
1636	rt->rt6i_idev     = idev;
1637	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1638
1639	spin_lock_bh(&icmp6_dst_lock);
1640	rt->dst.next = icmp6_dst_gc_list;
1641	icmp6_dst_gc_list = &rt->dst;
1642	spin_unlock_bh(&icmp6_dst_lock);
1643
1644	fib6_force_start_gc(net);
1645
1646	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1647
1648out:
1649	return dst;
1650}
1651
1652int icmp6_dst_gc(void)
1653{
1654	struct dst_entry *dst, **pprev;
1655	int more = 0;
1656
1657	spin_lock_bh(&icmp6_dst_lock);
1658	pprev = &icmp6_dst_gc_list;
1659
1660	while ((dst = *pprev) != NULL) {
1661		if (!atomic_read(&dst->__refcnt)) {
1662			*pprev = dst->next;
1663			dst_free(dst);
1664		} else {
1665			pprev = &dst->next;
1666			++more;
1667		}
1668	}
1669
1670	spin_unlock_bh(&icmp6_dst_lock);
1671
1672	return more;
1673}
1674
1675static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1676			    void *arg)
1677{
1678	struct dst_entry *dst, **pprev;
1679
1680	spin_lock_bh(&icmp6_dst_lock);
1681	pprev = &icmp6_dst_gc_list;
1682	while ((dst = *pprev) != NULL) {
1683		struct rt6_info *rt = (struct rt6_info *) dst;
1684		if (func(rt, arg)) {
1685			*pprev = dst->next;
1686			dst_free(dst);
1687		} else {
1688			pprev = &dst->next;
1689		}
1690	}
1691	spin_unlock_bh(&icmp6_dst_lock);
1692}
1693
1694static int ip6_dst_gc(struct dst_ops *ops)
1695{
1696	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1697	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1698	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1699	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1700	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1701	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1702	int entries;
1703
1704	entries = dst_entries_get_fast(ops);
1705	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1706	    entries <= rt_max_size)
1707		goto out;
1708
1709	net->ipv6.ip6_rt_gc_expire++;
1710	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1711	entries = dst_entries_get_slow(ops);
1712	if (entries < ops->gc_thresh)
1713		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1714out:
1715	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1716	return entries > rt_max_size;
1717}
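    /* The backoff above bumps ip6_rt_gc_expire on every invocation and
     * then decays it by expire -= expire >> elasticity on the way out,
     * so under sustained pressure the effective GC interval grows, and
     * it is reset to half of ip6_rt_gc_timeout once the entry count
     * falls below gc_thresh again.
     */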
1718
1719static int ip6_convert_metrics(struct mx6_config *mxc,
1720			       const struct fib6_config *cfg)
1721{
1722	bool ecn_ca = false;
1723	struct nlattr *nla;
1724	int remaining;
1725	u32 *mp;
1726
1727	if (!cfg->fc_mx)
1728		return 0;
1729
1730	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1731	if (unlikely(!mp))
1732		return -ENOMEM;
1733
1734	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1735		int type = nla_type(nla);
1736		u32 val;
1737
1738		if (!type)
1739			continue;
1740		if (unlikely(type > RTAX_MAX))
1741			goto err;
1742
1743		if (type == RTAX_CC_ALGO) {
1744			char tmp[TCP_CA_NAME_MAX];
1745
1746			nla_strlcpy(tmp, nla, sizeof(tmp));
1747			val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1748			if (val == TCP_CA_UNSPEC)
1749				goto err;
1750		} else {
1751			val = nla_get_u32(nla);
1752		}
1753		if (type == RTAX_HOPLIMIT && val > 255)
1754			val = 255;
1755		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1756			goto err;
1757
1758		mp[type - 1] = val;
1759		__set_bit(type - 1, mxc->mx_valid);
1760	}
1761
1762	if (ecn_ca) {
1763		__set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1764		mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1765	}
1766
1767	mxc->mx = mp;
1768	return 0;
1769 err:
1770	kfree(mp);
1771	return -EINVAL;
1772}
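    /* Note on the "type - 1" indexing above: RTAX_* attribute types are
     * 1-based (RTAX_UNSPEC is 0 and never stored), while mp[] and the
     * mx_valid bitmap are 0-based, so e.g. RTAX_MTU lands in
     * mp[RTAX_MTU - 1].
     */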
1773
1774static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
1775{
1776	struct net *net = cfg->fc_nlinfo.nl_net;
1777	struct rt6_info *rt = NULL;
1778	struct net_device *dev = NULL;
1779	struct inet6_dev *idev = NULL;
1780	struct fib6_table *table;
1781	int addr_type;
1782	int err = -EINVAL;
1783
1784	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1785		goto out;
1786#ifndef CONFIG_IPV6_SUBTREES
1787	if (cfg->fc_src_len)
1788		goto out;
1789#endif
1790	if (cfg->fc_ifindex) {
1791		err = -ENODEV;
1792		dev = dev_get_by_index(net, cfg->fc_ifindex);
1793		if (!dev)
1794			goto out;
1795		idev = in6_dev_get(dev);
1796		if (!idev)
1797			goto out;
1798	}
1799
1800	if (cfg->fc_metric == 0)
1801		cfg->fc_metric = IP6_RT_PRIO_USER;
1802
1803	err = -ENOBUFS;
1804	if (cfg->fc_nlinfo.nlh &&
1805	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1806		table = fib6_get_table(net, cfg->fc_table);
1807		if (!table) {
1808			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1809			table = fib6_new_table(net, cfg->fc_table);
1810		}
1811	} else {
1812		table = fib6_new_table(net, cfg->fc_table);
1813	}
1814
1815	if (!table)
1816		goto out;
1817
1818	rt = ip6_dst_alloc(net, NULL,
1819			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1820
1821	if (!rt) {
1822		err = -ENOMEM;
1823		goto out;
1824	}
1825
1826	if (cfg->fc_flags & RTF_EXPIRES)
1827		rt6_set_expires(rt, jiffies +
1828				clock_t_to_jiffies(cfg->fc_expires));
1829	else
1830		rt6_clean_expires(rt);
1831
1832	if (cfg->fc_protocol == RTPROT_UNSPEC)
1833		cfg->fc_protocol = RTPROT_BOOT;
1834	rt->rt6i_protocol = cfg->fc_protocol;
1835
1836	addr_type = ipv6_addr_type(&cfg->fc_dst);
1837
1838	if (addr_type & IPV6_ADDR_MULTICAST)
1839		rt->dst.input = ip6_mc_input;
1840	else if (cfg->fc_flags & RTF_LOCAL)
1841		rt->dst.input = ip6_input;
1842	else
1843		rt->dst.input = ip6_forward;
1844
1845	rt->dst.output = ip6_output;
1846
1847	if (cfg->fc_encap) {
1848		struct lwtunnel_state *lwtstate;
1849
1850		err = lwtunnel_build_state(dev, cfg->fc_encap_type,
1851					   cfg->fc_encap, AF_INET6, cfg,
1852					   &lwtstate);
1853		if (err)
1854			goto out;
1855		rt->dst.lwtstate = lwtstate_get(lwtstate);
1856		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1857			rt->dst.lwtstate->orig_output = rt->dst.output;
1858			rt->dst.output = lwtunnel_output;
1859		}
1860		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1861			rt->dst.lwtstate->orig_input = rt->dst.input;
1862			rt->dst.input = lwtunnel_input;
1863		}
1864	}
1865
1866	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1867	rt->rt6i_dst.plen = cfg->fc_dst_len;
1868	if (rt->rt6i_dst.plen == 128)
1869		rt->dst.flags |= DST_HOST;
1870
1871#ifdef CONFIG_IPV6_SUBTREES
1872	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1873	rt->rt6i_src.plen = cfg->fc_src_len;
1874#endif
1875
1876	rt->rt6i_metric = cfg->fc_metric;
1877
1878	/* We cannot add true routes via loopback here,
1879	   they would result in kernel looping; promote them to reject routes
1880	 */
1881	if ((cfg->fc_flags & RTF_REJECT) ||
1882	    (dev && (dev->flags & IFF_LOOPBACK) &&
1883	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
1884	     !(cfg->fc_flags & RTF_LOCAL))) {
1885		/* hold loopback dev/idev if we haven't done so. */
1886		if (dev != net->loopback_dev) {
1887			if (dev) {
1888				dev_put(dev);
1889				in6_dev_put(idev);
1890			}
1891			dev = net->loopback_dev;
1892			dev_hold(dev);
1893			idev = in6_dev_get(dev);
1894			if (!idev) {
1895				err = -ENODEV;
1896				goto out;
1897			}
1898		}
1899		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1900		switch (cfg->fc_type) {
1901		case RTN_BLACKHOLE:
1902			rt->dst.error = -EINVAL;
1903			rt->dst.output = dst_discard_out;
1904			rt->dst.input = dst_discard;
1905			break;
1906		case RTN_PROHIBIT:
1907			rt->dst.error = -EACCES;
1908			rt->dst.output = ip6_pkt_prohibit_out;
1909			rt->dst.input = ip6_pkt_prohibit;
1910			break;
1911		case RTN_THROW:
1912		case RTN_UNREACHABLE:
1913		default:
1914			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1915					: (cfg->fc_type == RTN_UNREACHABLE)
1916					? -EHOSTUNREACH : -ENETUNREACH;
1917			rt->dst.output = ip6_pkt_discard_out;
1918			rt->dst.input = ip6_pkt_discard;
1919			break;
1920		}
1921		goto install_route;
1922	}
1923
1924	if (cfg->fc_flags & RTF_GATEWAY) {
1925		const struct in6_addr *gw_addr;
1926		int gwa_type;
1927
1928		gw_addr = &cfg->fc_gateway;
1929		gwa_type = ipv6_addr_type(gw_addr);
1930
1931		/* if gw_addr is local we will fail to detect this while the
1932		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
1933		 * will return the already-added prefix route via the interface
1934		 * the prefix route was assigned to, which might be non-loopback.
1935		 */
1936		err = -EINVAL;
1937		if (ipv6_chk_addr_and_flags(net, gw_addr,
1938					    gwa_type & IPV6_ADDR_LINKLOCAL ?
1939					    dev : NULL, 0, 0))
1940			goto out;
1941
1942		rt->rt6i_gateway = *gw_addr;
1943
1944		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1945			struct rt6_info *grt;
1946
1947			/* IPv6 strictly prohibits using non-link-local
1948			   addresses as the nexthop address.
1949			   Otherwise, a router will not be able to send redirects.
1950			   That is a good rule, but in some (rare!) circumstances
1951			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1952			   some exceptions. --ANK
1953			 */
1954			if (!(gwa_type & IPV6_ADDR_UNICAST))
1955				goto out;
1956
1957			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1958
1959			err = -EHOSTUNREACH;
1960			if (!grt)
1961				goto out;
1962			if (dev) {
1963				if (dev != grt->dst.dev) {
1964					ip6_rt_put(grt);
1965					goto out;
1966				}
1967			} else {
1968				dev = grt->dst.dev;
1969				idev = grt->rt6i_idev;
1970				dev_hold(dev);
1971				in6_dev_hold(grt->rt6i_idev);
1972			}
1973			if (!(grt->rt6i_flags & RTF_GATEWAY))
1974				err = 0;
1975			ip6_rt_put(grt);
1976
1977			if (err)
1978				goto out;
1979		}
1980		err = -EINVAL;
1981		if (!dev || (dev->flags & IFF_LOOPBACK))
1982			goto out;
1983	}
1984
1985	err = -ENODEV;
1986	if (!dev)
1987		goto out;
1988
1989	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1990		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1991			err = -EINVAL;
1992			goto out;
1993		}
1994		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1995		rt->rt6i_prefsrc.plen = 128;
1996	} else
1997		rt->rt6i_prefsrc.plen = 0;
1998
1999	rt->rt6i_flags = cfg->fc_flags;
2000
2001install_route:
2002	rt->dst.dev = dev;
2003	rt->rt6i_idev = idev;
2004	rt->rt6i_table = table;
2005
2006	cfg->fc_nlinfo.nl_net = dev_net(dev);
2007
2008	return rt;
2009out:
2010	if (dev)
2011		dev_put(dev);
2012	if (idev)
2013		in6_dev_put(idev);
2014	if (rt)
2015		dst_free(&rt->dst);
2016
2017	return ERR_PTR(err);
2018}
2019
2020int ip6_route_add(struct fib6_config *cfg)
2021{
2022	struct mx6_config mxc = { .mx = NULL, };
2023	struct rt6_info *rt;
2024	int err;
2025
2026	rt = ip6_route_info_create(cfg);
2027	if (IS_ERR(rt)) {
2028		err = PTR_ERR(rt);
2029		rt = NULL;
2030		goto out;
2031	}
2032
2033	err = ip6_convert_metrics(&mxc, cfg);
2034	if (err)
2035		goto out;
2036
2037	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
2038
2039	kfree(mxc.mx);
2040
2041	return err;
2042out:
2043	if (rt)
2044		dst_free(&rt->dst);
2045
2046	return err;
2047}
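/*
 * Illustrative sketch (not part of the original file): a minimal
 * in-kernel caller of ip6_route_add().  It mirrors the pattern used by
 * rt6_add_dflt_router() further down; the function name and the fixed
 * flag/metric choices are assumptions made for illustration only.
 */
static int __maybe_unused ip6_route_add_example(struct net *net,
						const struct in6_addr *dst,
						int plen, int ifindex)
{
	struct fib6_config cfg = {
		.fc_table	= RT6_TABLE_MAIN,
		.fc_metric	= IP6_RT_PRIO_USER, /* same default as ip6_route_info_create() */
		.fc_ifindex	= ifindex,
		.fc_dst_len	= plen,
		.fc_flags	= RTF_UP,
		.fc_nlinfo.nl_net = net,
	};

	cfg.fc_dst = *dst;
	/* returns 0 on success or a negative errno */
	return ip6_route_add(&cfg);
}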
2048
2049static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2050{
2051	int err;
2052	struct fib6_table *table;
2053	struct net *net = dev_net(rt->dst.dev);
2054
2055	if (rt == net->ipv6.ip6_null_entry ||
2056	    rt->dst.flags & DST_NOCACHE) {
2057		err = -ENOENT;
2058		goto out;
2059	}
2060
2061	table = rt->rt6i_table;
2062	write_lock_bh(&table->tb6_lock);
2063	err = fib6_del(rt, info);
2064	write_unlock_bh(&table->tb6_lock);
2065
2066out:
2067	ip6_rt_put(rt);
2068	return err;
2069}
2070
2071int ip6_del_rt(struct rt6_info *rt)
2072{
2073	struct nl_info info = {
2074		.nl_net = dev_net(rt->dst.dev),
2075	};
2076	return __ip6_del_rt(rt, &info);
2077}
2078
2079static int ip6_route_del(struct fib6_config *cfg)
2080{
2081	struct fib6_table *table;
2082	struct fib6_node *fn;
2083	struct rt6_info *rt;
2084	int err = -ESRCH;
2085
2086	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
2087	if (!table)
2088		return err;
2089
2090	read_lock_bh(&table->tb6_lock);
2091
2092	fn = fib6_locate(&table->tb6_root,
2093			 &cfg->fc_dst, cfg->fc_dst_len,
2094			 &cfg->fc_src, cfg->fc_src_len);
2095
2096	if (fn) {
2097		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2098			if ((rt->rt6i_flags & RTF_CACHE) &&
2099			    !(cfg->fc_flags & RTF_CACHE))
2100				continue;
2101			if (cfg->fc_ifindex &&
2102			    (!rt->dst.dev ||
2103			     rt->dst.dev->ifindex != cfg->fc_ifindex))
2104				continue;
2105			if (cfg->fc_flags & RTF_GATEWAY &&
2106			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
2107				continue;
2108			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
2109				continue;
2110			dst_hold(&rt->dst);
2111			read_unlock_bh(&table->tb6_lock);
2112
2113			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2114		}
2115	}
2116	read_unlock_bh(&table->tb6_lock);
2117
2118	return err;
2119}
2120
2121static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
2122{
2123	struct netevent_redirect netevent;
2124	struct rt6_info *rt, *nrt = NULL;
2125	struct ndisc_options ndopts;
2126	struct inet6_dev *in6_dev;
2127	struct neighbour *neigh;
2128	struct rd_msg *msg;
2129	int optlen, on_link;
2130	u8 *lladdr;
2131
2132	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
2133	optlen -= sizeof(*msg);
2134
2135	if (optlen < 0) {
2136		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
2137		return;
2138	}
2139
2140	msg = (struct rd_msg *)icmp6_hdr(skb);
2141
2142	if (ipv6_addr_is_multicast(&msg->dest)) {
2143		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
2144		return;
2145	}
2146
2147	on_link = 0;
2148	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
2149		on_link = 1;
2150	} else if (ipv6_addr_type(&msg->target) !=
2151		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
2152		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
2153		return;
2154	}
2155
2156	in6_dev = __in6_dev_get(skb->dev);
2157	if (!in6_dev)
2158		return;
2159	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2160		return;
2161
2162	/* RFC2461 8.1:
2163	 *	The IP source address of the Redirect MUST be the same as the current
2164	 *	first-hop router for the specified ICMP Destination Address.
2165	 */
2166
2167	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
2168		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2169		return;
2170	}
2171
2172	lladdr = NULL;
2173	if (ndopts.nd_opts_tgt_lladdr) {
2174		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2175					     skb->dev);
2176		if (!lladdr) {
2177			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2178			return;
2179		}
2180	}
2181
2182	rt = (struct rt6_info *) dst;
2183	if (rt->rt6i_flags & RTF_REJECT) {
2184		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
2185		return;
2186	}
2187
2188	/* Redirect received -> path was valid.
2189	 * Redirects are sent only in response to data packets,
2190	 * so this nexthop is apparently reachable. --ANK
2191	 */
2192	dst_confirm(&rt->dst);
2193
2194	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
2195	if (!neigh)
2196		return;
2197
2198	/*
2199	 *	We have finally decided to accept it.
2200	 */
2201
2202	neigh_update(neigh, lladdr, NUD_STALE,
2203		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
2204		     NEIGH_UPDATE_F_OVERRIDE|
2205		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
2206				     NEIGH_UPDATE_F_ISROUTER))
2207		     );
2208
2209	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
2210	if (!nrt)
2211		goto out;
2212
2213	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2214	if (on_link)
2215		nrt->rt6i_flags &= ~RTF_GATEWAY;
2216
2217	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
2218
2219	if (ip6_ins_rt(nrt))
2220		goto out;
2221
2222	netevent.old = &rt->dst;
2223	netevent.new = &nrt->dst;
2224	netevent.daddr = &msg->dest;
2225	netevent.neigh = neigh;
2226	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2227
2228	if (rt->rt6i_flags & RTF_CACHE) {
2229		rt = (struct rt6_info *) dst_clone(&rt->dst);
2230		ip6_del_rt(rt);
2231	}
2232
2233out:
2234	neigh_release(neigh);
2235}
2236
2237/*
2238 *	Misc support functions
2239 */
2240
2241static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2242{
2243	BUG_ON(from->dst.from);
2244
2245	rt->rt6i_flags &= ~RTF_EXPIRES;
2246	dst_hold(&from->dst);
2247	rt->dst.from = &from->dst;
2248	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2249}
2250
2251static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2252{
2253	rt->dst.input = ort->dst.input;
2254	rt->dst.output = ort->dst.output;
2255	rt->rt6i_dst = ort->rt6i_dst;
2256	rt->dst.error = ort->dst.error;
2257	rt->rt6i_idev = ort->rt6i_idev;
2258	if (rt->rt6i_idev)
2259		in6_dev_hold(rt->rt6i_idev);
2260	rt->dst.lastuse = jiffies;
2261	rt->rt6i_gateway = ort->rt6i_gateway;
2262	rt->rt6i_flags = ort->rt6i_flags;
2263	rt6_set_from(rt, ort);
2264	rt->rt6i_metric = ort->rt6i_metric;
2265#ifdef CONFIG_IPV6_SUBTREES
2266	rt->rt6i_src = ort->rt6i_src;
2267#endif
2268	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2269	rt->rt6i_table = ort->rt6i_table;
2270	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
2271}
2272
2273#ifdef CONFIG_IPV6_ROUTE_INFO
2274static struct rt6_info *rt6_get_route_info(struct net *net,
2275					   const struct in6_addr *prefix, int prefixlen,
2276					   const struct in6_addr *gwaddr, int ifindex)
2277{
2278	struct fib6_node *fn;
2279	struct rt6_info *rt = NULL;
2280	struct fib6_table *table;
2281
2282	table = fib6_get_table(net, RT6_TABLE_INFO);
2283	if (!table)
2284		return NULL;
2285
2286	read_lock_bh(&table->tb6_lock);
2287	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2288	if (!fn)
2289		goto out;
2290
2291	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2292		if (rt->dst.dev->ifindex != ifindex)
2293			continue;
2294		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2295			continue;
2296		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2297			continue;
2298		dst_hold(&rt->dst);
2299		break;
2300	}
2301out:
2302	read_unlock_bh(&table->tb6_lock);
2303	return rt;
2304}
2305
2306static struct rt6_info *rt6_add_route_info(struct net *net,
2307					   const struct in6_addr *prefix, int prefixlen,
2308					   const struct in6_addr *gwaddr, int ifindex,
2309					   unsigned int pref)
2310{
2311	struct fib6_config cfg = {
2312		.fc_metric	= IP6_RT_PRIO_USER,
2313		.fc_ifindex	= ifindex,
2314		.fc_dst_len	= prefixlen,
2315		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2316				  RTF_UP | RTF_PREF(pref),
2317		.fc_nlinfo.portid = 0,
2318		.fc_nlinfo.nlh = NULL,
2319		.fc_nlinfo.nl_net = net,
2320	};
2321
2322	cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO;
2323	cfg.fc_dst = *prefix;
2324	cfg.fc_gateway = *gwaddr;
2325
2326	/* We should treat it as a default route if prefix length is 0. */
2327	if (!prefixlen)
2328		cfg.fc_flags |= RTF_DEFAULT;
2329
2330	ip6_route_add(&cfg);
2331
2332	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
2333}
2334#endif
2335
2336struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2337{
2338	struct rt6_info *rt;
2339	struct fib6_table *table;
2340
2341	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
2342	if (!table)
2343		return NULL;
2344
2345	read_lock_bh(&table->tb6_lock);
2346	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2347		if (dev == rt->dst.dev &&
2348		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2349		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
2350			break;
2351	}
2352	if (rt)
2353		dst_hold(&rt->dst);
2354	read_unlock_bh(&table->tb6_lock);
2355	return rt;
2356}
2357
2358struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2359				     struct net_device *dev,
2360				     unsigned int pref)
2361{
2362	struct fib6_config cfg = {
2363		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
2364		.fc_metric	= IP6_RT_PRIO_USER,
2365		.fc_ifindex	= dev->ifindex,
2366		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2367				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2368		.fc_nlinfo.portid = 0,
2369		.fc_nlinfo.nlh = NULL,
2370		.fc_nlinfo.nl_net = dev_net(dev),
2371	};
2372
2373	cfg.fc_gateway = *gwaddr;
2374
2375	ip6_route_add(&cfg);
2376
2377	return rt6_get_dflt_router(gwaddr, dev);
2378}
2379
2380void rt6_purge_dflt_routers(struct net *net)
2381{
2382	struct rt6_info *rt;
2383	struct fib6_table *table;
2384
2385	/* NOTE: Keep consistent with rt6_get_dflt_router */
2386	table = fib6_get_table(net, RT6_TABLE_DFLT);
2387	if (!table)
2388		return;
2389
2390restart:
2391	read_lock_bh(&table->tb6_lock);
2392	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2393		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2394		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2395			dst_hold(&rt->dst);
2396			read_unlock_bh(&table->tb6_lock);
2397			ip6_del_rt(rt);
2398			goto restart;
2399		}
2400	}
2401	read_unlock_bh(&table->tb6_lock);
2402}
2403
2404static void rtmsg_to_fib6_config(struct net *net,
2405				 struct in6_rtmsg *rtmsg,
2406				 struct fib6_config *cfg)
2407{
2408	memset(cfg, 0, sizeof(*cfg));
2409
2410	cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2411			 : RT6_TABLE_MAIN;
2412	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2413	cfg->fc_metric = rtmsg->rtmsg_metric;
2414	cfg->fc_expires = rtmsg->rtmsg_info;
2415	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2416	cfg->fc_src_len = rtmsg->rtmsg_src_len;
2417	cfg->fc_flags = rtmsg->rtmsg_flags;
2418
2419	cfg->fc_nlinfo.nl_net = net;
2420
2421	cfg->fc_dst = rtmsg->rtmsg_dst;
2422	cfg->fc_src = rtmsg->rtmsg_src;
2423	cfg->fc_gateway = rtmsg->rtmsg_gateway;
2424}
2425
2426int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2427{
2428	struct fib6_config cfg;
2429	struct in6_rtmsg rtmsg;
2430	int err;
2431
2432	switch (cmd) {
2433	case SIOCADDRT:		/* Add a route */
2434	case SIOCDELRT:		/* Delete a route */
2435		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2436			return -EPERM;
2437		err = copy_from_user(&rtmsg, arg,
2438				     sizeof(struct in6_rtmsg));
2439		if (err)
2440			return -EFAULT;
2441
2442		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2443
2444		rtnl_lock();
2445		switch (cmd) {
2446		case SIOCADDRT:
2447			err = ip6_route_add(&cfg);
2448			break;
2449		case SIOCDELRT:
2450			err = ip6_route_del(&cfg);
2451			break;
2452		default:
2453			err = -EINVAL;
2454		}
2455		rtnl_unlock();
2456
2457		return err;
2458	}
2459
2460	return -EINVAL;
2461}
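/*
 * Usage sketch (illustration only, not part of the original file): the
 * ioctl path above is what the legacy "route" utility exercises.  A
 * minimal userspace caller might look like the following; the prefix
 * and metric are arbitrary example values:
 *
 *	struct in6_rtmsg rtmsg = { 0 };
 *	int fd = socket(AF_INET6, SOCK_DGRAM, 0);
 *
 *	inet_pton(AF_INET6, "2001:db8::", &rtmsg.rtmsg_dst);
 *	rtmsg.rtmsg_dst_len = 32;
 *	rtmsg.rtmsg_metric  = 1;
 *	rtmsg.rtmsg_flags   = RTF_UP;
 *	ioctl(fd, SIOCADDRT, &rtmsg);	(needs CAP_NET_ADMIN)
 *
 * See rtmsg_to_fib6_config() above for how each field is interpreted.
 */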
2462
2463/*
2464 *	Drop the packet on the floor
2465 */
2466
2467static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2468{
2469	int type;
2470	struct dst_entry *dst = skb_dst(skb);
2471	switch (ipstats_mib_noroutes) {
2472	case IPSTATS_MIB_INNOROUTES:
2473		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2474		if (type == IPV6_ADDR_ANY) {
2475			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2476				      IPSTATS_MIB_INADDRERRORS);
2477			break;
2478		}
2479		/* FALLTHROUGH */
2480	case IPSTATS_MIB_OUTNOROUTES:
2481		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2482			      ipstats_mib_noroutes);
2483		break;
2484	}
2485	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2486	kfree_skb(skb);
2487	return 0;
2488}
2489
2490static int ip6_pkt_discard(struct sk_buff *skb)
2491{
2492	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2493}
2494
2495static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2496{
2497	skb->dev = skb_dst(skb)->dev;
2498	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2499}
2500
2501static int ip6_pkt_prohibit(struct sk_buff *skb)
2502{
2503	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2504}
2505
2506static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2507{
2508	skb->dev = skb_dst(skb)->dev;
2509	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2510}
2511
2512/*
2513 *	Allocate a dst for local (unicast / anycast) address.
2514 */
2515
2516struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2517				    const struct in6_addr *addr,
2518				    bool anycast)
2519{
2520	u32 tb_id;
2521	struct net *net = dev_net(idev->dev);
2522	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2523					    DST_NOCOUNT);
2524	if (!rt)
2525		return ERR_PTR(-ENOMEM);
2526
2527	in6_dev_hold(idev);
2528
2529	rt->dst.flags |= DST_HOST;
2530	rt->dst.input = ip6_input;
2531	rt->dst.output = ip6_output;
2532	rt->rt6i_idev = idev;
2533
2534	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2535	if (anycast)
2536		rt->rt6i_flags |= RTF_ANYCAST;
2537	else
2538		rt->rt6i_flags |= RTF_LOCAL;
2539
2540	rt->rt6i_gateway  = *addr;
2541	rt->rt6i_dst.addr = *addr;
2542	rt->rt6i_dst.plen = 128;
2543	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2544	rt->rt6i_table = fib6_get_table(net, tb_id);
2545	rt->dst.flags |= DST_NOCACHE;
2546
2547	atomic_set(&rt->dst.__refcnt, 1);
2548
2549	return rt;
2550}
2551
2552int ip6_route_get_saddr(struct net *net,
2553			struct rt6_info *rt,
2554			const struct in6_addr *daddr,
2555			unsigned int prefs,
2556			struct in6_addr *saddr)
2557{
2558	struct inet6_dev *idev =
2559		rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
2560	int err = 0;
2561	if (rt && rt->rt6i_prefsrc.plen)
2562		*saddr = rt->rt6i_prefsrc.addr;
2563	else
2564		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2565					 daddr, prefs, saddr);
2566	return err;
2567}
2568
2569/* remove a deleted IP from prefsrc entries */
2570struct arg_dev_net_ip {
2571	struct net_device *dev;
2572	struct net *net;
2573	struct in6_addr *addr;
2574};
2575
2576static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2577{
2578	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2579	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2580	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2581
2582	if (((void *)rt->dst.dev == dev || !dev) &&
2583	    rt != net->ipv6.ip6_null_entry &&
2584	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2585		/* remove prefsrc entry */
2586		rt->rt6i_prefsrc.plen = 0;
2587	}
2588	return 0;
2589}
2590
2591void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2592{
2593	struct net *net = dev_net(ifp->idev->dev);
2594	struct arg_dev_net_ip adni = {
2595		.dev = ifp->idev->dev,
2596		.net = net,
2597		.addr = &ifp->addr,
2598	};
2599	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2600}
2601
2602#define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2603#define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
2604
2605/* Remove routers and update dst entries when a gateway turns into a host. */
2606static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2607{
2608	struct in6_addr *gateway = (struct in6_addr *)arg;
2609
2610	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2611	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2612	     ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2613		return -1;
2614	}
2615	return 0;
2616}
2617
2618void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2619{
2620	fib6_clean_all(net, fib6_clean_tohost, gateway);
2621}
2622
2623struct arg_dev_net {
2624	struct net_device *dev;
2625	struct net *net;
2626};
2627
2628static int fib6_ifdown(struct rt6_info *rt, void *arg)
2629{
2630	const struct arg_dev_net *adn = arg;
2631	const struct net_device *dev = adn->dev;
2632
2633	if ((rt->dst.dev == dev || !dev) &&
2634	    rt != adn->net->ipv6.ip6_null_entry)
2635		return -1;
2636
2637	return 0;
2638}
2639
2640void rt6_ifdown(struct net *net, struct net_device *dev)
2641{
2642	struct arg_dev_net adn = {
2643		.dev = dev,
2644		.net = net,
2645	};
2646
2647	fib6_clean_all(net, fib6_ifdown, &adn);
2648	icmp6_clean_all(fib6_ifdown, &adn);
2649	if (dev)
2650		rt6_uncached_list_flush_dev(net, dev);
2651}
2652
2653struct rt6_mtu_change_arg {
2654	struct net_device *dev;
2655	unsigned int mtu;
2656};
2657
2658static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2659{
2660	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2661	struct inet6_dev *idev;
2662
2663	/* In IPv6, PMTU discovery is not optional,
2664	   so the RTAX_MTU lock cannot disable it.
2665	   We still use this lock to block changes
2666	   caused by addrconf/ndisc.
2667	*/
2668
2669	idev = __in6_dev_get(arg->dev);
2670	if (!idev)
2671		return 0;
2672
2673	/* On an administrative MTU increase there is no way to discover
2674	   an IPv6 PMTU increase, so the PMTU must be updated here.
2675	   Since RFC 1981 doesn't cover administrative MTU increases,
2676	   updating the PMTU on increase is a MUST (e.g. jumbo frames).
2677	 */
2678	/*
2679	   If the new MTU is less than the route PMTU, the new MTU will be
2680	   the lowest MTU in the path; update the route PMTU to reflect the
2681	   decrease.  If the new MTU is greater than the route PMTU, and the
2682	   old MTU was the lowest MTU in the path, update the route PMTU
2683	   to reflect the increase.  If another node's MTU then becomes the
2684	   lowest in the path, a PACKET TOO BIG message will trigger
2685	   PMTU discovery again.
2686	 */
2687	if (rt->dst.dev == arg->dev &&
2688	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2689		if (rt->rt6i_flags & RTF_CACHE) {
2690			/* For RTF_CACHE with rt6i_pmtu == 0
2691			 * (i.e. a redirected route),
2692			 * the metrics of its rt->dst.from has already
2693			 * been updated.
2694			 */
2695			if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2696				rt->rt6i_pmtu = arg->mtu;
2697		} else if (dst_mtu(&rt->dst) >= arg->mtu ||
2698			   (dst_mtu(&rt->dst) < arg->mtu &&
2699			    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2700			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2701		}
2702	}
2703	return 0;
2704}
2705
2706void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2707{
2708	struct rt6_mtu_change_arg arg = {
2709		.dev = dev,
2710		.mtu = mtu,
2711	};
2712
2713	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2714}
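/*
 * Usage note (an assumption about the call path, for illustration):
 * rt6_mtu_change() runs when a device MTU is changed, e.g. by
 *
 *	ip link set dev eth0 mtu 1400
 *
 * and walks every route through rt6_mtu_change_route() above so that
 * cached PMTU values are reconciled with the new link MTU.
 */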
2715
2716static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2717	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2718	[RTA_OIF]               = { .type = NLA_U32 },
2719	[RTA_IIF]		= { .type = NLA_U32 },
2720	[RTA_PRIORITY]          = { .type = NLA_U32 },
2721	[RTA_METRICS]           = { .type = NLA_NESTED },
2722	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
2723	[RTA_PREF]              = { .type = NLA_U8 },
2724	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
2725	[RTA_ENCAP]		= { .type = NLA_NESTED },
2726	[RTA_EXPIRES]		= { .type = NLA_U32 },
2727};
2728
2729static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2730			      struct fib6_config *cfg)
2731{
2732	struct rtmsg *rtm;
2733	struct nlattr *tb[RTA_MAX+1];
2734	unsigned int pref;
2735	int err;
2736
2737	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2738	if (err < 0)
2739		goto errout;
2740
2741	err = -EINVAL;
2742	rtm = nlmsg_data(nlh);
2743	memset(cfg, 0, sizeof(*cfg));
2744
2745	cfg->fc_table = rtm->rtm_table;
2746	cfg->fc_dst_len = rtm->rtm_dst_len;
2747	cfg->fc_src_len = rtm->rtm_src_len;
2748	cfg->fc_flags = RTF_UP;
2749	cfg->fc_protocol = rtm->rtm_protocol;
2750	cfg->fc_type = rtm->rtm_type;
2751
2752	if (rtm->rtm_type == RTN_UNREACHABLE ||
2753	    rtm->rtm_type == RTN_BLACKHOLE ||
2754	    rtm->rtm_type == RTN_PROHIBIT ||
2755	    rtm->rtm_type == RTN_THROW)
2756		cfg->fc_flags |= RTF_REJECT;
2757
2758	if (rtm->rtm_type == RTN_LOCAL)
2759		cfg->fc_flags |= RTF_LOCAL;
2760
2761	if (rtm->rtm_flags & RTM_F_CLONED)
2762		cfg->fc_flags |= RTF_CACHE;
2763
2764	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2765	cfg->fc_nlinfo.nlh = nlh;
2766	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2767
2768	if (tb[RTA_GATEWAY]) {
2769		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2770		cfg->fc_flags |= RTF_GATEWAY;
2771	}
2772
2773	if (tb[RTA_DST]) {
2774		int plen = (rtm->rtm_dst_len + 7) >> 3;
2775
2776		if (nla_len(tb[RTA_DST]) < plen)
2777			goto errout;
2778
2779		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2780	}
2781
2782	if (tb[RTA_SRC]) {
2783		int plen = (rtm->rtm_src_len + 7) >> 3;
2784
2785		if (nla_len(tb[RTA_SRC]) < plen)
2786			goto errout;
2787
2788		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2789	}
2790
2791	if (tb[RTA_PREFSRC])
2792		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2793
2794	if (tb[RTA_OIF])
2795		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2796
2797	if (tb[RTA_PRIORITY])
2798		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2799
2800	if (tb[RTA_METRICS]) {
2801		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2802		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2803	}
2804
2805	if (tb[RTA_TABLE])
2806		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2807
2808	if (tb[RTA_MULTIPATH]) {
2809		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2810		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2811	}
2812
2813	if (tb[RTA_PREF]) {
2814		pref = nla_get_u8(tb[RTA_PREF]);
2815		if (pref != ICMPV6_ROUTER_PREF_LOW &&
2816		    pref != ICMPV6_ROUTER_PREF_HIGH)
2817			pref = ICMPV6_ROUTER_PREF_MEDIUM;
2818		cfg->fc_flags |= RTF_PREF(pref);
2819	}
2820
2821	if (tb[RTA_ENCAP])
2822		cfg->fc_encap = tb[RTA_ENCAP];
2823
2824	if (tb[RTA_ENCAP_TYPE])
2825		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2826
2827	if (tb[RTA_EXPIRES]) {
2828		unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
2829
2830		if (addrconf_finite_timeout(timeout)) {
2831			cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
2832			cfg->fc_flags |= RTF_EXPIRES;
2833		}
2834	}
2835
2836	err = 0;
2837errout:
2838	return err;
2839}
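/*
 * Sketch (illustrative mapping, not part of the original file): the
 * attributes parsed above line up with a typical command such as
 *
 *	ip -6 route add 2001:db8::/32 via fe80::1 dev eth0 metric 1024
 *
 * where RTA_DST plus rtm_dst_len carry the prefix, RTA_GATEWAY the
 * nexthop, RTA_OIF the device and RTA_PRIORITY the metric.
 */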
2840
2841struct rt6_nh {
2842	struct rt6_info *rt6_info;
2843	struct fib6_config r_cfg;
2844	struct mx6_config mxc;
2845	struct list_head next;
2846};
2847
2848static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2849{
2850	struct rt6_nh *nh;
2851
2852	list_for_each_entry(nh, rt6_nh_list, next) {
2853		pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2854		        &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2855		        nh->r_cfg.fc_ifindex);
2856	}
2857}
2858
2859static int ip6_route_info_append(struct list_head *rt6_nh_list,
2860				 struct rt6_info *rt, struct fib6_config *r_cfg)
2861{
2862	struct rt6_nh *nh;
2863	struct rt6_info *rtnh;
2864	int err = -EEXIST;
2865
2866	list_for_each_entry(nh, rt6_nh_list, next) {
2867		/* check if rt6_info already exists */
2868		rtnh = nh->rt6_info;
2869
2870		if (rtnh->dst.dev == rt->dst.dev &&
2871		    rtnh->rt6i_idev == rt->rt6i_idev &&
2872		    ipv6_addr_equal(&rtnh->rt6i_gateway,
2873				    &rt->rt6i_gateway))
2874			return err;
2875	}
2876
2877	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2878	if (!nh)
2879		return -ENOMEM;
2880	nh->rt6_info = rt;
2881	err = ip6_convert_metrics(&nh->mxc, r_cfg);
2882	if (err) {
2883		kfree(nh);
2884		return err;
2885	}
2886	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2887	list_add_tail(&nh->next, rt6_nh_list);
2888
2889	return 0;
2890}
2891
2892static int ip6_route_multipath_add(struct fib6_config *cfg)
2893{
2894	struct fib6_config r_cfg;
2895	struct rtnexthop *rtnh;
2896	struct rt6_info *rt;
2897	struct rt6_nh *err_nh;
2898	struct rt6_nh *nh, *nh_safe;
2899	int remaining;
2900	int attrlen;
2901	int err = 1;
2902	int nhn = 0;
2903	int replace = (cfg->fc_nlinfo.nlh &&
2904		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
2905	LIST_HEAD(rt6_nh_list);
2906
2907	remaining = cfg->fc_mp_len;
2908	rtnh = (struct rtnexthop *)cfg->fc_mp;
2909
2910	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
2911	 * rt6_info structs per nexthop
2912	 */
2913	while (rtnh_ok(rtnh, remaining)) {
2914		memcpy(&r_cfg, cfg, sizeof(*cfg));
2915		if (rtnh->rtnh_ifindex)
2916			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2917
2918		attrlen = rtnh_attrlen(rtnh);
2919		if (attrlen > 0) {
2920			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2921
2922			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2923			if (nla) {
2924				r_cfg.fc_gateway = nla_get_in6_addr(nla);
2925				r_cfg.fc_flags |= RTF_GATEWAY;
2926			}
2927			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
2928			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
2929			if (nla)
2930				r_cfg.fc_encap_type = nla_get_u16(nla);
2931		}
2932
2933		rt = ip6_route_info_create(&r_cfg);
2934		if (IS_ERR(rt)) {
2935			err = PTR_ERR(rt);
2936			rt = NULL;
2937			goto cleanup;
2938		}
2939
2940		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
2941		if (err) {
2942			dst_free(&rt->dst);
2943			goto cleanup;
2944		}
2945
2946		rtnh = rtnh_next(rtnh, &remaining);
2947	}
2948
2949	err_nh = NULL;
2950	list_for_each_entry(nh, &rt6_nh_list, next) {
2951		err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
2952		/* nh->rt6_info has been used or freed at this point; reset it to NULL */
2953		nh->rt6_info = NULL;
2954		if (err) {
2955			if (replace && nhn)
2956				ip6_print_replace_route_err(&rt6_nh_list);
2957			err_nh = nh;
2958			goto add_errout;
2959		}
2960
2961		/* Because each route is added as if it were a single route, we
2962		 * remove these flags after the first nexthop: if there is a
2963		 * collision, we have already failed to add the first nexthop and
2964		 * fib6_add_rt2node() has rejected it; when replacing, the old
2965		 * nexthops have been replaced by the first new one, and the rest
2966		 * should be appended to it.
2967		 */
2968		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2969						     NLM_F_REPLACE);
2970		nhn++;
2971	}
2972
2973	goto cleanup;
2974
2975add_errout:
2976	/* Delete routes that were already added */
2977	list_for_each_entry(nh, &rt6_nh_list, next) {
2978		if (err_nh == nh)
2979			break;
2980		ip6_route_del(&nh->r_cfg);
2981	}
2982
2983cleanup:
2984	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
2985		if (nh->rt6_info)
2986			dst_free(&nh->rt6_info->dst);
2987		kfree(nh->mxc.mx);
2988		list_del(&nh->next);
2989		kfree(nh);
2990	}
2991
2992	return err;
2993}
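/*
 * Illustration (assumed invocation, not from the original file): the
 * multipath add path above services commands such as
 *
 *	ip -6 route add 2001:db8::/32 \
 *		nexthop via fe80::1 dev eth0 \
 *		nexthop via fe80::2 dev eth1
 *
 * where each "nexthop" clause arrives as one struct rtnexthop inside
 * the RTA_MULTIPATH attribute and becomes one rt6_nh list entry.
 */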
2994
2995static int ip6_route_multipath_del(struct fib6_config *cfg)
2996{
2997	struct fib6_config r_cfg;
2998	struct rtnexthop *rtnh;
2999	int remaining;
3000	int attrlen;
3001	int err = 1, last_err = 0;
3002
3003	remaining = cfg->fc_mp_len;
3004	rtnh = (struct rtnexthop *)cfg->fc_mp;
3005
3006	/* Parse a Multipath Entry */
3007	while (rtnh_ok(rtnh, remaining)) {
3008		memcpy(&r_cfg, cfg, sizeof(*cfg));
3009		if (rtnh->rtnh_ifindex)
3010			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3011
3012		attrlen = rtnh_attrlen(rtnh);
3013		if (attrlen > 0) {
3014			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3015
3016			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3017			if (nla) {
3018				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3019				r_cfg.fc_flags |= RTF_GATEWAY;
3020			}
3021		}
3022		err = ip6_route_del(&r_cfg);
3023		if (err)
3024			last_err = err;
3025
3026		rtnh = rtnh_next(rtnh, &remaining);
3027	}
3028
3029	return last_err;
3030}
3031
3032static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3033{
3034	struct fib6_config cfg;
3035	int err;
3036
3037	err = rtm_to_fib6_config(skb, nlh, &cfg);
3038	if (err < 0)
3039		return err;
3040
3041	if (cfg.fc_mp)
3042		return ip6_route_multipath_del(&cfg);
3043	else
3044		return ip6_route_del(&cfg);
3045}
3046
3047static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3048{
3049	struct fib6_config cfg;
3050	int err;
3051
3052	err = rtm_to_fib6_config(skb, nlh, &cfg);
3053	if (err < 0)
3054		return err;
3055
3056	if (cfg.fc_mp)
3057		return ip6_route_multipath_add(&cfg);
3058	else
3059		return ip6_route_add(&cfg);
3060}
3061
3062static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
3063{
3064	return NLMSG_ALIGN(sizeof(struct rtmsg))
3065	       + nla_total_size(16) /* RTA_SRC */
3066	       + nla_total_size(16) /* RTA_DST */
3067	       + nla_total_size(16) /* RTA_GATEWAY */
3068	       + nla_total_size(16) /* RTA_PREFSRC */
3069	       + nla_total_size(4) /* RTA_TABLE */
3070	       + nla_total_size(4) /* RTA_IIF */
3071	       + nla_total_size(4) /* RTA_OIF */
3072	       + nla_total_size(4) /* RTA_PRIORITY */
3073	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
3074	       + nla_total_size(sizeof(struct rta_cacheinfo))
3075	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
3076	       + nla_total_size(1) /* RTA_PREF */
3077	       + lwtunnel_get_encap_size(rt->dst.lwtstate);
3078}
3079
3080static int rt6_fill_node(struct net *net,
3081			 struct sk_buff *skb, struct rt6_info *rt,
3082			 struct in6_addr *dst, struct in6_addr *src,
3083			 int iif, int type, u32 portid, u32 seq,
3084			 int prefix, int nowait, unsigned int flags)
3085{
3086	u32 metrics[RTAX_MAX];
3087	struct rtmsg *rtm;
3088	struct nlmsghdr *nlh;
3089	long expires;
3090	u32 table;
3091
3092	if (prefix) {	/* user wants prefix routes only */
3093		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
3094			/* success since this is not a prefix route */
3095			return 1;
3096		}
3097	}
3098
3099	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
3100	if (!nlh)
3101		return -EMSGSIZE;
3102
3103	rtm = nlmsg_data(nlh);
3104	rtm->rtm_family = AF_INET6;
3105	rtm->rtm_dst_len = rt->rt6i_dst.plen;
3106	rtm->rtm_src_len = rt->rt6i_src.plen;
3107	rtm->rtm_tos = 0;
3108	if (rt->rt6i_table)
3109		table = rt->rt6i_table->tb6_id;
3110	else
3111		table = RT6_TABLE_UNSPEC;
3112	rtm->rtm_table = table;
3113	if (nla_put_u32(skb, RTA_TABLE, table))
3114		goto nla_put_failure;
3115	if (rt->rt6i_flags & RTF_REJECT) {
3116		switch (rt->dst.error) {
3117		case -EINVAL:
3118			rtm->rtm_type = RTN_BLACKHOLE;
3119			break;
3120		case -EACCES:
3121			rtm->rtm_type = RTN_PROHIBIT;
3122			break;
3123		case -EAGAIN:
3124			rtm->rtm_type = RTN_THROW;
3125			break;
3126		default:
3127			rtm->rtm_type = RTN_UNREACHABLE;
3128			break;
3129		}
3130	}
3131	else if (rt->rt6i_flags & RTF_LOCAL)
3132		rtm->rtm_type = RTN_LOCAL;
3133	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
3134		rtm->rtm_type = RTN_LOCAL;
3135	else
3136		rtm->rtm_type = RTN_UNICAST;
3137	rtm->rtm_flags = 0;
3138	if (!netif_carrier_ok(rt->dst.dev)) {
3139		rtm->rtm_flags |= RTNH_F_LINKDOWN;
3140		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3141			rtm->rtm_flags |= RTNH_F_DEAD;
3142	}
3143	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3144	rtm->rtm_protocol = rt->rt6i_protocol;
3145	if (rt->rt6i_flags & RTF_DYNAMIC)
3146		rtm->rtm_protocol = RTPROT_REDIRECT;
3147	else if (rt->rt6i_flags & RTF_ADDRCONF) {
3148		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
3149			rtm->rtm_protocol = RTPROT_RA;
3150		else
3151			rtm->rtm_protocol = RTPROT_KERNEL;
3152	}
3153
3154	if (rt->rt6i_flags & RTF_CACHE)
3155		rtm->rtm_flags |= RTM_F_CLONED;
3156
3157	if (dst) {
3158		if (nla_put_in6_addr(skb, RTA_DST, dst))
3159			goto nla_put_failure;
3160		rtm->rtm_dst_len = 128;
3161	} else if (rtm->rtm_dst_len)
3162		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
3163			goto nla_put_failure;
3164#ifdef CONFIG_IPV6_SUBTREES
3165	if (src) {
3166		if (nla_put_in6_addr(skb, RTA_SRC, src))
3167			goto nla_put_failure;
3168		rtm->rtm_src_len = 128;
3169	} else if (rtm->rtm_src_len &&
3170		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
3171		goto nla_put_failure;
3172#endif
3173	if (iif) {
3174#ifdef CONFIG_IPV6_MROUTE
3175		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
3176			int err = ip6mr_get_route(net, skb, rtm, nowait);
3177			if (err <= 0) {
3178				if (!nowait) {
3179					if (err == 0)
3180						return 0;
3181					goto nla_put_failure;
3182				} else {
3183					if (err == -EMSGSIZE)
3184						goto nla_put_failure;
3185				}
3186			}
3187		} else
3188#endif
3189			if (nla_put_u32(skb, RTA_IIF, iif))
3190				goto nla_put_failure;
3191	} else if (dst) {
3192		struct in6_addr saddr_buf;
3193		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
3194		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3195			goto nla_put_failure;
3196	}
3197
3198	if (rt->rt6i_prefsrc.plen) {
3199		struct in6_addr saddr_buf;
3200		saddr_buf = rt->rt6i_prefsrc.addr;
3201		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3202			goto nla_put_failure;
3203	}
3204
3205	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3206	if (rt->rt6i_pmtu)
3207		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3208	if (rtnetlink_put_metrics(skb, metrics) < 0)
3209		goto nla_put_failure;
3210
3211	if (rt->rt6i_flags & RTF_GATEWAY) {
3212		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3213			goto nla_put_failure;
3214	}
3215
3216	if (rt->dst.dev &&
3217	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3218		goto nla_put_failure;
3219	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3220		goto nla_put_failure;
3221
3222	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
3223
3224	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
3225		goto nla_put_failure;
3226
3227	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3228		goto nla_put_failure;
3229
3230	lwtunnel_fill_encap(skb, rt->dst.lwtstate);
3231
3232	nlmsg_end(skb, nlh);
3233	return 0;
3234
3235nla_put_failure:
3236	nlmsg_cancel(skb, nlh);
3237	return -EMSGSIZE;
3238}
3239
3240int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3241{
3242	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3243	int prefix;
3244
3245	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3246		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3247		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3248	} else
3249		prefix = 0;
3250
3251	return rt6_fill_node(arg->net,
3252		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3253		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3254		     prefix, 0, NLM_F_MULTI);
3255}
3256
3257static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
3258{
3259	struct net *net = sock_net(in_skb->sk);
3260	struct nlattr *tb[RTA_MAX+1];
3261	struct rt6_info *rt;
3262	struct sk_buff *skb;
3263	struct rtmsg *rtm;
3264	struct flowi6 fl6;
3265	int err, iif = 0, oif = 0;
3266
3267	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3268	if (err < 0)
3269		goto errout;
3270
3271	err = -EINVAL;
3272	memset(&fl6, 0, sizeof(fl6));
3273
3274	if (tb[RTA_SRC]) {
3275		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3276			goto errout;
3277
3278		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
3279	}
3280
3281	if (tb[RTA_DST]) {
3282		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3283			goto errout;
3284
3285		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
3286	}
3287
3288	if (tb[RTA_IIF])
3289		iif = nla_get_u32(tb[RTA_IIF]);
3290
3291	if (tb[RTA_OIF])
3292		oif = nla_get_u32(tb[RTA_OIF]);
3293
3294	if (tb[RTA_MARK])
3295		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3296
3297	if (iif) {
3298		struct net_device *dev;
3299		int flags = 0;
3300
3301		dev = __dev_get_by_index(net, iif);
3302		if (!dev) {
3303			err = -ENODEV;
3304			goto errout;
3305		}
3306
3307		fl6.flowi6_iif = iif;
3308
3309		if (!ipv6_addr_any(&fl6.saddr))
3310			flags |= RT6_LOOKUP_F_HAS_SADDR;
3311
3312		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3313							       flags);
3314	} else {
3315		fl6.flowi6_oif = oif;
3316
3317		if (netif_index_is_l3_master(net, oif)) {
3318			fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
3319					   FLOWI_FLAG_SKIP_NH_OIF;
3320		}
3321
3322		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
3323	}
3324
3325	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
3326	if (!skb) {
3327		ip6_rt_put(rt);
3328		err = -ENOBUFS;
3329		goto errout;
3330	}
3331
3332	/* Reserve room for dummy headers; this skb can pass
3333	   through a good chunk of the routing engine.
3334	 */
3335	skb_reset_mac_header(skb);
3336	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
3337
3338	skb_dst_set(skb, &rt->dst);
3339
3340	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
3341			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3342			    nlh->nlmsg_seq, 0, 0, 0);
3343	if (err < 0) {
3344		kfree_skb(skb);
3345		goto errout;
3346	}
3347
3348	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3349errout:
3350	return err;
3351}
3352
3353void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3354		     unsigned int nlm_flags)
3355{
3356	struct sk_buff *skb;
3357	struct net *net = info->nl_net;
3358	u32 seq;
3359	int err;
3360
3361	err = -ENOBUFS;
3362	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3363
3364	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3365	if (!skb)
3366		goto errout;
3367
3368	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3369				event, info->portid, seq, 0, 0, nlm_flags);
3370	if (err < 0) {
3371		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3372		WARN_ON(err == -EMSGSIZE);
3373		kfree_skb(skb);
3374		goto errout;
3375	}
3376	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3377		    info->nlh, gfp_any());
3378	return;
3379errout:
3380	if (err < 0)
3381		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
3382}
3383
3384static int ip6_route_dev_notify(struct notifier_block *this,
3385				unsigned long event, void *ptr)
3386{
3387	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3388	struct net *net = dev_net(dev);
3389
3390	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
3391		net->ipv6.ip6_null_entry->dst.dev = dev;
3392		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3393#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3394		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3395		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3396		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3397		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3398#endif
3399	}
3400
3401	return NOTIFY_OK;
3402}
3403
3404/*
3405 *	/proc
3406 */
3407
3408#ifdef CONFIG_PROC_FS
3409
3410static const struct file_operations ipv6_route_proc_fops = {
3411	.owner		= THIS_MODULE,
3412	.open		= ipv6_route_open,
3413	.read		= seq_read,
3414	.llseek		= seq_lseek,
3415	.release	= seq_release_net,
3416};
3417
3418static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3419{
3420	struct net *net = (struct net *)seq->private;
3421	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3422		   net->ipv6.rt6_stats->fib_nodes,
3423		   net->ipv6.rt6_stats->fib_route_nodes,
3424		   net->ipv6.rt6_stats->fib_rt_alloc,
3425		   net->ipv6.rt6_stats->fib_rt_entries,
3426		   net->ipv6.rt6_stats->fib_rt_cache,
3427		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3428		   net->ipv6.rt6_stats->fib_discarded_routes);
3429
3430	return 0;
3431}
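/*
 * Example output (values invented for illustration): reading the file
 * yields one line of seven hex words, in the seq_printf() order above:
 *
 *	$ cat /proc/net/rt6_stats
 *	0043 006e 0001 0041 0000 0005 0000
 *
 * i.e. fib_nodes, fib_route_nodes, fib_rt_alloc, fib_rt_entries,
 * fib_rt_cache, the current dst entry count and fib_discarded_routes.
 */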
3432
3433static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3434{
3435	return single_open_net(inode, file, rt6_stats_seq_show);
3436}
3437
3438static const struct file_operations rt6_stats_seq_fops = {
3439	.owner	 = THIS_MODULE,
3440	.open	 = rt6_stats_seq_open,
3441	.read	 = seq_read,
3442	.llseek	 = seq_lseek,
3443	.release = single_release_net,
3444};
3445#endif	/* CONFIG_PROC_FS */
3446
3447#ifdef CONFIG_SYSCTL
3448
3449static
3450int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3451			      void __user *buffer, size_t *lenp, loff_t *ppos)
3452{
3453	struct net *net;
3454	int delay;
3455	if (!write)
3456		return -EINVAL;
3457
3458	net = (struct net *)ctl->extra1;
3459	delay = net->ipv6.sysctl.flush_delay;
3460	proc_dointvec(ctl, write, buffer, lenp, ppos);
3461	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3462	return 0;
3463}
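/*
 * Usage sketch (assumed administrator interaction, for illustration):
 * this handler is reached by writing to the per-namespace flush file:
 *
 *	echo 0 > /proc/sys/net/ipv6/route/flush
 *
 * A non-positive delay requests an immediate flush; a positive value
 * is handed to fib6_run_gc() as the expiry argument.
 */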
3464
3465struct ctl_table ipv6_route_table_template[] = {
3466	{
3467		.procname	=	"flush",
3468		.data		=	&init_net.ipv6.sysctl.flush_delay,
3469		.maxlen		=	sizeof(int),
3470		.mode		=	0200,
3471		.proc_handler	=	ipv6_sysctl_rtcache_flush
3472	},
3473	{
3474		.procname	=	"gc_thresh",
3475		.data		=	&ip6_dst_ops_template.gc_thresh,
3476		.maxlen		=	sizeof(int),
3477		.mode		=	0644,
3478		.proc_handler	=	proc_dointvec,
3479	},
3480	{
3481		.procname	=	"max_size",
3482		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
3483		.maxlen		=	sizeof(int),
3484		.mode		=	0644,
3485		.proc_handler	=	proc_dointvec,
3486	},
3487	{
3488		.procname	=	"gc_min_interval",
3489		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3490		.maxlen		=	sizeof(int),
3491		.mode		=	0644,
3492		.proc_handler	=	proc_dointvec_jiffies,
3493	},
3494	{
3495		.procname	=	"gc_timeout",
3496		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
3497		.maxlen		=	sizeof(int),
3498		.mode		=	0644,
3499		.proc_handler	=	proc_dointvec_jiffies,
3500	},
3501	{
3502		.procname	=	"gc_interval",
3503		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
3504		.maxlen		=	sizeof(int),
3505		.mode		=	0644,
3506		.proc_handler	=	proc_dointvec_jiffies,
3507	},
3508	{
3509		.procname	=	"gc_elasticity",
3510		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3511		.maxlen		=	sizeof(int),
3512		.mode		=	0644,
3513		.proc_handler	=	proc_dointvec,
3514	},
3515	{
3516		.procname	=	"mtu_expires",
3517		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3518		.maxlen		=	sizeof(int),
3519		.mode		=	0644,
3520		.proc_handler	=	proc_dointvec_jiffies,
3521	},
3522	{
3523		.procname	=	"min_adv_mss",
3524		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
3525		.maxlen		=	sizeof(int),
3526		.mode		=	0644,
3527		.proc_handler	=	proc_dointvec,
3528	},
3529	{
3530		.procname	=	"gc_min_interval_ms",
3531		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3532		.maxlen		=	sizeof(int),
3533		.mode		=	0644,
3534		.proc_handler	=	proc_dointvec_ms_jiffies,
3535	},
3536	{ }
3537};
3538
3539struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3540{
3541	struct ctl_table *table;
3542
3543	table = kmemdup(ipv6_route_table_template,
3544			sizeof(ipv6_route_table_template),
3545			GFP_KERNEL);
3546
3547	if (table) {
3548		table[0].data = &net->ipv6.sysctl.flush_delay;
3549		table[0].extra1 = net;
3550		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3551		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3552		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3553		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3554		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3555		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3556		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3557		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3558		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3559
3560		/* Don't export sysctls to unprivileged users */
3561		if (net->user_ns != &init_user_ns)
3562			table[0].procname = NULL;
3563	}
3564
3565	return table;
3566}
3567#endif
3568
3569static int __net_init ip6_route_net_init(struct net *net)
3570{
3571	int ret = -ENOMEM;
3572
3573	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3574	       sizeof(net->ipv6.ip6_dst_ops));
3575
3576	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3577		goto out_ip6_dst_ops;
3578
3579	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3580					   sizeof(*net->ipv6.ip6_null_entry),
3581					   GFP_KERNEL);
3582	if (!net->ipv6.ip6_null_entry)
3583		goto out_ip6_dst_entries;
3584	net->ipv6.ip6_null_entry->dst.path =
3585		(struct dst_entry *)net->ipv6.ip6_null_entry;
3586	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3587	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3588			 ip6_template_metrics, true);
3589
3590#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3591	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3592					       sizeof(*net->ipv6.ip6_prohibit_entry),
3593					       GFP_KERNEL);
3594	if (!net->ipv6.ip6_prohibit_entry)
3595		goto out_ip6_null_entry;
3596	net->ipv6.ip6_prohibit_entry->dst.path =
3597		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3598	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3599	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3600			 ip6_template_metrics, true);
3601
3602	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3603					       sizeof(*net->ipv6.ip6_blk_hole_entry),
3604					       GFP_KERNEL);
3605	if (!net->ipv6.ip6_blk_hole_entry)
3606		goto out_ip6_prohibit_entry;
3607	net->ipv6.ip6_blk_hole_entry->dst.path =
3608		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3609	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3610	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3611			 ip6_template_metrics, true);
3612#endif
3613
3614	net->ipv6.sysctl.flush_delay = 0;
3615	net->ipv6.sysctl.ip6_rt_max_size = 4096;
3616	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3617	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3618	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3619	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3620	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3621	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3622
3623	net->ipv6.ip6_rt_gc_expire = 30*HZ;
3624
3625	ret = 0;
3626out:
3627	return ret;
3628
3629#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3630out_ip6_prohibit_entry:
3631	kfree(net->ipv6.ip6_prohibit_entry);
3632out_ip6_null_entry:
3633	kfree(net->ipv6.ip6_null_entry);
3634#endif
3635out_ip6_dst_entries:
3636	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3637out_ip6_dst_ops:
3638	goto out;
3639}
3640
3641static void __net_exit ip6_route_net_exit(struct net *net)
3642{
3643	kfree(net->ipv6.ip6_null_entry);
3644#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3645	kfree(net->ipv6.ip6_prohibit_entry);
3646	kfree(net->ipv6.ip6_blk_hole_entry);
3647#endif
3648	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3649}
3650
3651static int __net_init ip6_route_net_init_late(struct net *net)
3652{
3653#ifdef CONFIG_PROC_FS
3654	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3655	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3656#endif
3657	return 0;
3658}
3659
3660static void __net_exit ip6_route_net_exit_late(struct net *net)
3661{
3662#ifdef CONFIG_PROC_FS
3663	remove_proc_entry("ipv6_route", net->proc_net);
3664	remove_proc_entry("rt6_stats", net->proc_net);
3665#endif
3666}
3667
3668static struct pernet_operations ip6_route_net_ops = {
3669	.init = ip6_route_net_init,
3670	.exit = ip6_route_net_exit,
3671};
3672
3673static int __net_init ipv6_inetpeer_init(struct net *net)
3674{
3675	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3676
3677	if (!bp)
3678		return -ENOMEM;
3679	inet_peer_base_init(bp);
3680	net->ipv6.peers = bp;
3681	return 0;
3682}
3683
3684static void __net_exit ipv6_inetpeer_exit(struct net *net)
3685{
3686	struct inet_peer_base *bp = net->ipv6.peers;
3687
3688	net->ipv6.peers = NULL;
3689	inetpeer_invalidate_tree(bp);
3690	kfree(bp);
3691}
3692
3693static struct pernet_operations ipv6_inetpeer_ops = {
3694	.init	=	ipv6_inetpeer_init,
3695	.exit	=	ipv6_inetpeer_exit,
3696};
3697
3698static struct pernet_operations ip6_route_net_late_ops = {
3699	.init = ip6_route_net_init_late,
3700	.exit = ip6_route_net_exit_late,
3701};
3702
3703static struct notifier_block ip6_route_dev_notifier = {
3704	.notifier_call = ip6_route_dev_notify,
3705	.priority = 0,
3706};
3707
3708int __init ip6_route_init(void)
3709{
3710	int ret;
3711	int cpu;
3712
3713	ret = -ENOMEM;
3714	ip6_dst_ops_template.kmem_cachep =
3715		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3716				  SLAB_HWCACHE_ALIGN, NULL);
3717	if (!ip6_dst_ops_template.kmem_cachep)
3718		goto out;
3719
3720	ret = dst_entries_init(&ip6_dst_blackhole_ops);
3721	if (ret)
3722		goto out_kmem_cache;
3723
3724	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3725	if (ret)
3726		goto out_dst_entries;
3727
3728	ret = register_pernet_subsys(&ip6_route_net_ops);
3729	if (ret)
3730		goto out_register_inetpeer;
3731
3732	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3733
3734	/* The loopback device is registered before this portion of code runs,
3735	 * so the loopback reference in rt6_info is not taken there; take it
3736	 * manually for init_net */
3737	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3738	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3739  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3740	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3741	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3742	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3743	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3744  #endif
3745	ret = fib6_init();
3746	if (ret)
3747		goto out_register_subsys;
3748
3749	ret = xfrm6_init();
3750	if (ret)
3751		goto out_fib6_init;
3752
3753	ret = fib6_rules_init();
3754	if (ret)
3755		goto xfrm6_init;
3756
3757	ret = register_pernet_subsys(&ip6_route_net_late_ops);
3758	if (ret)
3759		goto fib6_rules_init;
3760
3761	ret = -ENOBUFS;
3762	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3763	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3764	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3765		goto out_register_late_subsys;
3766
3767	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3768	if (ret)
3769		goto out_register_late_subsys;
3770
3771	for_each_possible_cpu(cpu) {
3772		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3773
3774		INIT_LIST_HEAD(&ul->head);
3775		spin_lock_init(&ul->lock);
3776	}
3777
3778out:
3779	return ret;
3780
3781out_register_late_subsys:
3782	unregister_pernet_subsys(&ip6_route_net_late_ops);
3783fib6_rules_init:
3784	fib6_rules_cleanup();
3785xfrm6_init:
3786	xfrm6_fini();
3787out_fib6_init:
3788	fib6_gc_cleanup();
3789out_register_subsys:
3790	unregister_pernet_subsys(&ip6_route_net_ops);
3791out_register_inetpeer:
3792	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3793out_dst_entries:
3794	dst_entries_destroy(&ip6_dst_blackhole_ops);
3795out_kmem_cache:
3796	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3797	goto out;
3798}
3799
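/* Illustrative sketch (not part of the original file): ip6_route_init() above
 * uses the common goto-based unwind idiom, tearing everything down in exactly
 * the reverse order of setup. Reduced form with hypothetical helpers:
 */
#if 0
static int __init example_init(void)
{
	int ret;

	ret = setup_a();	/* hypothetical */
	if (ret)
		goto out;
	ret = setup_b();	/* hypothetical */
	if (ret)
		goto undo_a;
	return 0;

undo_a:
	teardown_a();		/* unwind in reverse order of setup */
out:
	return ret;
}
#endif
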
3800void ip6_route_cleanup(void)
3801{
3802	unregister_netdevice_notifier(&ip6_route_dev_notifier);
3803	unregister_pernet_subsys(&ip6_route_net_late_ops);
3804	fib6_rules_cleanup();
3805	xfrm6_fini();
3806	fib6_gc_cleanup();
3807	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3808	unregister_pernet_subsys(&ip6_route_net_ops);
3809	dst_entries_destroy(&ip6_dst_blackhole_ops);
3810	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3811}
v5.9
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *	Linux INET6 implementation
   4 *	FIB front-end.
   5 *
   6 *	Authors:
   7 *	Pedro Roque		<roque@di.fc.ul.pt>
   8 */
   9
  10/*	Changes:
  11 *
  12 *	YOSHIFUJI Hideaki @USAGI
  13 *		reworked default router selection.
  14 *		- respect outgoing interface
  15 *		- select from (probably) reachable routers (i.e.
  16 *		routers in REACHABLE, STALE, DELAY or PROBE states).
  17 *		- always select the same router if it is (probably)
  18 *		reachable.  otherwise, round-robin the list.
  19 *	Ville Nuorvala
  20 *		Fixed routing subtrees.
  21 */
  22
  23#define pr_fmt(fmt) "IPv6: " fmt
  24
  25#include <linux/capability.h>
  26#include <linux/errno.h>
  27#include <linux/export.h>
  28#include <linux/types.h>
  29#include <linux/times.h>
  30#include <linux/socket.h>
  31#include <linux/sockios.h>
  32#include <linux/net.h>
  33#include <linux/route.h>
  34#include <linux/netdevice.h>
  35#include <linux/in6.h>
  36#include <linux/mroute6.h>
  37#include <linux/init.h>
  38#include <linux/if_arp.h>
  39#include <linux/proc_fs.h>
  40#include <linux/seq_file.h>
  41#include <linux/nsproxy.h>
  42#include <linux/slab.h>
  43#include <linux/jhash.h>
  44#include <net/net_namespace.h>
  45#include <net/snmp.h>
  46#include <net/ipv6.h>
  47#include <net/ip6_fib.h>
  48#include <net/ip6_route.h>
  49#include <net/ndisc.h>
  50#include <net/addrconf.h>
  51#include <net/tcp.h>
  52#include <linux/rtnetlink.h>
  53#include <net/dst.h>
  54#include <net/dst_metadata.h>
  55#include <net/xfrm.h>
  56#include <net/netevent.h>
  57#include <net/netlink.h>
  58#include <net/rtnh.h>
  59#include <net/lwtunnel.h>
  60#include <net/ip_tunnels.h>
  61#include <net/l3mdev.h>
  62#include <net/ip.h>
  63#include <linux/uaccess.h>
  64#include <linux/btf_ids.h>
  65
  66#ifdef CONFIG_SYSCTL
  67#include <linux/sysctl.h>
  68#endif
  69
  70static int ip6_rt_type_to_error(u8 fib6_type);
  71
  72#define CREATE_TRACE_POINTS
  73#include <trace/events/fib6.h>
  74EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
  75#undef CREATE_TRACE_POINTS
  76
  77enum rt6_nud_state {
  78	RT6_NUD_FAIL_HARD = -3,
  79	RT6_NUD_FAIL_PROBE = -2,
  80	RT6_NUD_FAIL_DO_RR = -1,
  81	RT6_NUD_SUCCEED = 1
  82};
  83
  84static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
  85static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
  86static unsigned int	 ip6_mtu(const struct dst_entry *dst);
  87static struct dst_entry *ip6_negative_advice(struct dst_entry *);
  88static void		ip6_dst_destroy(struct dst_entry *);
  89static void		ip6_dst_ifdown(struct dst_entry *,
  90				       struct net_device *dev, int how);
  91static int		 ip6_dst_gc(struct dst_ops *ops);
  92
  93static int		ip6_pkt_discard(struct sk_buff *skb);
  94static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
  95static int		ip6_pkt_prohibit(struct sk_buff *skb);
  96static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
  97static void		ip6_link_failure(struct sk_buff *skb);
  98static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
  99					   struct sk_buff *skb, u32 mtu,
 100					   bool confirm_neigh);
 101static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
 102					struct sk_buff *skb);
 103static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
 104			   int strict);
 105static size_t rt6_nlmsg_size(struct fib6_info *f6i);
 106static int rt6_fill_node(struct net *net, struct sk_buff *skb,
 107			 struct fib6_info *rt, struct dst_entry *dst,
 108			 struct in6_addr *dest, struct in6_addr *src,
 109			 int iif, int type, u32 portid, u32 seq,
 110			 unsigned int flags);
 111static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
 112					   const struct in6_addr *daddr,
 113					   const struct in6_addr *saddr);
 114
 115#ifdef CONFIG_IPV6_ROUTE_INFO
 116static struct fib6_info *rt6_add_route_info(struct net *net,
 117					   const struct in6_addr *prefix, int prefixlen,
 118					   const struct in6_addr *gwaddr,
 119					   struct net_device *dev,
 120					   unsigned int pref);
 121static struct fib6_info *rt6_get_route_info(struct net *net,
 122					   const struct in6_addr *prefix, int prefixlen,
 123					   const struct in6_addr *gwaddr,
 124					   struct net_device *dev);
 125#endif
 126
 127struct uncached_list {
 128	spinlock_t		lock;
 129	struct list_head	head;
 130};
 131
 132static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
 133
 134void rt6_uncached_list_add(struct rt6_info *rt)
 135{
 136	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
 137
 138	rt->rt6i_uncached_list = ul;
 139
 140	spin_lock_bh(&ul->lock);
 141	list_add_tail(&rt->rt6i_uncached, &ul->head);
 142	spin_unlock_bh(&ul->lock);
 143}
 144
 145void rt6_uncached_list_del(struct rt6_info *rt)
 146{
 147	if (!list_empty(&rt->rt6i_uncached)) {
 148		struct uncached_list *ul = rt->rt6i_uncached_list;
 149		struct net *net = dev_net(rt->dst.dev);
 150
 151		spin_lock_bh(&ul->lock);
 152		list_del(&rt->rt6i_uncached);
 153		atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
 154		spin_unlock_bh(&ul->lock);
 155	}
 156}
 157
 158static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
 159{
 160	struct net_device *loopback_dev = net->loopback_dev;
 161	int cpu;
 162
 163	if (dev == loopback_dev)
 164		return;
 165
 166	for_each_possible_cpu(cpu) {
 167		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
 168		struct rt6_info *rt;
 169
 170		spin_lock_bh(&ul->lock);
 171		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
 172			struct inet6_dev *rt_idev = rt->rt6i_idev;
 173			struct net_device *rt_dev = rt->dst.dev;
 174
 175			if (rt_idev->dev == dev) {
 176				rt->rt6i_idev = in6_dev_get(loopback_dev);
 177				in6_dev_put(rt_idev);
 178			}
 179
 180			if (rt_dev == dev) {
 181				rt->dst.dev = blackhole_netdev;
 182				dev_hold(rt->dst.dev);
 183				dev_put(rt_dev);
 184			}
 185		}
 186		spin_unlock_bh(&ul->lock);
 187	}
 188}
 189
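/* Illustrative sketch (not part of the original file): the per-cpu,
 * spinlock-protected list pattern used by rt6_uncached_list above, reduced
 * to its core. All names here are hypothetical.
 */
#if 0
struct example_list {
	spinlock_t		lock;	/* initialized at boot, as in ip6_route_init() */
	struct list_head	head;
};

static DEFINE_PER_CPU_ALIGNED(struct example_list, example_lists);

static void example_add(struct list_head *node)
{
	/* raw_cpu_ptr(): migrating after this is fine, the lock protects */
	struct example_list *el = raw_cpu_ptr(&example_lists);

	spin_lock_bh(&el->lock);
	list_add_tail(node, &el->head);
	spin_unlock_bh(&el->lock);
}

static void example_flush_all(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		struct example_list *el = per_cpu_ptr(&example_lists, cpu);

		spin_lock_bh(&el->lock);
		/* walk el->head under the lock, as rt6_uncached_list_flush_dev() does */
		spin_unlock_bh(&el->lock);
	}
}
#endif
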
 190static inline const void *choose_neigh_daddr(const struct in6_addr *p,
 191					     struct sk_buff *skb,
 192					     const void *daddr)
 193{
 194	if (!ipv6_addr_any(p))
 195		return (const void *) p;
 196	else if (skb)
 197		return &ipv6_hdr(skb)->daddr;
 198	return daddr;
 199}
 200
 201struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
 202				   struct net_device *dev,
 203				   struct sk_buff *skb,
 204				   const void *daddr)
 205{
 206	struct neighbour *n;
 207
 208	daddr = choose_neigh_daddr(gw, skb, daddr);
 209	n = __ipv6_neigh_lookup(dev, daddr);
 210	if (n)
 211		return n;
 212
 213	n = neigh_create(&nd_tbl, daddr, dev);
 214	return IS_ERR(n) ? NULL : n;
 215}
 216
 217static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
 218					      struct sk_buff *skb,
 219					      const void *daddr)
 220{
 221	const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
 222
 223	return ip6_neigh_lookup(rt6_nexthop(rt, &in6addr_any),
 224				dst->dev, skb, daddr);
 225}
 226
 227static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
 228{
 229	struct net_device *dev = dst->dev;
 230	struct rt6_info *rt = (struct rt6_info *)dst;
 231
 232	daddr = choose_neigh_daddr(rt6_nexthop(rt, &in6addr_any), NULL, daddr);
 233	if (!daddr)
 234		return;
 235	if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
 236		return;
 237	if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
 238		return;
 239	__ipv6_confirm_neigh(dev, daddr);
 240}
 241
 242static struct dst_ops ip6_dst_ops_template = {
 243	.family			=	AF_INET6,
 244	.gc			=	ip6_dst_gc,
 245	.gc_thresh		=	1024,
 246	.check			=	ip6_dst_check,
 247	.default_advmss		=	ip6_default_advmss,
 248	.mtu			=	ip6_mtu,
 249	.cow_metrics		=	dst_cow_metrics_generic,
 250	.destroy		=	ip6_dst_destroy,
 251	.ifdown			=	ip6_dst_ifdown,
 252	.negative_advice	=	ip6_negative_advice,
 253	.link_failure		=	ip6_link_failure,
 254	.update_pmtu		=	ip6_rt_update_pmtu,
 255	.redirect		=	rt6_do_redirect,
 256	.local_out		=	__ip6_local_out,
 257	.neigh_lookup		=	ip6_dst_neigh_lookup,
 258	.confirm_neigh		=	ip6_confirm_neigh,
 259};
 260
 261static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
 262{
 263	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
 264
 265	return mtu ? : dst->dev->mtu;
 266}
 267
 268static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
 269					 struct sk_buff *skb, u32 mtu,
 270					 bool confirm_neigh)
 271{
 272}
 273
 274static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
 275				      struct sk_buff *skb)
 276{
 277}
 278
 279static struct dst_ops ip6_dst_blackhole_ops = {
 280	.family			=	AF_INET6,
 281	.destroy		=	ip6_dst_destroy,
 282	.check			=	ip6_dst_check,
 283	.mtu			=	ip6_blackhole_mtu,
 284	.default_advmss		=	ip6_default_advmss,
 285	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
 286	.redirect		=	ip6_rt_blackhole_redirect,
 287	.cow_metrics		=	dst_cow_metrics_generic,
 288	.neigh_lookup		=	ip6_dst_neigh_lookup,
 289};
 290
 291static const u32 ip6_template_metrics[RTAX_MAX] = {
 292	[RTAX_HOPLIMIT - 1] = 0,
 293};
 294
 295static const struct fib6_info fib6_null_entry_template = {
 296	.fib6_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 297	.fib6_protocol  = RTPROT_KERNEL,
 298	.fib6_metric	= ~(u32)0,
 299	.fib6_ref	= REFCOUNT_INIT(1),
 300	.fib6_type	= RTN_UNREACHABLE,
 301	.fib6_metrics	= (struct dst_metrics *)&dst_default_metrics,
 302};
 303
 304static const struct rt6_info ip6_null_entry_template = {
 305	.dst = {
 306		.__refcnt	= ATOMIC_INIT(1),
 307		.__use		= 1,
 308		.obsolete	= DST_OBSOLETE_FORCE_CHK,
 309		.error		= -ENETUNREACH,
 310		.input		= ip6_pkt_discard,
 311		.output		= ip6_pkt_discard_out,
 312	},
 313	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 314};
 315
 316#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 317
 318static const struct rt6_info ip6_prohibit_entry_template = {
 319	.dst = {
 320		.__refcnt	= ATOMIC_INIT(1),
 321		.__use		= 1,
 322		.obsolete	= DST_OBSOLETE_FORCE_CHK,
 323		.error		= -EACCES,
 324		.input		= ip6_pkt_prohibit,
 325		.output		= ip6_pkt_prohibit_out,
 326	},
 327	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 328};
 329
 330static const struct rt6_info ip6_blk_hole_entry_template = {
 331	.dst = {
 332		.__refcnt	= ATOMIC_INIT(1),
 333		.__use		= 1,
 334		.obsolete	= DST_OBSOLETE_FORCE_CHK,
 335		.error		= -EINVAL,
 336		.input		= dst_discard,
 337		.output		= dst_discard_out,
 338	},
 339	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 340};
 341
 342#endif
 343
 344static void rt6_info_init(struct rt6_info *rt)
 345{
 346	struct dst_entry *dst = &rt->dst;
 347
 348	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
 349	INIT_LIST_HEAD(&rt->rt6i_uncached);
 350}
 351
 352/* allocate dst with ip6_dst_ops */
 353struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
 354			       int flags)
 355{
 356	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
 357					1, DST_OBSOLETE_FORCE_CHK, flags);
 358
 359	if (rt) {
 360		rt6_info_init(rt);
 361		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
 362	}
 363
 364	return rt;
 365}
 366EXPORT_SYMBOL(ip6_dst_alloc);
 367
 368static void ip6_dst_destroy(struct dst_entry *dst)
 369{
 370	struct rt6_info *rt = (struct rt6_info *)dst;
 371	struct fib6_info *from;
 372	struct inet6_dev *idev;
 373
 374	ip_dst_metrics_put(dst);
 375	rt6_uncached_list_del(rt);
 376
 377	idev = rt->rt6i_idev;
 378	if (idev) {
 379		rt->rt6i_idev = NULL;
 380		in6_dev_put(idev);
 381	}
 382
 383	from = xchg((__force struct fib6_info **)&rt->from, NULL);
 384	fib6_info_release(from);
 385}
 386
 387static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 388			   int how)
 389{
 390	struct rt6_info *rt = (struct rt6_info *)dst;
 391	struct inet6_dev *idev = rt->rt6i_idev;
 392	struct net_device *loopback_dev =
 393		dev_net(dev)->loopback_dev;
 394
 395	if (idev && idev->dev != loopback_dev) {
 396		struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
 397		if (loopback_idev) {
 398			rt->rt6i_idev = loopback_idev;
 399			in6_dev_put(idev);
 400		}
 401	}
 402}
 403
 404static bool __rt6_check_expired(const struct rt6_info *rt)
 405{
 406	if (rt->rt6i_flags & RTF_EXPIRES)
 407		return time_after(jiffies, rt->dst.expires);
 408	else
 409		return false;
 410}
 411
 412static bool rt6_check_expired(const struct rt6_info *rt)
 413{
 414	struct fib6_info *from;
 415
 416	from = rcu_dereference(rt->from);
 417
 418	if (rt->rt6i_flags & RTF_EXPIRES) {
 419		if (time_after(jiffies, rt->dst.expires))
 420			return true;
 421	} else if (from) {
 422		return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
 423			fib6_check_expired(from);
 424	}
 425	return false;
 426}
 427
 428void fib6_select_path(const struct net *net, struct fib6_result *res,
 429		      struct flowi6 *fl6, int oif, bool have_oif_match,
 430		      const struct sk_buff *skb, int strict)
 431{
 432	struct fib6_info *sibling, *next_sibling;
 433	struct fib6_info *match = res->f6i;
 434
 435	if (!match->nh && (!match->fib6_nsiblings || have_oif_match))
 436		goto out;
 437
 438	if (match->nh && have_oif_match && res->nh)
 439		return;
 440
441	/* We might have already computed the hash for ICMPv6 errors. In that
 442	 * case it will always be non-zero. Otherwise now is the time to do it.
 443	 */
 444	if (!fl6->mp_hash &&
 445	    (!match->nh || nexthop_is_multipath(match->nh)))
 446		fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
 447
 448	if (unlikely(match->nh)) {
 449		nexthop_path_fib6_result(res, fl6->mp_hash);
 450		return;
 451	}
 452
 453	if (fl6->mp_hash <= atomic_read(&match->fib6_nh->fib_nh_upper_bound))
 454		goto out;
 455
 456	list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
 457				 fib6_siblings) {
 458		const struct fib6_nh *nh = sibling->fib6_nh;
 459		int nh_upper_bound;
 460
 461		nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound);
 462		if (fl6->mp_hash > nh_upper_bound)
 463			continue;
 464		if (rt6_score_route(nh, sibling->fib6_flags, oif, strict) < 0)
 465			break;
 466		match = sibling;
 467		break;
 468	}
 469
 470out:
 471	res->f6i = match;
 472	res->nh = match->fib6_nh;
 473}
 474
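/* Illustrative sketch (not part of the original file): the fib_nh_upper_bound
 * comparisons above are hash-threshold multipath selection along the lines of
 * RFC 2992: each nexthop owns a slice of the hash space, bounded above by its
 * upper bound. Reduced, hypothetical form:
 */
#if 0
static int example_pick_nexthop(const int *upper_bound, int n, int hash)
{
	int i;

	/* first nexthop whose bound covers the flow hash wins */
	for (i = 0; i < n; i++)
		if (hash <= upper_bound[i])
			return i;
	return n - 1;	/* unreachable if the last bound covers the range */
}
#endif
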
 475/*
 476 *	Route lookup. rcu_read_lock() should be held.
 477 */
 478
 479static bool __rt6_device_match(struct net *net, const struct fib6_nh *nh,
 480			       const struct in6_addr *saddr, int oif, int flags)
 481{
 482	const struct net_device *dev;
 483
 484	if (nh->fib_nh_flags & RTNH_F_DEAD)
 485		return false;
 486
 487	dev = nh->fib_nh_dev;
 488	if (oif) {
 489		if (dev->ifindex == oif)
 490			return true;
 491	} else {
 492		if (ipv6_chk_addr(net, saddr, dev,
 493				  flags & RT6_LOOKUP_F_IFACE))
 494			return true;
 495	}
 496
 497	return false;
 498}
 499
 500struct fib6_nh_dm_arg {
 501	struct net		*net;
 502	const struct in6_addr	*saddr;
 503	int			oif;
 504	int			flags;
 505	struct fib6_nh		*nh;
 506};
 507
 508static int __rt6_nh_dev_match(struct fib6_nh *nh, void *_arg)
 509{
 510	struct fib6_nh_dm_arg *arg = _arg;
 511
 512	arg->nh = nh;
 513	return __rt6_device_match(arg->net, nh, arg->saddr, arg->oif,
 514				  arg->flags);
 515}
 516
 517/* returns fib6_nh from nexthop or NULL */
 518static struct fib6_nh *rt6_nh_dev_match(struct net *net, struct nexthop *nh,
 519					struct fib6_result *res,
 520					const struct in6_addr *saddr,
 521					int oif, int flags)
 522{
 523	struct fib6_nh_dm_arg arg = {
 524		.net   = net,
 525		.saddr = saddr,
 526		.oif   = oif,
 527		.flags = flags,
 528	};
 529
 530	if (nexthop_is_blackhole(nh))
 531		return NULL;
 532
 533	if (nexthop_for_each_fib6_nh(nh, __rt6_nh_dev_match, &arg))
 534		return arg.nh;
 535
 536	return NULL;
 537}
 538
 539static void rt6_device_match(struct net *net, struct fib6_result *res,
 540			     const struct in6_addr *saddr, int oif, int flags)
 541{
 542	struct fib6_info *f6i = res->f6i;
 543	struct fib6_info *spf6i;
 544	struct fib6_nh *nh;
 545
 546	if (!oif && ipv6_addr_any(saddr)) {
 547		if (unlikely(f6i->nh)) {
 548			nh = nexthop_fib6_nh(f6i->nh);
 549			if (nexthop_is_blackhole(f6i->nh))
 550				goto out_blackhole;
 551		} else {
 552			nh = f6i->fib6_nh;
 553		}
 554		if (!(nh->fib_nh_flags & RTNH_F_DEAD))
 555			goto out;
 556	}
 557
 558	for (spf6i = f6i; spf6i; spf6i = rcu_dereference(spf6i->fib6_next)) {
 559		bool matched = false;
 560
 561		if (unlikely(spf6i->nh)) {
 562			nh = rt6_nh_dev_match(net, spf6i->nh, res, saddr,
 563					      oif, flags);
 564			if (nh)
 565				matched = true;
 566		} else {
 567			nh = spf6i->fib6_nh;
 568			if (__rt6_device_match(net, nh, saddr, oif, flags))
 569				matched = true;
 570		}
 571		if (matched) {
 572			res->f6i = spf6i;
 573			goto out;
 574		}
 575	}
 576
 577	if (oif && flags & RT6_LOOKUP_F_IFACE) {
 578		res->f6i = net->ipv6.fib6_null_entry;
 579		nh = res->f6i->fib6_nh;
 580		goto out;
 581	}
 582
 583	if (unlikely(f6i->nh)) {
 584		nh = nexthop_fib6_nh(f6i->nh);
 585		if (nexthop_is_blackhole(f6i->nh))
 586			goto out_blackhole;
 587	} else {
 588		nh = f6i->fib6_nh;
 589	}
 590
 591	if (nh->fib_nh_flags & RTNH_F_DEAD) {
 592		res->f6i = net->ipv6.fib6_null_entry;
 593		nh = res->f6i->fib6_nh;
 594	}
 595out:
 596	res->nh = nh;
 597	res->fib6_type = res->f6i->fib6_type;
 598	res->fib6_flags = res->f6i->fib6_flags;
 599	return;
 600
 601out_blackhole:
 602	res->fib6_flags |= RTF_REJECT;
 603	res->fib6_type = RTN_BLACKHOLE;
 604	res->nh = nh;
 605}
 606
 607#ifdef CONFIG_IPV6_ROUTER_PREF
 608struct __rt6_probe_work {
 609	struct work_struct work;
 610	struct in6_addr target;
 611	struct net_device *dev;
 612};
 613
 614static void rt6_probe_deferred(struct work_struct *w)
 615{
 616	struct in6_addr mcaddr;
 617	struct __rt6_probe_work *work =
 618		container_of(w, struct __rt6_probe_work, work);
 619
 620	addrconf_addr_solict_mult(&work->target, &mcaddr);
 621	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
 622	dev_put(work->dev);
 623	kfree(work);
 624}
 625
 626static void rt6_probe(struct fib6_nh *fib6_nh)
 627{
 628	struct __rt6_probe_work *work = NULL;
 629	const struct in6_addr *nh_gw;
 630	unsigned long last_probe;
 631	struct neighbour *neigh;
 632	struct net_device *dev;
 633	struct inet6_dev *idev;
 634
 635	/*
636	 * This may not seem appropriate here, but we need
637	 * to check whether the router is in fact still
638	 * reachable; aka Router Reachability Probing.
639	 *
640	 * Router Reachability Probes MUST be rate-limited
641	 * to no more than one per minute.
 642	 */
 643	if (!fib6_nh->fib_nh_gw_family)
 644		return;
 645
 646	nh_gw = &fib6_nh->fib_nh_gw6;
 647	dev = fib6_nh->fib_nh_dev;
 648	rcu_read_lock_bh();
 649	last_probe = READ_ONCE(fib6_nh->last_probe);
 650	idev = __in6_dev_get(dev);
 651	neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
 652	if (neigh) {
 653		if (neigh->nud_state & NUD_VALID)
 654			goto out;
 655
 656		write_lock(&neigh->lock);
 657		if (!(neigh->nud_state & NUD_VALID) &&
 658		    time_after(jiffies,
 659			       neigh->updated + idev->cnf.rtr_probe_interval)) {
 660			work = kmalloc(sizeof(*work), GFP_ATOMIC);
 661			if (work)
 662				__neigh_set_probe_once(neigh);
 663		}
 664		write_unlock(&neigh->lock);
 665	} else if (time_after(jiffies, last_probe +
 666				       idev->cnf.rtr_probe_interval)) {
 667		work = kmalloc(sizeof(*work), GFP_ATOMIC);
 668	}
 669
 670	if (!work || cmpxchg(&fib6_nh->last_probe,
 671			     last_probe, jiffies) != last_probe) {
 672		kfree(work);
 673	} else {
 674		INIT_WORK(&work->work, rt6_probe_deferred);
 675		work->target = *nh_gw;
 676		dev_hold(dev);
 677		work->dev = dev;
 678		schedule_work(&work->work);
 679	}
 680
 681out:
 682	rcu_read_unlock_bh();
 683}
 684#else
 685static inline void rt6_probe(struct fib6_nh *fib6_nh)
 686{
 687}
 688#endif
 689
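/* Illustrative sketch (not part of the original file): the probe above is
 * rate-limited with a lockless timestamp swap; only the caller whose
 * cmpxchg() succeeds may schedule the work. Reduced, hypothetical form:
 */
#if 0
static unsigned long example_last_probe;

static bool example_may_probe(unsigned long interval)
{
	unsigned long last = READ_ONCE(example_last_probe);

	if (time_before(jiffies, last + interval))
		return false;	/* still inside the rate-limit window */

	/* one winner per interval; losers see a changed timestamp */
	return cmpxchg(&example_last_probe, last, jiffies) == last;
}
#endif
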
 690/*
 691 * Default Router Selection (RFC 2461 6.3.6)
 692 */
 693static enum rt6_nud_state rt6_check_neigh(const struct fib6_nh *fib6_nh)
 694{
 695	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
 696	struct neighbour *neigh;
 697
 698	rcu_read_lock_bh();
 699	neigh = __ipv6_neigh_lookup_noref(fib6_nh->fib_nh_dev,
 700					  &fib6_nh->fib_nh_gw6);
 701	if (neigh) {
 702		read_lock(&neigh->lock);
 703		if (neigh->nud_state & NUD_VALID)
 704			ret = RT6_NUD_SUCCEED;
 705#ifdef CONFIG_IPV6_ROUTER_PREF
 706		else if (!(neigh->nud_state & NUD_FAILED))
 707			ret = RT6_NUD_SUCCEED;
 708		else
 709			ret = RT6_NUD_FAIL_PROBE;
 710#endif
 711		read_unlock(&neigh->lock);
 712	} else {
 713		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
 714		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
 715	}
 716	rcu_read_unlock_bh();
 717
 718	return ret;
 719}
 720
 721static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
 722			   int strict)
 723{
 724	int m = 0;
 725
 726	if (!oif || nh->fib_nh_dev->ifindex == oif)
 727		m = 2;
 728
 729	if (!m && (strict & RT6_LOOKUP_F_IFACE))
 730		return RT6_NUD_FAIL_HARD;
 731#ifdef CONFIG_IPV6_ROUTER_PREF
 732	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(fib6_flags)) << 2;
 733#endif
 734	if ((strict & RT6_LOOKUP_F_REACHABLE) &&
 735	    !(fib6_flags & RTF_NONEXTHOP) && nh->fib_nh_gw_family) {
 736		int n = rt6_check_neigh(nh);
 737		if (n < 0)
 738			return n;
 739	}
 740	return m;
 741}
 742
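/* Illustrative note (not part of the original file): the score packs the
 * interface match into the low bits (2 for an oif match) and, with
 * CONFIG_IPV6_ROUTER_PREF, the decoded RA preference into bits 2-3, so a
 * route with an oif match and high preference would score 2 | (3 << 2).
 */
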
 743static bool find_match(struct fib6_nh *nh, u32 fib6_flags,
 744		       int oif, int strict, int *mpri, bool *do_rr)
 745{
 746	bool match_do_rr = false;
 747	bool rc = false;
 748	int m;
 749
 750	if (nh->fib_nh_flags & RTNH_F_DEAD)
 751		goto out;
 752
 753	if (ip6_ignore_linkdown(nh->fib_nh_dev) &&
 754	    nh->fib_nh_flags & RTNH_F_LINKDOWN &&
 755	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
 756		goto out;
 757
 758	m = rt6_score_route(nh, fib6_flags, oif, strict);
 759	if (m == RT6_NUD_FAIL_DO_RR) {
 760		match_do_rr = true;
 761		m = 0; /* lowest valid score */
 762	} else if (m == RT6_NUD_FAIL_HARD) {
 763		goto out;
 764	}
 765
 766	if (strict & RT6_LOOKUP_F_REACHABLE)
 767		rt6_probe(nh);
 768
 769	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
 770	if (m > *mpri) {
 771		*do_rr = match_do_rr;
 772		*mpri = m;
 773		rc = true;
 774	}
 775out:
 776	return rc;
 777}
 778
 779struct fib6_nh_frl_arg {
 780	u32		flags;
 781	int		oif;
 782	int		strict;
 783	int		*mpri;
 784	bool		*do_rr;
 785	struct fib6_nh	*nh;
 786};
 787
 788static int rt6_nh_find_match(struct fib6_nh *nh, void *_arg)
 789{
 790	struct fib6_nh_frl_arg *arg = _arg;
 791
 792	arg->nh = nh;
 793	return find_match(nh, arg->flags, arg->oif, arg->strict,
 794			  arg->mpri, arg->do_rr);
 795}
 796
 797static void __find_rr_leaf(struct fib6_info *f6i_start,
 798			   struct fib6_info *nomatch, u32 metric,
 799			   struct fib6_result *res, struct fib6_info **cont,
 800			   int oif, int strict, bool *do_rr, int *mpri)
 801{
 802	struct fib6_info *f6i;
 803
 804	for (f6i = f6i_start;
 805	     f6i && f6i != nomatch;
 806	     f6i = rcu_dereference(f6i->fib6_next)) {
 807		bool matched = false;
 808		struct fib6_nh *nh;
 809
 810		if (cont && f6i->fib6_metric != metric) {
 811			*cont = f6i;
 812			return;
 813		}
 814
 815		if (fib6_check_expired(f6i))
 816			continue;
 817
 818		if (unlikely(f6i->nh)) {
 819			struct fib6_nh_frl_arg arg = {
 820				.flags  = f6i->fib6_flags,
 821				.oif    = oif,
 822				.strict = strict,
 823				.mpri   = mpri,
 824				.do_rr  = do_rr
 825			};
 826
 827			if (nexthop_is_blackhole(f6i->nh)) {
 828				res->fib6_flags = RTF_REJECT;
 829				res->fib6_type = RTN_BLACKHOLE;
 830				res->f6i = f6i;
 831				res->nh = nexthop_fib6_nh(f6i->nh);
 832				return;
 833			}
 834			if (nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_find_match,
 835						     &arg)) {
 836				matched = true;
 837				nh = arg.nh;
 838			}
 839		} else {
 840			nh = f6i->fib6_nh;
 841			if (find_match(nh, f6i->fib6_flags, oif, strict,
 842				       mpri, do_rr))
 843				matched = true;
 844		}
 845		if (matched) {
 846			res->f6i = f6i;
 847			res->nh = nh;
 848			res->fib6_flags = f6i->fib6_flags;
 849			res->fib6_type = f6i->fib6_type;
 850		}
 851	}
 852}
 853
 854static void find_rr_leaf(struct fib6_node *fn, struct fib6_info *leaf,
 855			 struct fib6_info *rr_head, int oif, int strict,
 856			 bool *do_rr, struct fib6_result *res)
 857{
 858	u32 metric = rr_head->fib6_metric;
 859	struct fib6_info *cont = NULL;
 860	int mpri = -1;
 861
 862	__find_rr_leaf(rr_head, NULL, metric, res, &cont,
 863		       oif, strict, do_rr, &mpri);
 864
 865	__find_rr_leaf(leaf, rr_head, metric, res, &cont,
 866		       oif, strict, do_rr, &mpri);
 867
 868	if (res->f6i || !cont)
 869		return;
 870
 871	__find_rr_leaf(cont, NULL, metric, res, NULL,
 872		       oif, strict, do_rr, &mpri);
 873}
 874
 875static void rt6_select(struct net *net, struct fib6_node *fn, int oif,
 876		       struct fib6_result *res, int strict)
 877{
 878	struct fib6_info *leaf = rcu_dereference(fn->leaf);
 879	struct fib6_info *rt0;
 880	bool do_rr = false;
 881	int key_plen;
 882
883	/* make sure this function or its helpers set res->f6i */
 884	res->f6i = NULL;
 885
 886	if (!leaf || leaf == net->ipv6.fib6_null_entry)
 887		goto out;
 888
 889	rt0 = rcu_dereference(fn->rr_ptr);
 890	if (!rt0)
 891		rt0 = leaf;
 892
 893	/* Double check to make sure fn is not an intermediate node
894	 * and fn->leaf does not point to its child's leaf
 895	 * (This might happen if all routes under fn are deleted from
 896	 * the tree and fib6_repair_tree() is called on the node.)
 897	 */
 898	key_plen = rt0->fib6_dst.plen;
 899#ifdef CONFIG_IPV6_SUBTREES
 900	if (rt0->fib6_src.plen)
 901		key_plen = rt0->fib6_src.plen;
 902#endif
 903	if (fn->fn_bit != key_plen)
 904		goto out;
 905
 906	find_rr_leaf(fn, leaf, rt0, oif, strict, &do_rr, res);
 907	if (do_rr) {
 908		struct fib6_info *next = rcu_dereference(rt0->fib6_next);
 909
 910		/* no entries matched; do round-robin */
 911		if (!next || next->fib6_metric != rt0->fib6_metric)
 912			next = leaf;
 913
 914		if (next != rt0) {
 915			spin_lock_bh(&leaf->fib6_table->tb6_lock);
 916			/* make sure next is not being deleted from the tree */
 917			if (next->fib6_node)
 918				rcu_assign_pointer(fn->rr_ptr, next);
 919			spin_unlock_bh(&leaf->fib6_table->tb6_lock);
 920		}
 921	}
 922
 923out:
 924	if (!res->f6i) {
 925		res->f6i = net->ipv6.fib6_null_entry;
 926		res->nh = res->f6i->fib6_nh;
 927		res->fib6_flags = res->f6i->fib6_flags;
 928		res->fib6_type = res->f6i->fib6_type;
 929	}
 930}
 931
 932static bool rt6_is_gw_or_nonexthop(const struct fib6_result *res)
 933{
 934	return (res->f6i->fib6_flags & RTF_NONEXTHOP) ||
 935	       res->nh->fib_nh_gw_family;
 936}
 937
 938#ifdef CONFIG_IPV6_ROUTE_INFO
 939int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 940		  const struct in6_addr *gwaddr)
 941{
 942	struct net *net = dev_net(dev);
 943	struct route_info *rinfo = (struct route_info *) opt;
 944	struct in6_addr prefix_buf, *prefix;
 945	unsigned int pref;
 946	unsigned long lifetime;
 947	struct fib6_info *rt;
 948
 949	if (len < sizeof(struct route_info)) {
 950		return -EINVAL;
 951	}
 952
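	/* Route Information Option layout (RFC 4191, sec. 2.3): the Length
	 * field is in units of 8 octets. Length 1 carries no prefix bytes,
	 * 2 carries the first 8 bytes of the prefix and 3 the full 16, which
	 * the checks below enforce against Prefix Length.
	 * (Illustrative note, not part of the original file.)
	 */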
 953	/* Sanity check for prefix_len and length */
 954	if (rinfo->length > 3) {
 955		return -EINVAL;
 956	} else if (rinfo->prefix_len > 128) {
 957		return -EINVAL;
 958	} else if (rinfo->prefix_len > 64) {
 959		if (rinfo->length < 2) {
 960			return -EINVAL;
 961		}
 962	} else if (rinfo->prefix_len > 0) {
 963		if (rinfo->length < 1) {
 964			return -EINVAL;
 965		}
 966	}
 967
 968	pref = rinfo->route_pref;
 969	if (pref == ICMPV6_ROUTER_PREF_INVALID)
 970		return -EINVAL;
 971
 972	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
 973
 974	if (rinfo->length == 3)
 975		prefix = (struct in6_addr *)rinfo->prefix;
 976	else {
 977		/* this function is safe */
 978		ipv6_addr_prefix(&prefix_buf,
 979				 (struct in6_addr *)rinfo->prefix,
 980				 rinfo->prefix_len);
 981		prefix = &prefix_buf;
 982	}
 983
 984	if (rinfo->prefix_len == 0)
 985		rt = rt6_get_dflt_router(net, gwaddr, dev);
 986	else
 987		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
 988					gwaddr, dev);
 989
 990	if (rt && !lifetime) {
 991		ip6_del_rt(net, rt, false);
 992		rt = NULL;
 993	}
 994
 995	if (!rt && lifetime)
 996		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
 997					dev, pref);
 998	else if (rt)
 999		rt->fib6_flags = RTF_ROUTEINFO |
1000				 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
1001
1002	if (rt) {
1003		if (!addrconf_finite_timeout(lifetime))
1004			fib6_clean_expires(rt);
1005		else
1006			fib6_set_expires(rt, jiffies + HZ * lifetime);
1007
1008		fib6_info_release(rt);
1009	}
1010	return 0;
1011}
1012#endif
1013
1014/*
1015 *	Misc support functions
1016 */
1017
1018/* called with rcu_lock held */
1019static struct net_device *ip6_rt_get_dev_rcu(const struct fib6_result *res)
1020{
1021	struct net_device *dev = res->nh->fib_nh_dev;
1022
1023	if (res->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
1024		/* for copies of local routes, dst->dev needs to be the device
1025		 * itself if it is an L3 master device, the master device if
1026		 * the device is enslaved, and the loopback device by default
1027		 */
1028		if (netif_is_l3_slave(dev) &&
1029		    !rt6_need_strict(&res->f6i->fib6_dst.addr))
1030			dev = l3mdev_master_dev_rcu(dev);
1031		else if (!netif_is_l3_master(dev))
1032			dev = dev_net(dev)->loopback_dev;
1033		/* in the remaining case netif_is_l3_master(dev) is true,
1034		 * and dev itself is what we want returned
1035		 */
1036	}
1037
1038	return dev;
1039}
1040
1041static const int fib6_prop[RTN_MAX + 1] = {
1042	[RTN_UNSPEC]	= 0,
1043	[RTN_UNICAST]	= 0,
1044	[RTN_LOCAL]	= 0,
1045	[RTN_BROADCAST]	= 0,
1046	[RTN_ANYCAST]	= 0,
1047	[RTN_MULTICAST]	= 0,
1048	[RTN_BLACKHOLE]	= -EINVAL,
1049	[RTN_UNREACHABLE] = -EHOSTUNREACH,
1050	[RTN_PROHIBIT]	= -EACCES,
1051	[RTN_THROW]	= -EAGAIN,
1052	[RTN_NAT]	= -EINVAL,
1053	[RTN_XRESOLVE]	= -EINVAL,
1054};
1055
1056static int ip6_rt_type_to_error(u8 fib6_type)
1057{
1058	return fib6_prop[fib6_type];
1059}
1060
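/* Illustrative note (not part of the original file): a route installed with
 * "ip -6 route add prohibit 2001:db8::/32" carries fib6_type RTN_PROHIBIT,
 * so the table above maps it to -EACCES, and ip6_rt_init_dst_reject() below
 * wires its input/output to the ICMPv6 administratively-prohibited handlers.
 */
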
1061static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
1062{
1063	unsigned short flags = 0;
1064
1065	if (rt->dst_nocount)
1066		flags |= DST_NOCOUNT;
1067	if (rt->dst_nopolicy)
1068		flags |= DST_NOPOLICY;
1069
1070	return flags;
1071}
1072
1073static void ip6_rt_init_dst_reject(struct rt6_info *rt, u8 fib6_type)
1074{
1075	rt->dst.error = ip6_rt_type_to_error(fib6_type);
1076
1077	switch (fib6_type) {
1078	case RTN_BLACKHOLE:
1079		rt->dst.output = dst_discard_out;
1080		rt->dst.input = dst_discard;
1081		break;
1082	case RTN_PROHIBIT:
1083		rt->dst.output = ip6_pkt_prohibit_out;
1084		rt->dst.input = ip6_pkt_prohibit;
1085		break;
1086	case RTN_THROW:
1087	case RTN_UNREACHABLE:
1088	default:
1089		rt->dst.output = ip6_pkt_discard_out;
1090		rt->dst.input = ip6_pkt_discard;
1091		break;
1092	}
1093}
1094
1095static void ip6_rt_init_dst(struct rt6_info *rt, const struct fib6_result *res)
1096{
1097	struct fib6_info *f6i = res->f6i;
1098
1099	if (res->fib6_flags & RTF_REJECT) {
1100		ip6_rt_init_dst_reject(rt, res->fib6_type);
1101		return;
1102	}
1103
1104	rt->dst.error = 0;
1105	rt->dst.output = ip6_output;
1106
1107	if (res->fib6_type == RTN_LOCAL || res->fib6_type == RTN_ANYCAST) {
1108		rt->dst.input = ip6_input;
1109	} else if (ipv6_addr_type(&f6i->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
1110		rt->dst.input = ip6_mc_input;
1111	} else {
1112		rt->dst.input = ip6_forward;
1113	}
1114
1115	if (res->nh->fib_nh_lws) {
1116		rt->dst.lwtstate = lwtstate_get(res->nh->fib_nh_lws);
1117		lwtunnel_set_redirect(&rt->dst);
1118	}
1119
1120	rt->dst.lastuse = jiffies;
1121}
1122
1123/* Caller must already hold reference to @from */
1124static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
1125{
1126	rt->rt6i_flags &= ~RTF_EXPIRES;
1127	rcu_assign_pointer(rt->from, from);
1128	ip_dst_init_metrics(&rt->dst, from->fib6_metrics);
1129}
1130
1131/* Caller must already hold reference to f6i in result */
1132static void ip6_rt_copy_init(struct rt6_info *rt, const struct fib6_result *res)
1133{
1134	const struct fib6_nh *nh = res->nh;
1135	const struct net_device *dev = nh->fib_nh_dev;
1136	struct fib6_info *f6i = res->f6i;
1137
1138	ip6_rt_init_dst(rt, res);
1139
1140	rt->rt6i_dst = f6i->fib6_dst;
1141	rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
1142	rt->rt6i_flags = res->fib6_flags;
1143	if (nh->fib_nh_gw_family) {
1144		rt->rt6i_gateway = nh->fib_nh_gw6;
1145		rt->rt6i_flags |= RTF_GATEWAY;
1146	}
1147	rt6_set_from(rt, f6i);
1148#ifdef CONFIG_IPV6_SUBTREES
1149	rt->rt6i_src = f6i->fib6_src;
1150#endif
1151}
1152
1153static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
1154					struct in6_addr *saddr)
1155{
1156	struct fib6_node *pn, *sn;
1157	while (1) {
1158		if (fn->fn_flags & RTN_TL_ROOT)
1159			return NULL;
1160		pn = rcu_dereference(fn->parent);
1161		sn = FIB6_SUBTREE(pn);
1162		if (sn && sn != fn)
1163			fn = fib6_node_lookup(sn, NULL, saddr);
1164		else
1165			fn = pn;
1166		if (fn->fn_flags & RTN_RTINFO)
1167			return fn;
1168	}
1169}
1170
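/* Illustrative note (not part of the original file): backtracking walks up
 * from a failed match. At each parent it retries the source-address subtree
 * first (CONFIG_IPV6_SUBTREES), then keeps climbing until it reaches a node
 * that carries routes (RTN_RTINFO) or gives up at the table root
 * (RTN_TL_ROOT).
 */
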
1171static bool ip6_hold_safe(struct net *net, struct rt6_info **prt)
1172{
1173	struct rt6_info *rt = *prt;
1174
1175	if (dst_hold_safe(&rt->dst))
1176		return true;
1177	if (net) {
1178		rt = net->ipv6.ip6_null_entry;
1179		dst_hold(&rt->dst);
1180	} else {
1181		rt = NULL;
1182	}
1183	*prt = rt;
1184	return false;
1185}
1186
1187/* called with rcu_lock held */
1188static struct rt6_info *ip6_create_rt_rcu(const struct fib6_result *res)
1189{
1190	struct net_device *dev = res->nh->fib_nh_dev;
1191	struct fib6_info *f6i = res->f6i;
1192	unsigned short flags;
1193	struct rt6_info *nrt;
1194
1195	if (!fib6_info_hold_safe(f6i))
1196		goto fallback;
1197
1198	flags = fib6_info_dst_flags(f6i);
1199	nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
1200	if (!nrt) {
1201		fib6_info_release(f6i);
1202		goto fallback;
1203	}
1204
1205	ip6_rt_copy_init(nrt, res);
1206	return nrt;
1207
1208fallback:
1209	nrt = dev_net(dev)->ipv6.ip6_null_entry;
1210	dst_hold(&nrt->dst);
1211	return nrt;
1212}
1213
1214INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_lookup(struct net *net,
1215					     struct fib6_table *table,
1216					     struct flowi6 *fl6,
1217					     const struct sk_buff *skb,
1218					     int flags)
1219{
1220	struct fib6_result res = {};
1221	struct fib6_node *fn;
1222	struct rt6_info *rt;
1223
1224	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1225		flags &= ~RT6_LOOKUP_F_IFACE;
1226
1227	rcu_read_lock();
1228	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1229restart:
1230	res.f6i = rcu_dereference(fn->leaf);
1231	if (!res.f6i)
1232		res.f6i = net->ipv6.fib6_null_entry;
1233	else
1234		rt6_device_match(net, &res, &fl6->saddr, fl6->flowi6_oif,
1235				 flags);
1236
1237	if (res.f6i == net->ipv6.fib6_null_entry) {
1238		fn = fib6_backtrack(fn, &fl6->saddr);
1239		if (fn)
1240			goto restart;
1241
1242		rt = net->ipv6.ip6_null_entry;
1243		dst_hold(&rt->dst);
1244		goto out;
1245	} else if (res.fib6_flags & RTF_REJECT) {
1246		goto do_create;
1247	}
1248
1249	fib6_select_path(net, &res, fl6, fl6->flowi6_oif,
1250			 fl6->flowi6_oif != 0, skb, flags);
1251
1252	/* Search through exception table */
1253	rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
1254	if (rt) {
1255		if (ip6_hold_safe(net, &rt))
1256			dst_use_noref(&rt->dst, jiffies);
1257	} else {
1258do_create:
1259		rt = ip6_create_rt_rcu(&res);
1260	}
1261
1262out:
1263	trace_fib6_table_lookup(net, &res, table, fl6);
1264
1265	rcu_read_unlock();
1266
1267	return rt;
1268}
1269
1270struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
1271				   const struct sk_buff *skb, int flags)
1272{
1273	return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
1274}
1275EXPORT_SYMBOL_GPL(ip6_route_lookup);
1276
1277struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
1278			    const struct in6_addr *saddr, int oif,
1279			    const struct sk_buff *skb, int strict)
1280{
1281	struct flowi6 fl6 = {
1282		.flowi6_oif = oif,
1283		.daddr = *daddr,
1284	};
1285	struct dst_entry *dst;
1286	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
1287
1288	if (saddr) {
1289		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
1290		flags |= RT6_LOOKUP_F_HAS_SADDR;
1291	}
1292
1293	dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
1294	if (dst->error == 0)
1295		return (struct rt6_info *) dst;
1296
1297	dst_release(dst);
1298
1299	return NULL;
1300}
1301EXPORT_SYMBOL(rt6_lookup);
1302
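/* Illustrative sketch (not part of the original file): a hypothetical
 * caller of rt6_lookup(). The lookup returns a referenced rt6_info (or
 * NULL), and the reference is dropped with ip6_rt_put() when done.
 */
#if 0
static int example_probe_route(struct net *net, const struct in6_addr *daddr)
{
	struct rt6_info *rt;

	rt = rt6_lookup(net, daddr, NULL, 0, NULL, 0);
	if (!rt)
		return -EHOSTUNREACH;
	/* ... inspect rt->dst here ... */
	ip6_rt_put(rt);		/* drop the reference taken by the lookup */
	return 0;
}
#endif
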
1303/* ip6_ins_rt is called with table->tb6_lock NOT held (it takes the
1304 * lock itself). It takes a new route entry; if the addition fails for
1305 * any reason, the route is released.
1306 * The caller must hold a reference on the route before calling it.
1307 */
1308
1309static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
1310			struct netlink_ext_ack *extack)
1311{
1312	int err;
1313	struct fib6_table *table;
1314
1315	table = rt->fib6_table;
1316	spin_lock_bh(&table->tb6_lock);
1317	err = fib6_add(&table->tb6_root, rt, info, extack);
1318	spin_unlock_bh(&table->tb6_lock);
1319
1320	return err;
1321}
1322
1323int ip6_ins_rt(struct net *net, struct fib6_info *rt)
1324{
1325	struct nl_info info = {	.nl_net = net, };
1326
1327	return __ip6_ins_rt(rt, &info, NULL);
1328}
1329
1330static struct rt6_info *ip6_rt_cache_alloc(const struct fib6_result *res,
1331					   const struct in6_addr *daddr,
1332					   const struct in6_addr *saddr)
1333{
1334	struct fib6_info *f6i = res->f6i;
1335	struct net_device *dev;
1336	struct rt6_info *rt;
1337
1338	/*
1339	 *	Clone the route.
1340	 */
1341
1342	if (!fib6_info_hold_safe(f6i))
1343		return NULL;
1344
1345	dev = ip6_rt_get_dev_rcu(res);
1346	rt = ip6_dst_alloc(dev_net(dev), dev, 0);
1347	if (!rt) {
1348		fib6_info_release(f6i);
1349		return NULL;
1350	}
1351
1352	ip6_rt_copy_init(rt, res);
1353	rt->rt6i_flags |= RTF_CACHE;
1354	rt->rt6i_dst.addr = *daddr;
1355	rt->rt6i_dst.plen = 128;
1356
1357	if (!rt6_is_gw_or_nonexthop(res)) {
1358		if (f6i->fib6_dst.plen != 128 &&
1359		    ipv6_addr_equal(&f6i->fib6_dst.addr, daddr))
1360			rt->rt6i_flags |= RTF_ANYCAST;
1361#ifdef CONFIG_IPV6_SUBTREES
1362		if (rt->rt6i_src.plen && saddr) {
1363			rt->rt6i_src.addr = *saddr;
1364			rt->rt6i_src.plen = 128;
1365		}
1366#endif
1367	}
1368
1369	return rt;
1370}
1371
1372static struct rt6_info *ip6_rt_pcpu_alloc(const struct fib6_result *res)
1373{
1374	struct fib6_info *f6i = res->f6i;
1375	unsigned short flags = fib6_info_dst_flags(f6i);
1376	struct net_device *dev;
1377	struct rt6_info *pcpu_rt;
1378
1379	if (!fib6_info_hold_safe(f6i))
1380		return NULL;
1381
1382	rcu_read_lock();
1383	dev = ip6_rt_get_dev_rcu(res);
1384	pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags | DST_NOCOUNT);
1385	rcu_read_unlock();
1386	if (!pcpu_rt) {
1387		fib6_info_release(f6i);
1388		return NULL;
1389	}
1390	ip6_rt_copy_init(pcpu_rt, res);
1391	pcpu_rt->rt6i_flags |= RTF_PCPU;
1392
1393	if (f6i->nh)
1394		pcpu_rt->sernum = rt_genid_ipv6(dev_net(dev));
1395
1396	return pcpu_rt;
1397}
1398
1399static bool rt6_is_valid(const struct rt6_info *rt6)
1400{
1401	return rt6->sernum == rt_genid_ipv6(dev_net(rt6->dst.dev));
1402}
1403
1404/* It should be called with rcu_read_lock() acquired */
1405static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
1406{
1407	struct rt6_info *pcpu_rt;
1408
1409	pcpu_rt = this_cpu_read(*res->nh->rt6i_pcpu);
1410
1411	if (pcpu_rt && pcpu_rt->sernum && !rt6_is_valid(pcpu_rt)) {
1412		struct rt6_info *prev, **p;
1413
1414		p = this_cpu_ptr(res->nh->rt6i_pcpu);
1415		prev = xchg(p, NULL);
1416		if (prev) {
1417			dst_dev_put(&prev->dst);
1418			dst_release(&prev->dst);
1419		}
1420
1421		pcpu_rt = NULL;
1422	}
1423
1424	return pcpu_rt;
1425}
1426
1427static struct rt6_info *rt6_make_pcpu_route(struct net *net,
1428					    const struct fib6_result *res)
1429{
1430	struct rt6_info *pcpu_rt, *prev, **p;
1431
1432	pcpu_rt = ip6_rt_pcpu_alloc(res);
1433	if (!pcpu_rt)
1434		return NULL;
1435
1436	p = this_cpu_ptr(res->nh->rt6i_pcpu);
1437	prev = cmpxchg(p, NULL, pcpu_rt);
1438	BUG_ON(prev);
1439
1440	if (res->f6i->fib6_destroying) {
1441		struct fib6_info *from;
1442
1443		from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL);
1444		fib6_info_release(from);
1445	}
1446
1447	return pcpu_rt;
1448}
1449
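/* Illustrative sketch (not part of the original file): rt6_make_pcpu_route()
 * publishes with cmpxchg() and asserts prev == NULL, since each CPU only
 * fills its own slot. The general publish-once pattern, with hypothetical
 * names, keeps whichever value won instead:
 */
#if 0
struct example;

static struct example *example_publish_once(struct example **slot,
					    struct example *new)
{
	struct example *old = cmpxchg(slot, NULL, new);

	return old ? old : new;	/* on loss, the caller frees 'new' */
}
#endif
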
1450/* exception hash table implementation
1451 */
1452static DEFINE_SPINLOCK(rt6_exception_lock);
1453
1454/* Remove rt6_ex from hash table and free the memory
1455 * Caller must hold rt6_exception_lock
1456 */
1457static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1458				 struct rt6_exception *rt6_ex)
1459{
1460	struct fib6_info *from;
1461	struct net *net;
1462
1463	if (!bucket || !rt6_ex)
1464		return;
1465
1466	net = dev_net(rt6_ex->rt6i->dst.dev);
1467	net->ipv6.rt6_stats->fib_rt_cache--;
1468
1469	/* purge the exception completely to allow releasing the held resources:
1470	 * some [sk] cache may keep the dst around for an unlimited time
1471	 */
1472	from = xchg((__force struct fib6_info **)&rt6_ex->rt6i->from, NULL);
1473	fib6_info_release(from);
1474	dst_dev_put(&rt6_ex->rt6i->dst);
1475
1476	hlist_del_rcu(&rt6_ex->hlist);
1477	dst_release(&rt6_ex->rt6i->dst);
1478	kfree_rcu(rt6_ex, rcu);
1479	WARN_ON_ONCE(!bucket->depth);
1480	bucket->depth--;
1481}
1482
1483/* Remove oldest rt6_ex in bucket and free the memory
1484 * Caller must hold rt6_exception_lock
1485 */
1486static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1487{
1488	struct rt6_exception *rt6_ex, *oldest = NULL;
1489
1490	if (!bucket)
1491		return;
1492
1493	hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1494		if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1495			oldest = rt6_ex;
1496	}
1497	rt6_remove_exception(bucket, oldest);
1498}
1499
1500static u32 rt6_exception_hash(const struct in6_addr *dst,
1501			      const struct in6_addr *src)
1502{
1503	static u32 seed __read_mostly;
1504	u32 val;
1505
1506	net_get_random_once(&seed, sizeof(seed));
1507	val = jhash2((const u32 *)dst, sizeof(*dst)/sizeof(u32), seed);
1508
1509#ifdef CONFIG_IPV6_SUBTREES
1510	if (src)
1511		val = jhash2((const u32 *)src, sizeof(*src)/sizeof(u32), val);
1512#endif
1513	return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1514}
1515
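/* Illustrative note (not part of the original file): hash_32() folds the
 * jhash2() value down to FIB6_EXCEPTION_BUCKET_SIZE_SHIFT bits, so the
 * return value is a bucket index in [0, FIB6_EXCEPTION_BUCKET_SIZE) that
 * callers simply add to the bucket array base pointer.
 */
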
1516/* Helper function to find the cached rt in the hash table
1517 * and update bucket pointer to point to the bucket for this
1518 * (daddr, saddr) pair
1519 * Caller must hold rt6_exception_lock
1520 */
1521static struct rt6_exception *
1522__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1523			      const struct in6_addr *daddr,
1524			      const struct in6_addr *saddr)
1525{
1526	struct rt6_exception *rt6_ex;
1527	u32 hval;
1528
1529	if (!(*bucket) || !daddr)
1530		return NULL;
1531
1532	hval = rt6_exception_hash(daddr, saddr);
1533	*bucket += hval;
1534
1535	hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1536		struct rt6_info *rt6 = rt6_ex->rt6i;
1537		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1538
1539#ifdef CONFIG_IPV6_SUBTREES
1540		if (matched && saddr)
1541			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1542#endif
1543		if (matched)
1544			return rt6_ex;
1545	}
1546	return NULL;
1547}
1548
1549/* Helper function to find the cached rt in the hash table
1550 * and update bucket pointer to point to the bucket for this
1551 * (daddr, saddr) pair
1552 * Caller must hold rcu_read_lock()
1553 */
1554static struct rt6_exception *
1555__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1556			 const struct in6_addr *daddr,
1557			 const struct in6_addr *saddr)
1558{
1559	struct rt6_exception *rt6_ex;
1560	u32 hval;
1561
1562	WARN_ON_ONCE(!rcu_read_lock_held());
1563
1564	if (!(*bucket) || !daddr)
1565		return NULL;
1566
1567	hval = rt6_exception_hash(daddr, saddr);
1568	*bucket += hval;
1569
1570	hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1571		struct rt6_info *rt6 = rt6_ex->rt6i;
1572		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1573
1574#ifdef CONFIG_IPV6_SUBTREES
1575		if (matched && saddr)
1576			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1577#endif
1578		if (matched)
1579			return rt6_ex;
1580	}
1581	return NULL;
1582}
1583
1584static unsigned int fib6_mtu(const struct fib6_result *res)
1585{
1586	const struct fib6_nh *nh = res->nh;
1587	unsigned int mtu;
1588
1589	if (res->f6i->fib6_pmtu) {
1590		mtu = res->f6i->fib6_pmtu;
1591	} else {
1592		struct net_device *dev = nh->fib_nh_dev;
1593		struct inet6_dev *idev;
1594
1595		rcu_read_lock();
1596		idev = __in6_dev_get(dev);
1597		mtu = idev->cnf.mtu6;
1598		rcu_read_unlock();
1599	}
1600
1601	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1602
1603	return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
1604}
1605
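/* Worked example (illustrative, not part of the original file): with no
 * per-route PMTU, a device MTU of 1500 and an lwtunnel encap that needs 8
 * bytes of headroom, fib6_mtu() returns 1500 - 8 = 1492.
 */
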
1606#define FIB6_EXCEPTION_BUCKET_FLUSHED  0x1UL
1607
1608/* used when the flushed bit is not relevant, only plain access to the
1609 * bucket is (i.e., all bucket users except rt6_insert_exception);
1610 *
1611 * called under rcu lock; sometimes called with rt6_exception_lock held
1612 */
1613static
1614struct rt6_exception_bucket *fib6_nh_get_excptn_bucket(const struct fib6_nh *nh,
1615						       spinlock_t *lock)
1616{
1617	struct rt6_exception_bucket *bucket;
1618
1619	if (lock)
1620		bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
1621						   lockdep_is_held(lock));
1622	else
1623		bucket = rcu_dereference(nh->rt6i_exception_bucket);
1624
1625	/* remove bucket flushed bit if set */
1626	if (bucket) {
1627		unsigned long p = (unsigned long)bucket;
1628
1629		p &= ~FIB6_EXCEPTION_BUCKET_FLUSHED;
1630		bucket = (struct rt6_exception_bucket *)p;
1631	}
1632
1633	return bucket;
1634}
1635
1636static bool fib6_nh_excptn_bucket_flushed(struct rt6_exception_bucket *bucket)
1637{
1638	unsigned long p = (unsigned long)bucket;
1639
1640	return !!(p & FIB6_EXCEPTION_BUCKET_FLUSHED);
1641}
1642
1643/* called with rt6_exception_lock held */
1644static void fib6_nh_excptn_bucket_set_flushed(struct fib6_nh *nh,
1645					      spinlock_t *lock)
1646{
1647	struct rt6_exception_bucket *bucket;
1648	unsigned long p;
1649
1650	bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
1651					   lockdep_is_held(lock));
1652
1653	p = (unsigned long)bucket;
1654	p |= FIB6_EXCEPTION_BUCKET_FLUSHED;
1655	bucket = (struct rt6_exception_bucket *)p;
1656	rcu_assign_pointer(nh->rt6i_exception_bucket, bucket);
1657}
1658
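/* Illustrative sketch (not part of the original file): the "flushed" flag
 * above lives in bit 0 of the bucket pointer, which is free because the
 * allocation is at least word-aligned. The generic tagged-pointer idiom,
 * with hypothetical names:
 */
#if 0
static inline void *example_tag(void *p)
{
	return (void *)((unsigned long)p | 1UL);
}

static inline void *example_untag(void *p)
{
	return (void *)((unsigned long)p & ~1UL);
}

static inline bool example_is_tagged(const void *p)
{
	return (unsigned long)p & 1UL;
}
#endif
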
1659static int rt6_insert_exception(struct rt6_info *nrt,
1660				const struct fib6_result *res)
1661{
1662	struct net *net = dev_net(nrt->dst.dev);
1663	struct rt6_exception_bucket *bucket;
1664	struct fib6_info *f6i = res->f6i;
1665	struct in6_addr *src_key = NULL;
1666	struct rt6_exception *rt6_ex;
1667	struct fib6_nh *nh = res->nh;
1668	int err = 0;
1669
1670	spin_lock_bh(&rt6_exception_lock);
1671
1672	bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
1673					  lockdep_is_held(&rt6_exception_lock));
1674	if (!bucket) {
1675		bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1676				 GFP_ATOMIC);
1677		if (!bucket) {
1678			err = -ENOMEM;
1679			goto out;
1680		}
1681		rcu_assign_pointer(nh->rt6i_exception_bucket, bucket);
1682	} else if (fib6_nh_excptn_bucket_flushed(bucket)) {
1683		err = -EINVAL;
1684		goto out;
1685	}
1686
1687#ifdef CONFIG_IPV6_SUBTREES
1688	/* fib6_src.plen != 0 indicates f6i is in subtree
1689	 * and exception table is indexed by a hash of
1690	 * both fib6_dst and fib6_src.
1691	 * Otherwise, the exception table is indexed by
1692	 * a hash of only fib6_dst.
1693	 */
1694	if (f6i->fib6_src.plen)
1695		src_key = &nrt->rt6i_src.addr;
1696#endif
1697	/* rt6_mtu_change() might lower mtu on f6i.
1698	 * Only insert this exception route if its mtu
1699	 * is less than f6i's mtu value.
1700	 */
1701	if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(res)) {
1702		err = -EINVAL;
1703		goto out;
1704	}
1705
1706	rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1707					       src_key);
1708	if (rt6_ex)
1709		rt6_remove_exception(bucket, rt6_ex);
1710
1711	rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1712	if (!rt6_ex) {
1713		err = -ENOMEM;
1714		goto out;
1715	}
1716	rt6_ex->rt6i = nrt;
1717	rt6_ex->stamp = jiffies;
1718	hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1719	bucket->depth++;
1720	net->ipv6.rt6_stats->fib_rt_cache++;
1721
1722	if (bucket->depth > FIB6_MAX_DEPTH)
1723		rt6_exception_remove_oldest(bucket);
1724
1725out:
1726	spin_unlock_bh(&rt6_exception_lock);
1727
1728	/* Update fn->fn_sernum to invalidate all cached dst */
1729	if (!err) {
1730		spin_lock_bh(&f6i->fib6_table->tb6_lock);
1731		fib6_update_sernum(net, f6i);
1732		spin_unlock_bh(&f6i->fib6_table->tb6_lock);
1733		fib6_force_start_gc(net);
1734	}
1735
1736	return err;
1737}
1738
1739static void fib6_nh_flush_exceptions(struct fib6_nh *nh, struct fib6_info *from)
1740{
1741	struct rt6_exception_bucket *bucket;
1742	struct rt6_exception *rt6_ex;
1743	struct hlist_node *tmp;
1744	int i;
1745
1746	spin_lock_bh(&rt6_exception_lock);
1747
1748	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
1749	if (!bucket)
1750		goto out;
1751
1752	/* Prevent rt6_insert_exception() from recreating the bucket list */
1753	if (!from)
1754		fib6_nh_excptn_bucket_set_flushed(nh, &rt6_exception_lock);
1755
1756	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1757		hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist) {
1758			if (!from ||
1759			    rcu_access_pointer(rt6_ex->rt6i->from) == from)
1760				rt6_remove_exception(bucket, rt6_ex);
1761		}
1762		WARN_ON_ONCE(!from && bucket->depth);
1763		bucket++;
1764	}
1765out:
1766	spin_unlock_bh(&rt6_exception_lock);
1767}
1768
1769static int rt6_nh_flush_exceptions(struct fib6_nh *nh, void *arg)
1770{
1771	struct fib6_info *f6i = arg;
1772
1773	fib6_nh_flush_exceptions(nh, f6i);
1774
1775	return 0;
1776}
1777
1778void rt6_flush_exceptions(struct fib6_info *f6i)
1779{
1780	if (f6i->nh)
1781		nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_flush_exceptions,
1782					 f6i);
1783	else
1784		fib6_nh_flush_exceptions(f6i->fib6_nh, f6i);
1785}
1786
1787/* Find the cached rt in the hash table for the passed-in result
1788 * Caller has to hold rcu_read_lock()
1789 */
1790static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
1791					   const struct in6_addr *daddr,
1792					   const struct in6_addr *saddr)
1793{
1794	const struct in6_addr *src_key = NULL;
1795	struct rt6_exception_bucket *bucket;
1796	struct rt6_exception *rt6_ex;
1797	struct rt6_info *ret = NULL;
1798
1799#ifdef CONFIG_IPV6_SUBTREES
1800	/* fib6_src.plen != 0 indicates f6i is in subtree
1801	 * and exception table is indexed by a hash of
1802	 * both fib6_dst and fib6_src.
1803	 * However, the src addr used to create the hash
1804	 * might not be exactly the passed in saddr which
1805	 * is a /128 addr from the flow.
1806	 * So we need to use f6i->fib6_src to redo lookup
1807	 * if the passed in saddr does not find anything.
1808	 * (See the logic in ip6_rt_cache_alloc() on how
1809	 * rt->rt6i_src is updated.)
1810	 */
1811	if (res->f6i->fib6_src.plen)
1812		src_key = saddr;
1813find_ex:
1814#endif
1815	bucket = fib6_nh_get_excptn_bucket(res->nh, NULL);
1816	rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1817
1818	if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1819		ret = rt6_ex->rt6i;
1820
1821#ifdef CONFIG_IPV6_SUBTREES
1822	/* Use fib6_src as src_key and redo lookup */
1823	if (!ret && src_key && src_key != &res->f6i->fib6_src.addr) {
1824		src_key = &res->f6i->fib6_src.addr;
1825		goto find_ex;
1826	}
1827#endif
1828
1829	return ret;
1830}
1831
1832/* Remove the passed-in cached rt from the hash table that contains it */
1833static int fib6_nh_remove_exception(const struct fib6_nh *nh, int plen,
1834				    const struct rt6_info *rt)
1835{
1836	const struct in6_addr *src_key = NULL;
1837	struct rt6_exception_bucket *bucket;
1838	struct rt6_exception *rt6_ex;
1839	int err;
1840
1841	if (!rcu_access_pointer(nh->rt6i_exception_bucket))
1842		return -ENOENT;
1843
1844	spin_lock_bh(&rt6_exception_lock);
1845	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
1846
1847#ifdef CONFIG_IPV6_SUBTREES
1848	/* rt6i_src.plen != 0 indicates 'from' is in subtree
1849	 * and exception table is indexed by a hash of
1850	 * both rt6i_dst and rt6i_src.
1851	 * Otherwise, the exception table is indexed by
1852	 * a hash of only rt6i_dst.
1853	 */
1854	if (plen)
1855		src_key = &rt->rt6i_src.addr;
1856#endif
1857	rt6_ex = __rt6_find_exception_spinlock(&bucket,
1858					       &rt->rt6i_dst.addr,
1859					       src_key);
1860	if (rt6_ex) {
1861		rt6_remove_exception(bucket, rt6_ex);
1862		err = 0;
1863	} else {
1864		err = -ENOENT;
1865	}
1866
1867	spin_unlock_bh(&rt6_exception_lock);
1868	return err;
1869}
1870
1871struct fib6_nh_excptn_arg {
1872	struct rt6_info	*rt;
1873	int		plen;
1874};
1875
1876static int rt6_nh_remove_exception_rt(struct fib6_nh *nh, void *_arg)
1877{
1878	struct fib6_nh_excptn_arg *arg = _arg;
1879	int err;
1880
1881	err = fib6_nh_remove_exception(nh, arg->plen, arg->rt);
1882	if (err == 0)
1883		return 1;
1884
1885	return 0;
1886}
1887
1888static int rt6_remove_exception_rt(struct rt6_info *rt)
1889{
1890	struct fib6_info *from;
1891
1892	from = rcu_dereference(rt->from);
1893	if (!from || !(rt->rt6i_flags & RTF_CACHE))
1894		return -EINVAL;
1895
1896	if (from->nh) {
1897		struct fib6_nh_excptn_arg arg = {
1898			.rt = rt,
1899			.plen = from->fib6_src.plen
1900		};
1901		int rc;
1902
1903		/* rc = 1 means an entry was found */
1904		rc = nexthop_for_each_fib6_nh(from->nh,
1905					      rt6_nh_remove_exception_rt,
1906					      &arg);
1907		return rc ? 0 : -ENOENT;
1908	}
1909
1910	return fib6_nh_remove_exception(from->fib6_nh,
1911					from->fib6_src.plen, rt);
1912}
1913
1914/* Find the rt6_ex that contains the passed-in cached rt and
1915 * refresh its timestamp
1916 */
1917static void fib6_nh_update_exception(const struct fib6_nh *nh, int plen,
1918				     const struct rt6_info *rt)
1919{
1920	const struct in6_addr *src_key = NULL;
1921	struct rt6_exception_bucket *bucket;
1922	struct rt6_exception *rt6_ex;
1923
1924	bucket = fib6_nh_get_excptn_bucket(nh, NULL);
1925#ifdef CONFIG_IPV6_SUBTREES
1926	/* rt6i_src.plen != 0 indicates 'from' is in subtree
1927	 * and exception table is indexed by a hash of
1928	 * both rt6i_dst and rt6i_src.
1929	 * Otherwise, the exception table is indexed by
1930	 * a hash of only rt6i_dst.
1931	 */
1932	if (plen)
1933		src_key = &rt->rt6i_src.addr;
1934#endif
1935	rt6_ex = __rt6_find_exception_rcu(&bucket, &rt->rt6i_dst.addr, src_key);
1936	if (rt6_ex)
1937		rt6_ex->stamp = jiffies;
1938}
1939
1940struct fib6_nh_match_arg {
1941	const struct net_device *dev;
1942	const struct in6_addr	*gw;
1943	struct fib6_nh		*match;
1944};
1945
1946/* determine if fib6_nh has given device and gateway */
1947static int fib6_nh_find_match(struct fib6_nh *nh, void *_arg)
1948{
1949	struct fib6_nh_match_arg *arg = _arg;
1950
1951	if (arg->dev != nh->fib_nh_dev ||
1952	    (arg->gw && !nh->fib_nh_gw_family) ||
1953	    (!arg->gw && nh->fib_nh_gw_family) ||
1954	    (arg->gw && !ipv6_addr_equal(arg->gw, &nh->fib_nh_gw6)))
1955		return 0;
1956
1957	arg->match = nh;
1958
1959	/* found a match, break the loop */
1960	return 1;
1961}
1962
1963static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1964{
1965	struct fib6_info *from;
1966	struct fib6_nh *fib6_nh;
1967
1968	rcu_read_lock();
1969
1970	from = rcu_dereference(rt->from);
1971	if (!from || !(rt->rt6i_flags & RTF_CACHE))
1972		goto unlock;
1973
1974	if (from->nh) {
1975		struct fib6_nh_match_arg arg = {
1976			.dev = rt->dst.dev,
1977			.gw = &rt->rt6i_gateway,
1978		};
1979
1980		nexthop_for_each_fib6_nh(from->nh, fib6_nh_find_match, &arg);
1981
1982		if (!arg.match)
1983			goto unlock;
1984		fib6_nh = arg.match;
1985	} else {
1986		fib6_nh = from->fib6_nh;
1987	}
1988	fib6_nh_update_exception(fib6_nh, from->fib6_src.plen, rt);
1989unlock:
1990	rcu_read_unlock();
1991}
1992
1993static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1994					 struct rt6_info *rt, int mtu)
1995{
1996	/* If the new MTU is lower than the route PMTU, this new MTU will be the
1997	 * lowest MTU in the path: always allow updating the route PMTU to
1998	 * reflect PMTU decreases.
1999	 *
2000	 * If the new MTU is higher, and the route PMTU is equal to the local
2001	 * MTU, this means the old MTU is the lowest in the path, so allow
2002	 * updating it: if other nodes now have lower MTUs, PMTU discovery will
2003	 * handle this.
2004	 */
2005
2006	if (dst_mtu(&rt->dst) >= mtu)
2007		return true;
2008
2009	if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
2010		return true;
2011
2012	return false;
2013}
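
/* Worked example for the rules above (illustrative), assuming
 * idev->cnf.mtu6 == 1500:
 *
 *	dst_mtu == 1500, mtu == 1280 -> true  (PMTU decrease, always ok)
 *	dst_mtu == 1280, mtu == 1500 -> false (another hop set 1280,
 *					       keep it)
 *	dst_mtu == 1500, mtu == 9000 -> true  (this link was the
 *					       bottleneck, let PMTUD
 *					       rediscover)
 */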
2014
2015static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
2016				       const struct fib6_nh *nh, int mtu)
2017{
2018	struct rt6_exception_bucket *bucket;
2019	struct rt6_exception *rt6_ex;
2020	int i;
2021
2022	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
2023	if (!bucket)
2024		return;
2025
2026	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
2027		hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
2028			struct rt6_info *entry = rt6_ex->rt6i;
2029
2030			/* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
2031			 * route), the metrics of its rt->from have already
2032			 * been updated.
2033			 */
2034			if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
2035			    rt6_mtu_change_route_allowed(idev, entry, mtu))
2036				dst_metric_set(&entry->dst, RTAX_MTU, mtu);
2037		}
2038		bucket++;
2039	}
2040}
2041
2042#define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
2043
2044static void fib6_nh_exceptions_clean_tohost(const struct fib6_nh *nh,
2045					    const struct in6_addr *gateway)
2046{
2047	struct rt6_exception_bucket *bucket;
2048	struct rt6_exception *rt6_ex;
2049	struct hlist_node *tmp;
2050	int i;
2051
2052	if (!rcu_access_pointer(nh->rt6i_exception_bucket))
2053		return;
2054
2055	spin_lock_bh(&rt6_exception_lock);
2056	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
2057	if (bucket) {
2058		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
2059			hlist_for_each_entry_safe(rt6_ex, tmp,
2060						  &bucket->chain, hlist) {
2061				struct rt6_info *entry = rt6_ex->rt6i;
2062
2063				if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
2064				    RTF_CACHE_GATEWAY &&
2065				    ipv6_addr_equal(gateway,
2066						    &entry->rt6i_gateway)) {
2067					rt6_remove_exception(bucket, rt6_ex);
2068				}
2069			}
2070			bucket++;
2071		}
2072	}
2073
2074	spin_unlock_bh(&rt6_exception_lock);
2075}
2076
2077static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
2078				      struct rt6_exception *rt6_ex,
2079				      struct fib6_gc_args *gc_args,
2080				      unsigned long now)
2081{
2082	struct rt6_info *rt = rt6_ex->rt6i;
2083
2084	/* We are pruning and obsoleting aged-out and non-gateway exceptions
2085	 * even if others still hold references to them, so that such
2086	 * references can be dropped on the next dst_check().
2087	 * RTF_EXPIRES exceptions - e.g. pmtu-generated ones - are pruned when
2088	 * expired, independently of their age, as per RFC 8201 section 4.
2089	 */
2090	if (!(rt->rt6i_flags & RTF_EXPIRES)) {
2091		if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
2092			RT6_TRACE("aging clone %p\n", rt);
2093			rt6_remove_exception(bucket, rt6_ex);
2094			return;
2095		}
2096	} else if (time_after(jiffies, rt->dst.expires)) {
2097		RT6_TRACE("purging expired route %p\n", rt);
2098		rt6_remove_exception(bucket, rt6_ex);
2099		return;
2100	}
2101
2102	if (rt->rt6i_flags & RTF_GATEWAY) {
2103		struct neighbour *neigh;
2104		__u8 neigh_flags = 0;
2105
2106		neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
2107		if (neigh)
2108			neigh_flags = neigh->flags;
2109
2110		if (!(neigh_flags & NTF_ROUTER)) {
2111			RT6_TRACE("purging route %p via non-router but gateway\n",
2112				  rt);
2113			rt6_remove_exception(bucket, rt6_ex);
2114			return;
2115		}
2116	}
2117
2118	gc_args->more++;
2119}
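
/* Illustrative timeline: with gc_args->timeout of 60 * HZ, a
 * non-RTF_EXPIRES clone whose dst.lastuse is 90 seconds old is removed on
 * the next GC pass, while a pmtu exception carrying RTF_EXPIRES survives,
 * regardless of lastuse, until rt->dst.expires has passed.
 */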
2120
2121static void fib6_nh_age_exceptions(const struct fib6_nh *nh,
2122				   struct fib6_gc_args *gc_args,
2123				   unsigned long now)
2124{
2125	struct rt6_exception_bucket *bucket;
2126	struct rt6_exception *rt6_ex;
2127	struct hlist_node *tmp;
2128	int i;
2129
2130	if (!rcu_access_pointer(nh->rt6i_exception_bucket))
2131		return;
2132
2133	rcu_read_lock_bh();
2134	spin_lock(&rt6_exception_lock);
2135	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
2136	if (bucket) {
2137		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
2138			hlist_for_each_entry_safe(rt6_ex, tmp,
2139						  &bucket->chain, hlist) {
2140				rt6_age_examine_exception(bucket, rt6_ex,
2141							  gc_args, now);
2142			}
2143			bucket++;
2144		}
2145	}
2146	spin_unlock(&rt6_exception_lock);
2147	rcu_read_unlock_bh();
2148}
2149
2150struct fib6_nh_age_excptn_arg {
2151	struct fib6_gc_args	*gc_args;
2152	unsigned long		now;
2153};
2154
2155static int rt6_nh_age_exceptions(struct fib6_nh *nh, void *_arg)
2156{
2157	struct fib6_nh_age_excptn_arg *arg = _arg;
2158
2159	fib6_nh_age_exceptions(nh, arg->gc_args, arg->now);
2160	return 0;
2161}
2162
2163void rt6_age_exceptions(struct fib6_info *f6i,
2164			struct fib6_gc_args *gc_args,
2165			unsigned long now)
2166{
2167	if (f6i->nh) {
2168		struct fib6_nh_age_excptn_arg arg = {
2169			.gc_args = gc_args,
2170			.now = now
2171		};
2172
2173		nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_age_exceptions,
2174					 &arg);
2175	} else {
2176		fib6_nh_age_exceptions(f6i->fib6_nh, gc_args, now);
2177	}
2178}
2179
2180/* must be called with rcu lock held */
2181int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif,
2182		      struct flowi6 *fl6, struct fib6_result *res, int strict)
2183{
2184	struct fib6_node *fn, *saved_fn;
2185
2186	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2187	saved_fn = fn;
2188
2189	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
2190		oif = 0;
2191
2192redo_rt6_select:
2193	rt6_select(net, fn, oif, res, strict);
2194	if (res->f6i == net->ipv6.fib6_null_entry) {
2195		fn = fib6_backtrack(fn, &fl6->saddr);
2196		if (fn)
2197			goto redo_rt6_select;
2198		else if (strict & RT6_LOOKUP_F_REACHABLE) {
2199			/* also consider unreachable route */
2200			strict &= ~RT6_LOOKUP_F_REACHABLE;
2201			fn = saved_fn;
2202			goto redo_rt6_select;
2203		}
2204	}
2205
2206	trace_fib6_table_lookup(net, res, table, fl6);
2207
2208	return 0;
2209}
2210
2211struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
2212			       int oif, struct flowi6 *fl6,
2213			       const struct sk_buff *skb, int flags)
2214{
2215	struct fib6_result res = {};
2216	struct rt6_info *rt = NULL;
2217	int strict = 0;
2218
2219	WARN_ON_ONCE((flags & RT6_LOOKUP_F_DST_NOREF) &&
2220		     !rcu_read_lock_held());
2221
2222	strict |= flags & RT6_LOOKUP_F_IFACE;
2223	strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
2224	if (net->ipv6.devconf_all->forwarding == 0)
2225		strict |= RT6_LOOKUP_F_REACHABLE;
2226
2227	rcu_read_lock();
2228
2229	fib6_table_lookup(net, table, oif, fl6, &res, strict);
2230	if (res.f6i == net->ipv6.fib6_null_entry)
2231		goto out;
2232
2233	fib6_select_path(net, &res, fl6, oif, false, skb, strict);
2234
2235	/* Search through exception table */
2236	rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
2237	if (rt) {
2238		goto out;
2239	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
2240			    !res.nh->fib_nh_gw_family)) {
2241		/* Create an RTF_CACHE clone which will not be
2242		 * owned by the fib6 tree.  It is for the special case where
2243		 * the daddr in the skb during the neighbor look-up is different
2244		 * from the fl6->daddr used to look up the route here.
2245		 */
2246		rt = ip6_rt_cache_alloc(&res, &fl6->daddr, NULL);
2247
2248		if (rt) {
2249			/* 1 refcnt is taken during ip6_rt_cache_alloc().
2250			 * As rt6_uncached_list_add() does not consume refcnt,
2251			 * this refcnt is always returned to the caller even
2252			 * if the caller sets the RT6_LOOKUP_F_DST_NOREF flag.
2253			 */
2254			rt6_uncached_list_add(rt);
2255			atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
2256			rcu_read_unlock();
2257
2258			return rt;
2259		}
2260	} else {
2261		/* Get a percpu copy */
2262		local_bh_disable();
2263		rt = rt6_get_pcpu_route(&res);
2264
2265		if (!rt)
2266			rt = rt6_make_pcpu_route(net, &res);
2267
2268		local_bh_enable();
2269	}
2270out:
2271	if (!rt)
2272		rt = net->ipv6.ip6_null_entry;
2273	if (!(flags & RT6_LOOKUP_F_DST_NOREF))
2274		ip6_hold_safe(net, &rt);
2275	rcu_read_unlock();
2276
2277	return rt;
2278}
2279EXPORT_SYMBOL_GPL(ip6_pol_route);
2280
2281INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_input(struct net *net,
2282					    struct fib6_table *table,
2283					    struct flowi6 *fl6,
2284					    const struct sk_buff *skb,
2285					    int flags)
2286{
2287	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
2288}
2289
2290struct dst_entry *ip6_route_input_lookup(struct net *net,
2291					 struct net_device *dev,
2292					 struct flowi6 *fl6,
2293					 const struct sk_buff *skb,
2294					 int flags)
2295{
2296	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
2297		flags |= RT6_LOOKUP_F_IFACE;
2298
2299	return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
2300}
2301EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
2302
2303static void ip6_multipath_l3_keys(const struct sk_buff *skb,
2304				  struct flow_keys *keys,
2305				  struct flow_keys *flkeys)
2306{
2307	const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
2308	const struct ipv6hdr *key_iph = outer_iph;
2309	struct flow_keys *_flkeys = flkeys;
2310	const struct ipv6hdr *inner_iph;
2311	const struct icmp6hdr *icmph;
2312	struct ipv6hdr _inner_iph;
2313	struct icmp6hdr _icmph;
2314
2315	if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
2316		goto out;
2317
2318	icmph = skb_header_pointer(skb, skb_transport_offset(skb),
2319				   sizeof(_icmph), &_icmph);
2320	if (!icmph)
2321		goto out;
2322
2323	if (!icmpv6_is_err(icmph->icmp6_type))
2324		goto out;
2325
2326	inner_iph = skb_header_pointer(skb,
2327				       skb_transport_offset(skb) + sizeof(*icmph),
2328				       sizeof(_inner_iph), &_inner_iph);
2329	if (!inner_iph)
2330		goto out;
2331
2332	key_iph = inner_iph;
2333	_flkeys = NULL;
2334out:
2335	if (_flkeys) {
2336		keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
2337		keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
2338		keys->tags.flow_label = _flkeys->tags.flow_label;
2339		keys->basic.ip_proto = _flkeys->basic.ip_proto;
2340	} else {
2341		keys->addrs.v6addrs.src = key_iph->saddr;
2342		keys->addrs.v6addrs.dst = key_iph->daddr;
2343		keys->tags.flow_label = ip6_flowlabel(key_iph);
2344		keys->basic.ip_proto = key_iph->nexthdr;
2345	}
2346}
2347
2348/* if skb is set it will be used and fl6 can be NULL */
2349u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
2350		       const struct sk_buff *skb, struct flow_keys *flkeys)
2351{
2352	struct flow_keys hash_keys;
2353	u32 mhash;
2354
2355	switch (ip6_multipath_hash_policy(net)) {
2356	case 0:
2357		memset(&hash_keys, 0, sizeof(hash_keys));
2358		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2359		if (skb) {
2360			ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
2361		} else {
2362			hash_keys.addrs.v6addrs.src = fl6->saddr;
2363			hash_keys.addrs.v6addrs.dst = fl6->daddr;
2364			hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
2365			hash_keys.basic.ip_proto = fl6->flowi6_proto;
2366		}
2367		break;
2368	case 1:
2369		if (skb) {
2370			unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
2371			struct flow_keys keys;
2372
2373			/* short-circuit if we already have L4 hash present */
2374			if (skb->l4_hash)
2375				return skb_get_hash_raw(skb) >> 1;
2376
2377			memset(&hash_keys, 0, sizeof(hash_keys));
2378
2379			if (!flkeys) {
2380				skb_flow_dissect_flow_keys(skb, &keys, flag);
2381				flkeys = &keys;
2382			}
2383			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2384			hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2385			hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2386			hash_keys.ports.src = flkeys->ports.src;
2387			hash_keys.ports.dst = flkeys->ports.dst;
2388			hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2389		} else {
2390			memset(&hash_keys, 0, sizeof(hash_keys));
2391			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2392			hash_keys.addrs.v6addrs.src = fl6->saddr;
2393			hash_keys.addrs.v6addrs.dst = fl6->daddr;
2394			hash_keys.ports.src = fl6->fl6_sport;
2395			hash_keys.ports.dst = fl6->fl6_dport;
2396			hash_keys.basic.ip_proto = fl6->flowi6_proto;
2397		}
2398		break;
2399	case 2:
2400		memset(&hash_keys, 0, sizeof(hash_keys));
2401		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2402		if (skb) {
2403			struct flow_keys keys;
2404
2405			if (!flkeys) {
2406				skb_flow_dissect_flow_keys(skb, &keys, 0);
2407				flkeys = &keys;
2408			}
2409
2410			/* Inner can be v4 or v6 */
2411			if (flkeys->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
2412				hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
2413				hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
2414				hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
2415			} else if (flkeys->control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
2416				hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2417				hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2418				hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2419				hash_keys.tags.flow_label = flkeys->tags.flow_label;
2420				hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2421			} else {
2422				/* Same as case 0 */
2423				hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2424				ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
2425			}
2426		} else {
2427			/* Same as case 0 */
2428			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2429			hash_keys.addrs.v6addrs.src = fl6->saddr;
2430			hash_keys.addrs.v6addrs.dst = fl6->daddr;
2431			hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
2432			hash_keys.basic.ip_proto = fl6->flowi6_proto;
2433		}
2434		break;
2435	}
2436	mhash = flow_hash_from_keys(&hash_keys);
2437
2438	return mhash >> 1;
2439}
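
/* The switch above follows net.ipv6.fib_multipath_hash_policy:
 *	0 - L3 keys (saddr, daddr, flow label, next header)
 *	1 - L4 keys (saddr, daddr, sport, dport, next header)
 *	2 - L3 keys of the inner packet when encapsulated, outer L3 keys
 *	    otherwise
 *
 * Illustrative usage on the forwarding path, mirroring ip6_route_input()
 * below:
 *
 *	if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
 *		fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
 */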
2440
2441/* Called with rcu held */
2442void ip6_route_input(struct sk_buff *skb)
2443{
2444	const struct ipv6hdr *iph = ipv6_hdr(skb);
2445	struct net *net = dev_net(skb->dev);
2446	int flags = RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_DST_NOREF;
2447	struct ip_tunnel_info *tun_info;
2448	struct flowi6 fl6 = {
2449		.flowi6_iif = skb->dev->ifindex,
2450		.daddr = iph->daddr,
2451		.saddr = iph->saddr,
2452		.flowlabel = ip6_flowinfo(iph),
2453		.flowi6_mark = skb->mark,
2454		.flowi6_proto = iph->nexthdr,
2455	};
2456	struct flow_keys *flkeys = NULL, _flkeys;
2457
2458	tun_info = skb_tunnel_info(skb);
2459	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
2460		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
2461
2462	if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2463		flkeys = &_flkeys;
2464
2465	if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
2466		fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
2467	skb_dst_drop(skb);
2468	skb_dst_set_noref(skb, ip6_route_input_lookup(net, skb->dev,
2469						      &fl6, skb, flags));
2470}
2471
2472INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_output(struct net *net,
2473					     struct fib6_table *table,
2474					     struct flowi6 *fl6,
2475					     const struct sk_buff *skb,
2476					     int flags)
2477{
2478	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
2479}
2480
2481struct dst_entry *ip6_route_output_flags_noref(struct net *net,
2482					       const struct sock *sk,
2483					       struct flowi6 *fl6, int flags)
2484{
2485	bool any_src;
2486
2487	if (ipv6_addr_type(&fl6->daddr) &
2488	    (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
2489		struct dst_entry *dst;
2490
2491		/* This function does not take refcnt on the dst */
2492		dst = l3mdev_link_scope_lookup(net, fl6);
2493		if (dst)
2494			return dst;
2495	}
2496
2497	fl6->flowi6_iif = LOOPBACK_IFINDEX;
2498
2499	flags |= RT6_LOOKUP_F_DST_NOREF;
2500	any_src = ipv6_addr_any(&fl6->saddr);
2501	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
2502	    (fl6->flowi6_oif && any_src))
2503		flags |= RT6_LOOKUP_F_IFACE;
2504
2505	if (!any_src)
2506		flags |= RT6_LOOKUP_F_HAS_SADDR;
2507	else if (sk)
2508		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
2509
2510	return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
2511}
2512EXPORT_SYMBOL_GPL(ip6_route_output_flags_noref);
2513
2514struct dst_entry *ip6_route_output_flags(struct net *net,
2515					 const struct sock *sk,
2516					 struct flowi6 *fl6,
2517					 int flags)
2518{
2519	struct dst_entry *dst;
2520	struct rt6_info *rt6;
2521
2522	rcu_read_lock();
2523	dst = ip6_route_output_flags_noref(net, sk, fl6, flags);
2524	rt6 = (struct rt6_info *)dst;
2525	/* For dst cached in uncached_list, refcnt is already taken. */
2526	if (list_empty(&rt6->rt6i_uncached) && !dst_hold_safe(dst)) {
2527		dst = &net->ipv6.ip6_null_entry->dst;
2528		dst_hold(dst);
2529	}
2530	rcu_read_unlock();
2531
2532	return dst;
2533}
2534EXPORT_SYMBOL_GPL(ip6_route_output_flags);
2535
2536struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
2537{
2538	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
2539	struct net_device *loopback_dev = net->loopback_dev;
2540	struct dst_entry *new = NULL;
2541
2542	rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
2543		       DST_OBSOLETE_DEAD, 0);
2544	if (rt) {
2545		rt6_info_init(rt);
2546		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
2547
2548		new = &rt->dst;
2549		new->__use = 1;
2550		new->input = dst_discard;
2551		new->output = dst_discard_out;
2552
2553		dst_copy_metrics(new, &ort->dst);
2554
2555		rt->rt6i_idev = in6_dev_get(loopback_dev);
2556		rt->rt6i_gateway = ort->rt6i_gateway;
2557		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
2558
2559		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2560#ifdef CONFIG_IPV6_SUBTREES
2561		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2562#endif
2563	}
2564
2565	dst_release(dst_orig);
2566	return new ? new : ERR_PTR(-ENOMEM);
2567}
2568
2569/*
2570 *	Destination cache support functions
2571 */
2572
2573static bool fib6_check(struct fib6_info *f6i, u32 cookie)
2574{
2575	u32 rt_cookie = 0;
2576
2577	if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
2578		return false;
2579
2580	if (fib6_check_expired(f6i))
2581		return false;
2582
2583	return true;
2584}
2585
2586static struct dst_entry *rt6_check(struct rt6_info *rt,
2587				   struct fib6_info *from,
2588				   u32 cookie)
2589{
2590	u32 rt_cookie = 0;
2591
2592	if (!from || !fib6_get_cookie_safe(from, &rt_cookie) ||
2593	    rt_cookie != cookie)
2594		return NULL;
2595
2596	if (rt6_check_expired(rt))
2597		return NULL;
2598
2599	return &rt->dst;
2600}
2601
2602static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
2603					    struct fib6_info *from,
2604					    u32 cookie)
2605{
2606	if (!__rt6_check_expired(rt) &&
2607	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
2608	    fib6_check(from, cookie))
2609		return &rt->dst;
2610	else
2611		return NULL;
2612}
2613
2614static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2615{
2616	struct dst_entry *dst_ret;
2617	struct fib6_info *from;
2618	struct rt6_info *rt;
2619
2620	rt = container_of(dst, struct rt6_info, dst);
2621
2622	if (rt->sernum)
2623		return rt6_is_valid(rt) ? dst : NULL;
2624
2625	rcu_read_lock();
2626
2627	/* All IPv6 dsts are created with ->obsolete set to
2628	 * DST_OBSOLETE_FORCE_CHK, which always forces validation calls
2629	 * down into this function.
2630	 */
2631
2632	from = rcu_dereference(rt->from);
2633
2634	if (from && (rt->rt6i_flags & RTF_PCPU ||
2635	    unlikely(!list_empty(&rt->rt6i_uncached))))
2636		dst_ret = rt6_dst_from_check(rt, from, cookie);
2637	else
2638		dst_ret = rt6_check(rt, from, cookie);
2639
2640	rcu_read_unlock();
2641
2642	return dst_ret;
2643}
2644
2645static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2646{
2647	struct rt6_info *rt = (struct rt6_info *) dst;
2648
2649	if (rt) {
2650		if (rt->rt6i_flags & RTF_CACHE) {
2651			rcu_read_lock();
2652			if (rt6_check_expired(rt)) {
2653				rt6_remove_exception_rt(rt);
2654				dst = NULL;
2655			}
2656			rcu_read_unlock();
2657		} else {
2658			dst_release(dst);
2659			dst = NULL;
2660		}
2661	}
2662	return dst;
2663}
2664
2665static void ip6_link_failure(struct sk_buff *skb)
2666{
2667	struct rt6_info *rt;
2668
2669	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
2670
2671	rt = (struct rt6_info *) skb_dst(skb);
2672	if (rt) {
2673		rcu_read_lock();
2674		if (rt->rt6i_flags & RTF_CACHE) {
2675			rt6_remove_exception_rt(rt);
2676		} else {
2677			struct fib6_info *from;
2678			struct fib6_node *fn;
2679
2680			from = rcu_dereference(rt->from);
2681			if (from) {
2682				fn = rcu_dereference(from->fib6_node);
2683				if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2684					fn->fn_sernum = -1;
2685			}
2686		}
2687		rcu_read_unlock();
2688	}
2689}
2690
2691static void rt6_update_expires(struct rt6_info *rt0, int timeout)
2692{
2693	if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
2694		struct fib6_info *from;
2695
2696		rcu_read_lock();
2697		from = rcu_dereference(rt0->from);
2698		if (from)
2699			rt0->dst.expires = from->expires;
2700		rcu_read_unlock();
2701	}
2702
2703	dst_set_expires(&rt0->dst, timeout);
2704	rt0->rt6i_flags |= RTF_EXPIRES;
2705}
2706
2707static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2708{
2709	struct net *net = dev_net(rt->dst.dev);
2710
2711	dst_metric_set(&rt->dst, RTAX_MTU, mtu);
2712	rt->rt6i_flags |= RTF_MODIFIED;
2713	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2714}
2715
2716static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2717{
2718	return !(rt->rt6i_flags & RTF_CACHE) &&
2719		(rt->rt6i_flags & RTF_PCPU || rcu_access_pointer(rt->from));
2720}
2721
2722static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2723				 const struct ipv6hdr *iph, u32 mtu,
2724				 bool confirm_neigh)
2725{
2726	const struct in6_addr *daddr, *saddr;
2727	struct rt6_info *rt6 = (struct rt6_info *)dst;
2728
2729	/* Note: do *NOT* check dst_metric_locked(dst, RTAX_MTU).
2730	 * IPv6 pmtu discovery isn't optional, so 'mtu lock' cannot disable it.
2731	 * [see also comment in rt6_mtu_change_route()]
2732	 */
2733
2734	if (iph) {
2735		daddr = &iph->daddr;
2736		saddr = &iph->saddr;
2737	} else if (sk) {
2738		daddr = &sk->sk_v6_daddr;
2739		saddr = &inet6_sk(sk)->saddr;
2740	} else {
2741		daddr = NULL;
2742		saddr = NULL;
2743	}
2744
2745	if (confirm_neigh)
2746		dst_confirm_neigh(dst, daddr);
2747
2748	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2749	if (mtu >= dst_mtu(dst))
2750		return;
2751
2752	if (!rt6_cache_allowed_for_pmtu(rt6)) {
2753		rt6_do_update_pmtu(rt6, mtu);
2754		/* update rt6_ex->stamp for cache */
2755		if (rt6->rt6i_flags & RTF_CACHE)
2756			rt6_update_exception_stamp_rt(rt6);
2757	} else if (daddr) {
2758		struct fib6_result res = {};
2759		struct rt6_info *nrt6;
2760
2761		rcu_read_lock();
2762		res.f6i = rcu_dereference(rt6->from);
2763		if (!res.f6i)
2764			goto out_unlock;
2765
2766		res.fib6_flags = res.f6i->fib6_flags;
2767		res.fib6_type = res.f6i->fib6_type;
2768
2769		if (res.f6i->nh) {
2770			struct fib6_nh_match_arg arg = {
2771				.dev = dst->dev,
2772				.gw = &rt6->rt6i_gateway,
2773			};
2774
2775			nexthop_for_each_fib6_nh(res.f6i->nh,
2776						 fib6_nh_find_match, &arg);
2777
2778			/* The fib6_info uses a nexthop that has no fib6_nh
2779			 * matching dst->dev + gw; this should be impossible.
2780			 */
2781			if (!arg.match)
2782				goto out_unlock;
2783
2784			res.nh = arg.match;
2785		} else {
2786			res.nh = res.f6i->fib6_nh;
2787		}
2788
2789		nrt6 = ip6_rt_cache_alloc(&res, daddr, saddr);
2790		if (nrt6) {
2791			rt6_do_update_pmtu(nrt6, mtu);
2792			if (rt6_insert_exception(nrt6, &res))
2793				dst_release_immediate(&nrt6->dst);
2794		}
2795out_unlock:
2796		rcu_read_unlock();
2797	}
2798}
2799
2800static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2801			       struct sk_buff *skb, u32 mtu,
2802			       bool confirm_neigh)
2803{
2804	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu,
2805			     confirm_neigh);
2806}
2807
2808void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
2809		     int oif, u32 mark, kuid_t uid)
2810{
2811	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2812	struct dst_entry *dst;
2813	struct flowi6 fl6 = {
2814		.flowi6_oif = oif,
2815		.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark),
2816		.daddr = iph->daddr,
2817		.saddr = iph->saddr,
2818		.flowlabel = ip6_flowinfo(iph),
2819		.flowi6_uid = uid,
2820	};
2821
2822	dst = ip6_route_output(net, NULL, &fl6);
2823	if (!dst->error)
2824		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu), true);
2825	dst_release(dst);
2826}
2827EXPORT_SYMBOL_GPL(ip6_update_pmtu);
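
/* Illustrative sketch: a tunnel error handler that parses an
 * ICMPV6_PKT_TOOBIG for an encapsulated flow would typically call
 *
 *	ip6_update_pmtu(skb, net, htonl(new_mtu), 0, 0,
 *			sock_net_uid(net, NULL));
 *
 * where new_mtu is a hypothetical host-order value taken from the ICMP
 * message; the helper builds the flow from the packet's IPv6 header and
 * feeds __ip6_rt_update_pmtu() above.
 */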
2828
2829void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2830{
2831	int oif = sk->sk_bound_dev_if;
2832	struct dst_entry *dst;
2833
2834	if (!oif && skb->dev)
2835		oif = l3mdev_master_ifindex(skb->dev);
2836
2837	ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);
2838
2839	dst = __sk_dst_get(sk);
2840	if (!dst || !dst->obsolete ||
2841	    dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2842		return;
2843
2844	bh_lock_sock(sk);
2845	if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2846		ip6_datagram_dst_update(sk, false);
2847	bh_unlock_sock(sk);
2848}
2849EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2850
2851void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2852			   const struct flowi6 *fl6)
2853{
2854#ifdef CONFIG_IPV6_SUBTREES
2855	struct ipv6_pinfo *np = inet6_sk(sk);
2856#endif
2857
2858	ip6_dst_store(sk, dst,
2859		      ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2860		      &sk->sk_v6_daddr : NULL,
2861#ifdef CONFIG_IPV6_SUBTREES
2862		      ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2863		      &np->saddr :
2864#endif
2865		      NULL);
2866}
2867
2868static bool ip6_redirect_nh_match(const struct fib6_result *res,
2869				  struct flowi6 *fl6,
2870				  const struct in6_addr *gw,
2871				  struct rt6_info **ret)
2872{
2873	const struct fib6_nh *nh = res->nh;
2874
2875	if (nh->fib_nh_flags & RTNH_F_DEAD || !nh->fib_nh_gw_family ||
2876	    fl6->flowi6_oif != nh->fib_nh_dev->ifindex)
2877		return false;
2878
2879	/* rt_cache's gateway might be different from its 'parent'
2880	 * in the case of an ip redirect.
2881	 * So we keep searching in the exception table if the gateway
2882	 * is different.
2883	 */
2884	if (!ipv6_addr_equal(gw, &nh->fib_nh_gw6)) {
2885		struct rt6_info *rt_cache;
2886
2887		rt_cache = rt6_find_cached_rt(res, &fl6->daddr, &fl6->saddr);
2888		if (rt_cache &&
2889		    ipv6_addr_equal(gw, &rt_cache->rt6i_gateway)) {
2890			*ret = rt_cache;
2891			return true;
2892		}
2893		return false;
2894	}
2895	return true;
2896}
2897
2898struct fib6_nh_rd_arg {
2899	struct fib6_result	*res;
2900	struct flowi6		*fl6;
2901	const struct in6_addr	*gw;
2902	struct rt6_info		**ret;
2903};
2904
2905static int fib6_nh_redirect_match(struct fib6_nh *nh, void *_arg)
2906{
2907	struct fib6_nh_rd_arg *arg = _arg;
2908
2909	arg->res->nh = nh;
2910	return ip6_redirect_nh_match(arg->res, arg->fl6, arg->gw, arg->ret);
2911}
2912
2913/* Handle redirects */
2914struct ip6rd_flowi {
2915	struct flowi6 fl6;
2916	struct in6_addr gateway;
2917};
2918
2919INDIRECT_CALLABLE_SCOPE struct rt6_info *__ip6_route_redirect(struct net *net,
2920					     struct fib6_table *table,
2921					     struct flowi6 *fl6,
2922					     const struct sk_buff *skb,
2923					     int flags)
2924{
2925	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
2926	struct rt6_info *ret = NULL;
2927	struct fib6_result res = {};
2928	struct fib6_nh_rd_arg arg = {
2929		.res = &res,
2930		.fl6 = fl6,
2931		.gw  = &rdfl->gateway,
2932		.ret = &ret
2933	};
2934	struct fib6_info *rt;
2935	struct fib6_node *fn;
2936
2937	/* l3mdev_update_flow overrides oif if the device is enslaved; in
2938	 * this case we must match on the real ingress device, so reset it
2939	 */
2940	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
2941		fl6->flowi6_oif = skb->dev->ifindex;
2942
2943	/* Get the "current" route for this destination and
2944	 * check if the redirect has come from the appropriate router.
2945	 *
2946	 * RFC 4861 specifies that redirects should only be
2947	 * accepted if they come from the nexthop to the target.
2948	 * Due to the way the routes are chosen, this notion
2949	 * is a bit fuzzy and one might need to check all possible
2950	 * routes.
2951	 */
2952
2953	rcu_read_lock();
2954	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2955restart:
2956	for_each_fib6_node_rt_rcu(fn) {
2957		res.f6i = rt;
2958		if (fib6_check_expired(rt))
2959			continue;
2960		if (rt->fib6_flags & RTF_REJECT)
2961			break;
2962		if (unlikely(rt->nh)) {
2963			if (nexthop_is_blackhole(rt->nh))
2964				continue;
2965			/* on match, res->nh is filled in, and possibly ret as well */
2966			if (nexthop_for_each_fib6_nh(rt->nh,
2967						     fib6_nh_redirect_match,
2968						     &arg))
2969				goto out;
2970		} else {
2971			res.nh = rt->fib6_nh;
2972			if (ip6_redirect_nh_match(&res, fl6, &rdfl->gateway,
2973						  &ret))
2974				goto out;
2975		}
2976	}
2977
2978	if (!rt)
2979		rt = net->ipv6.fib6_null_entry;
2980	else if (rt->fib6_flags & RTF_REJECT) {
2981		ret = net->ipv6.ip6_null_entry;
2982		goto out;
2983	}
2984
2985	if (rt == net->ipv6.fib6_null_entry) {
2986		fn = fib6_backtrack(fn, &fl6->saddr);
2987		if (fn)
2988			goto restart;
2989	}
2990
2991	res.f6i = rt;
2992	res.nh = rt->fib6_nh;
2993out:
2994	if (ret) {
2995		ip6_hold_safe(net, &ret);
2996	} else {
2997		res.fib6_flags = res.f6i->fib6_flags;
2998		res.fib6_type = res.f6i->fib6_type;
2999		ret = ip6_create_rt_rcu(&res);
3000	}
3001
3002	rcu_read_unlock();
3003
3004	trace_fib6_table_lookup(net, &res, table, fl6);
3005	return ret;
3006};
3007
3008static struct dst_entry *ip6_route_redirect(struct net *net,
3009					    const struct flowi6 *fl6,
3010					    const struct sk_buff *skb,
3011					    const struct in6_addr *gateway)
3012{
3013	int flags = RT6_LOOKUP_F_HAS_SADDR;
3014	struct ip6rd_flowi rdfl;
3015
3016	rdfl.fl6 = *fl6;
3017	rdfl.gateway = *gateway;
3018
3019	return fib6_rule_lookup(net, &rdfl.fl6, skb,
3020				flags, __ip6_route_redirect);
3021}
3022
3023void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
3024		  kuid_t uid)
3025{
3026	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
3027	struct dst_entry *dst;
3028	struct flowi6 fl6 = {
3029		.flowi6_iif = LOOPBACK_IFINDEX,
3030		.flowi6_oif = oif,
3031		.flowi6_mark = mark,
3032		.daddr = iph->daddr,
3033		.saddr = iph->saddr,
3034		.flowlabel = ip6_flowinfo(iph),
3035		.flowi6_uid = uid,
3036	};
3037
3038	dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
3039	rt6_do_redirect(dst, NULL, skb);
3040	dst_release(dst);
3041}
3042EXPORT_SYMBOL_GPL(ip6_redirect);
3043
3044void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)
3045{
3046	const struct ipv6hdr *iph = ipv6_hdr(skb);
3047	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
3048	struct dst_entry *dst;
3049	struct flowi6 fl6 = {
3050		.flowi6_iif = LOOPBACK_IFINDEX,
3051		.flowi6_oif = oif,
3052		.daddr = msg->dest,
3053		.saddr = iph->daddr,
3054		.flowi6_uid = sock_net_uid(net, NULL),
3055	};
3056
3057	dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
3058	rt6_do_redirect(dst, NULL, skb);
3059	dst_release(dst);
3060}
3061
3062void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
3063{
3064	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
3065		     sk->sk_uid);
3066}
3067EXPORT_SYMBOL_GPL(ip6_sk_redirect);
3068
3069static unsigned int ip6_default_advmss(const struct dst_entry *dst)
3070{
3071	struct net_device *dev = dst->dev;
3072	unsigned int mtu = dst_mtu(dst);
3073	struct net *net = dev_net(dev);
3074
3075	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
3076
3077	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
3078		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
3079
3080	/*
3081	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
3082	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
3083	 * IPV6_MAXPLEN is also valid and means: "any MSS,
3084	 * rely only on pmtu discovery"
3085	 */
3086	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
3087		mtu = IPV6_MAXPLEN;
3088	return mtu;
3089}
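
/* Worked example (illustrative): a dst_mtu of 1500 yields
 *
 *	1500 - sizeof(struct ipv6hdr) - sizeof(struct tcphdr)
 *	     = 1500 - 40 - 20 = 1440
 *
 * clamped from below by ip6_rt_min_advmss and from above by the
 * IPV6_MAXPLEN rule.
 */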
3090
3091static unsigned int ip6_mtu(const struct dst_entry *dst)
3092{
3093	struct inet6_dev *idev;
3094	unsigned int mtu;
3095
3096	mtu = dst_metric_raw(dst, RTAX_MTU);
3097	if (mtu)
3098		goto out;
3099
3100	mtu = IPV6_MIN_MTU;
3101
3102	rcu_read_lock();
3103	idev = __in6_dev_get(dst->dev);
3104	if (idev)
3105		mtu = idev->cnf.mtu6;
3106	rcu_read_unlock();
3107
3108out:
3109	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
3110
3111	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
3112}
3113
3114/* MTU selection:
3115 * 1. mtu on route is locked - use it
3116 * 2. mtu from nexthop exception
3117 * 3. mtu from egress device
3118 *
3119 * based on ip6_dst_mtu_forward and exception logic of
3120 * rt6_find_cached_rt; called with rcu_read_lock
3121 */
3122u32 ip6_mtu_from_fib6(const struct fib6_result *res,
3123		      const struct in6_addr *daddr,
3124		      const struct in6_addr *saddr)
3125{
3126	const struct fib6_nh *nh = res->nh;
3127	struct fib6_info *f6i = res->f6i;
3128	struct inet6_dev *idev;
3129	struct rt6_info *rt;
3130	u32 mtu = 0;
3131
3132	if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
3133		mtu = f6i->fib6_pmtu;
3134		if (mtu)
3135			goto out;
3136	}
3137
3138	rt = rt6_find_cached_rt(res, daddr, saddr);
3139	if (unlikely(rt)) {
3140		mtu = dst_metric_raw(&rt->dst, RTAX_MTU);
3141	} else {
3142		struct net_device *dev = nh->fib_nh_dev;
3143
3144		mtu = IPV6_MIN_MTU;
3145		idev = __in6_dev_get(dev);
3146		if (idev && idev->cnf.mtu6 > mtu)
3147			mtu = idev->cnf.mtu6;
3148	}
3149
3150	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
3151out:
3152	return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
3153}
3154
3155struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
3156				  struct flowi6 *fl6)
3157{
3158	struct dst_entry *dst;
3159	struct rt6_info *rt;
3160	struct inet6_dev *idev = in6_dev_get(dev);
3161	struct net *net = dev_net(dev);
3162
3163	if (unlikely(!idev))
3164		return ERR_PTR(-ENODEV);
3165
3166	rt = ip6_dst_alloc(net, dev, 0);
3167	if (unlikely(!rt)) {
3168		in6_dev_put(idev);
3169		dst = ERR_PTR(-ENOMEM);
3170		goto out;
3171	}
3172
3173	rt->dst.input = ip6_input;
3174	rt->dst.output  = ip6_output;
3175	rt->rt6i_gateway  = fl6->daddr;
3176	rt->rt6i_dst.addr = fl6->daddr;
3177	rt->rt6i_dst.plen = 128;
3178	rt->rt6i_idev     = idev;
3179	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
3180
3181	/* Add this dst into uncached_list so that rt6_disable_ip() can
3182	 * properly release the net_device
3183	 */
3184	rt6_uncached_list_add(rt);
3185	atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
3186
3187	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
3188
3189out:
3190	return dst;
3191}
3192
3193static int ip6_dst_gc(struct dst_ops *ops)
3194{
3195	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
3196	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
3197	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
3198	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
3199	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
3200	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
3201	int entries;
3202
3203	entries = dst_entries_get_fast(ops);
3204	if (entries > rt_max_size)
3205		entries = dst_entries_get_slow(ops);
3206
3207	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
3208	    entries <= rt_max_size)
3209		goto out;
3210
3211	net->ipv6.ip6_rt_gc_expire++;
3212	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
3213	entries = dst_entries_get_slow(ops);
3214	if (entries < ops->gc_thresh)
3215		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout >> 1;
3216out:
3217	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire >> rt_elasticity;
3218	return entries > rt_max_size;
3219}
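
/* The thresholds above come from /proc/sys/net/ipv6/route/
 * (gc_min_interval, max_size, gc_elasticity, gc_timeout).
 * Illustrative behaviour under pressure: while the table stays above
 * max_size, every allocation triggers fib6_run_gc(), and each pass decays
 * ip6_rt_gc_expire by expire >> gc_elasticity, shortening the effective
 * timeout so aging becomes progressively more aggressive; once entries
 * fall below gc_thresh it is reset to gc_timeout / 2.
 */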
3220
3221static int ip6_nh_lookup_table(struct net *net, struct fib6_config *cfg,
3222			       const struct in6_addr *gw_addr, u32 tbid,
3223			       int flags, struct fib6_result *res)
3224{
3225	struct flowi6 fl6 = {
3226		.flowi6_oif = cfg->fc_ifindex,
3227		.daddr = *gw_addr,
3228		.saddr = cfg->fc_prefsrc,
3229	};
3230	struct fib6_table *table;
3231	int err;
3232
3233	table = fib6_get_table(net, tbid);
3234	if (!table)
3235		return -EINVAL;
3236
3237	if (!ipv6_addr_any(&cfg->fc_prefsrc))
3238		flags |= RT6_LOOKUP_F_HAS_SADDR;
3239
3240	flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
3241
3242	err = fib6_table_lookup(net, table, cfg->fc_ifindex, &fl6, res, flags);
3243	if (!err && res->f6i != net->ipv6.fib6_null_entry)
3244		fib6_select_path(net, res, &fl6, cfg->fc_ifindex,
3245				 cfg->fc_ifindex != 0, NULL, flags);
3246
3247	return err;
3248}
3249
3250static int ip6_route_check_nh_onlink(struct net *net,
3251				     struct fib6_config *cfg,
3252				     const struct net_device *dev,
3253				     struct netlink_ext_ack *extack)
3254{
3255	u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
3256	const struct in6_addr *gw_addr = &cfg->fc_gateway;
3257	struct fib6_result res = {};
3258	int err;
3259
3260	err = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0, &res);
3261	if (!err && !(res.fib6_flags & RTF_REJECT) &&
3262	    /* ignore match if it is the default route */
3263	    !ipv6_addr_any(&res.f6i->fib6_dst.addr) &&
3264	    (res.fib6_type != RTN_UNICAST || dev != res.nh->fib_nh_dev)) {
3265		NL_SET_ERR_MSG(extack,
3266			       "Nexthop has invalid gateway or device mismatch");
3267		err = -EINVAL;
3268	}
3269
3270	return err;
3271}
3272
3273static int ip6_route_check_nh(struct net *net,
3274			      struct fib6_config *cfg,
3275			      struct net_device **_dev,
3276			      struct inet6_dev **idev)
3277{
3278	const struct in6_addr *gw_addr = &cfg->fc_gateway;
3279	struct net_device *dev = _dev ? *_dev : NULL;
3280	int flags = RT6_LOOKUP_F_IFACE;
3281	struct fib6_result res = {};
3282	int err = -EHOSTUNREACH;
3283
3284	if (cfg->fc_table) {
3285		err = ip6_nh_lookup_table(net, cfg, gw_addr,
3286					  cfg->fc_table, flags, &res);
3287		/* gw_addr can not require a gateway or resolve to a reject
3288		 * route. If a device is given, it must match the result.
3289		 */
3290		if (err || res.fib6_flags & RTF_REJECT ||
3291		    res.nh->fib_nh_gw_family ||
3292		    (dev && dev != res.nh->fib_nh_dev))
3293			err = -EHOSTUNREACH;
3294	}
3295
3296	if (err < 0) {
3297		struct flowi6 fl6 = {
3298			.flowi6_oif = cfg->fc_ifindex,
3299			.daddr = *gw_addr,
3300		};
3301
3302		err = fib6_lookup(net, cfg->fc_ifindex, &fl6, &res, flags);
3303		if (err || res.fib6_flags & RTF_REJECT ||
3304		    res.nh->fib_nh_gw_family)
3305			err = -EHOSTUNREACH;
3306
3307		if (err)
3308			return err;
3309
3310		fib6_select_path(net, &res, &fl6, cfg->fc_ifindex,
3311				 cfg->fc_ifindex != 0, NULL, flags);
3312	}
3313
3314	err = 0;
3315	if (dev) {
3316		if (dev != res.nh->fib_nh_dev)
3317			err = -EHOSTUNREACH;
3318	} else {
3319		*_dev = dev = res.nh->fib_nh_dev;
3320		dev_hold(dev);
3321		*idev = in6_dev_get(dev);
3322	}
3323
3324	return err;
3325}
3326
3327static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
3328			   struct net_device **_dev, struct inet6_dev **idev,
3329			   struct netlink_ext_ack *extack)
3330{
3331	const struct in6_addr *gw_addr = &cfg->fc_gateway;
3332	int gwa_type = ipv6_addr_type(gw_addr);
3333	bool skip_dev = !(gwa_type & IPV6_ADDR_LINKLOCAL);
3334	const struct net_device *dev = *_dev;
3335	bool need_addr_check = !dev;
3336	int err = -EINVAL;
3337
3338	/* if gw_addr is local, we will fail to detect this in case the
3339	 * address is still TENTATIVE (DAD in progress). rt6_lookup() will
3340	 * return the already-added prefix route via the interface that the
3341	 * prefix route was assigned to, which might be non-loopback.
3342	 */
3343	if (dev &&
3344	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
3345		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
3346		goto out;
3347	}
3348
3349	if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
3350		/* IPv6 strictly inhibits using non-link-local
3351		 * addresses as nexthop address.
3352		 * Otherwise, the router will not be able to send redirects.
3353		 * That is usually desirable, but in some (rare!)
3354		 * circumstances (SIT, PtP, NBMA NOARP links) it is handy
3355		 * to allow some exceptions. --ANK
3356		 * We allow IPv4-mapped nexthops to support RFC4798-type
3357		 * addressing
3358		 */
3359		if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
3360			NL_SET_ERR_MSG(extack, "Invalid gateway address");
3361			goto out;
3362		}
3363
3364		rcu_read_lock();
3365
3366		if (cfg->fc_flags & RTNH_F_ONLINK)
3367			err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
3368		else
3369			err = ip6_route_check_nh(net, cfg, _dev, idev);
3370
3371		rcu_read_unlock();
3372
3373		if (err)
3374			goto out;
3375	}
3376
3377	/* reload in case device was changed */
3378	dev = *_dev;
3379
3380	err = -EINVAL;
3381	if (!dev) {
3382		NL_SET_ERR_MSG(extack, "Egress device not specified");
3383		goto out;
3384	} else if (dev->flags & IFF_LOOPBACK) {
3385		NL_SET_ERR_MSG(extack,
3386			       "Egress device can not be loopback device for this route");
3387		goto out;
3388	}
3389
3390	/* if we did not check gw_addr above, do so now that the
3391	 * egress device has been resolved.
3392	 */
3393	if (need_addr_check &&
3394	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
3395		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
3396		goto out;
3397	}
3398
3399	err = 0;
3400out:
3401	return err;
3402}
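
/* Examples of what the checks above accept or reject (illustrative,
 * using documentation prefixes):
 *
 *	via fe80::1 dev eth0   - accepted: link-local unicast gateway
 *	via 2001:db8::1        - accepted only if a non-reject route to
 *				 2001:db8::1 resolves without a further
 *				 gateway (ip6_route_check_nh)
 *	via ::ffff:192.0.2.1   - candidate as an IPv4-mapped nexthop
 *				 (RFC 4798), subject to the same lookup
 *	via 2001:db8::1 onlink - validated by ip6_route_check_nh_onlink()
 *				 against the egress device's table
 */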
3403
3404static bool fib6_is_reject(u32 flags, struct net_device *dev, int addr_type)
3405{
3406	if ((flags & RTF_REJECT) ||
3407	    (dev && (dev->flags & IFF_LOOPBACK) &&
3408	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
3409	     !(flags & (RTF_ANYCAST | RTF_LOCAL))))
3410		return true;
3411
3412	return false;
3413}
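
/* Example (illustrative): "ip -6 route add unreachable 2001:db8::/64"
 * arrives with RTF_REJECT set and matches the first condition; a
 * non-local, non-anycast route whose nexthop resolved to the loopback
 * device matches the second. Callers below promote both to reject
 * routes instead of installing a looping route.
 */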
3414
3415int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
3416		 struct fib6_config *cfg, gfp_t gfp_flags,
3417		 struct netlink_ext_ack *extack)
3418{
3419	struct net_device *dev = NULL;
3420	struct inet6_dev *idev = NULL;
3421	int addr_type;
3422	int err;
3423
3424	fib6_nh->fib_nh_family = AF_INET6;
3425#ifdef CONFIG_IPV6_ROUTER_PREF
3426	fib6_nh->last_probe = jiffies;
3427#endif
3428	if (cfg->fc_is_fdb) {
3429		fib6_nh->fib_nh_gw6 = cfg->fc_gateway;
3430		fib6_nh->fib_nh_gw_family = AF_INET6;
3431		return 0;
3432	}
3433
3434	err = -ENODEV;
3435	if (cfg->fc_ifindex) {
3436		dev = dev_get_by_index(net, cfg->fc_ifindex);
3437		if (!dev)
3438			goto out;
3439		idev = in6_dev_get(dev);
3440		if (!idev)
3441			goto out;
3442	}
3443
3444	if (cfg->fc_flags & RTNH_F_ONLINK) {
3445		if (!dev) {
3446			NL_SET_ERR_MSG(extack,
3447				       "Nexthop device required for onlink");
3448			goto out;
3449		}
3450
3451		if (!(dev->flags & IFF_UP)) {
3452			NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3453			err = -ENETDOWN;
3454			goto out;
3455		}
3456
3457		fib6_nh->fib_nh_flags |= RTNH_F_ONLINK;
3458	}
3459
3460	fib6_nh->fib_nh_weight = 1;
3461
3462	/* We cannot add true routes via loopback here;
3463	 * they would result in kernel looping. Promote them to reject routes.
3464	 */
3465	addr_type = ipv6_addr_type(&cfg->fc_dst);
3466	if (fib6_is_reject(cfg->fc_flags, dev, addr_type)) {
3467		/* hold loopback dev/idev if we haven't done so. */
3468		if (dev != net->loopback_dev) {
3469			if (dev) {
3470				dev_put(dev);
3471				in6_dev_put(idev);
3472			}
3473			dev = net->loopback_dev;
3474			dev_hold(dev);
3475			idev = in6_dev_get(dev);
3476			if (!idev) {
3477				err = -ENODEV;
3478				goto out;
3479			}
3480		}
3481		goto pcpu_alloc;
3482	}
3483
3484	if (cfg->fc_flags & RTF_GATEWAY) {
3485		err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
3486		if (err)
3487			goto out;
3488
3489		fib6_nh->fib_nh_gw6 = cfg->fc_gateway;
3490		fib6_nh->fib_nh_gw_family = AF_INET6;
3491	}
3492
3493	err = -ENODEV;
3494	if (!dev)
3495		goto out;
3496
3497	if (idev->cnf.disable_ipv6) {
3498		NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
3499		err = -EACCES;
3500		goto out;
3501	}
3502
3503	if (!(dev->flags & IFF_UP) && !cfg->fc_ignore_dev_down) {
3504		NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3505		err = -ENETDOWN;
3506		goto out;
3507	}
3508
3509	if (!(cfg->fc_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
3510	    !netif_carrier_ok(dev))
3511		fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
3512
3513	err = fib_nh_common_init(net, &fib6_nh->nh_common, cfg->fc_encap,
3514				 cfg->fc_encap_type, cfg, gfp_flags, extack);
3515	if (err)
3516		goto out;
3517
3518pcpu_alloc:
3519	fib6_nh->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags);
3520	if (!fib6_nh->rt6i_pcpu) {
3521		err = -ENOMEM;
3522		goto out;
3523	}
3524
3525	fib6_nh->fib_nh_dev = dev;
3526	fib6_nh->fib_nh_oif = dev->ifindex;
3527	err = 0;
3528out:
3529	if (idev)
3530		in6_dev_put(idev);
3531
3532	if (err) {
3533		lwtstate_put(fib6_nh->fib_nh_lws);
3534		fib6_nh->fib_nh_lws = NULL;
3535		if (dev)
3536			dev_put(dev);
3537	}
3538
3539	return err;
3540}
3541
3542void fib6_nh_release(struct fib6_nh *fib6_nh)
3543{
3544	struct rt6_exception_bucket *bucket;
3545
3546	rcu_read_lock();
3547
3548	fib6_nh_flush_exceptions(fib6_nh, NULL);
3549	bucket = fib6_nh_get_excptn_bucket(fib6_nh, NULL);
3550	if (bucket) {
3551		rcu_assign_pointer(fib6_nh->rt6i_exception_bucket, NULL);
3552		kfree(bucket);
3553	}
3554
3555	rcu_read_unlock();
3556
3557	if (fib6_nh->rt6i_pcpu) {
3558		int cpu;
3559
3560		for_each_possible_cpu(cpu) {
3561			struct rt6_info **ppcpu_rt;
3562			struct rt6_info *pcpu_rt;
3563
3564			ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
3565			pcpu_rt = *ppcpu_rt;
3566			if (pcpu_rt) {
3567				dst_dev_put(&pcpu_rt->dst);
3568				dst_release(&pcpu_rt->dst);
3569				*ppcpu_rt = NULL;
3570			}
3571		}
3572
3573		free_percpu(fib6_nh->rt6i_pcpu);
3574	}
3575
3576	fib_nh_common_release(&fib6_nh->nh_common);
3577}
3578
3579static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
3580					      gfp_t gfp_flags,
3581					      struct netlink_ext_ack *extack)
3582{
3583	struct net *net = cfg->fc_nlinfo.nl_net;
3584	struct fib6_info *rt = NULL;
3585	struct nexthop *nh = NULL;
3586	struct fib6_table *table;
3587	struct fib6_nh *fib6_nh;
3588	int err = -EINVAL;
3589	int addr_type;
3590
3591	/* RTF_PCPU is an internal flag; can not be set by userspace */
3592	if (cfg->fc_flags & RTF_PCPU) {
3593		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
3594		goto out;
3595	}
3596
3597	/* RTF_CACHE is an internal flag; can not be set by userspace */
3598	if (cfg->fc_flags & RTF_CACHE) {
3599		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
3600		goto out;
3601	}
3602
3603	if (cfg->fc_type > RTN_MAX) {
3604		NL_SET_ERR_MSG(extack, "Invalid route type");
3605		goto out;
3606	}
3607
3608	if (cfg->fc_dst_len > 128) {
3609		NL_SET_ERR_MSG(extack, "Invalid prefix length");
3610		goto out;
3611	}
3612	if (cfg->fc_src_len > 128) {
3613		NL_SET_ERR_MSG(extack, "Invalid source address length");
3614		goto out;
3615	}
3616#ifndef CONFIG_IPV6_SUBTREES
3617	if (cfg->fc_src_len) {
3618		NL_SET_ERR_MSG(extack,
3619			       "Specifying source address requires IPV6_SUBTREES to be enabled");
3620		goto out;
3621	}
3622#endif
3623	if (cfg->fc_nh_id) {
3624		nh = nexthop_find_by_id(net, cfg->fc_nh_id);
3625		if (!nh) {
3626			NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
3627			goto out;
3628		}
3629		err = fib6_check_nexthop(nh, cfg, extack);
3630		if (err)
3631			goto out;
3632	}
3633
3634	err = -ENOBUFS;
3635	if (cfg->fc_nlinfo.nlh &&
3636	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
3637		table = fib6_get_table(net, cfg->fc_table);
3638		if (!table) {
3639			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
3640			table = fib6_new_table(net, cfg->fc_table);
3641		}
3642	} else {
3643		table = fib6_new_table(net, cfg->fc_table);
3644	}
3645
3646	if (!table)
3647		goto out;
3648
3649	err = -ENOMEM;
3650	rt = fib6_info_alloc(gfp_flags, !nh);
3651	if (!rt)
3652		goto out;
3653
3654	rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len,
3655					       extack);
3656	if (IS_ERR(rt->fib6_metrics)) {
3657		err = PTR_ERR(rt->fib6_metrics);
3658		/* Do not leave garbage there. */
3659		rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
3660		goto out;
3661	}
3662
3663	if (cfg->fc_flags & RTF_ADDRCONF)
3664		rt->dst_nocount = true;
3665
3666	if (cfg->fc_flags & RTF_EXPIRES)
3667		fib6_set_expires(rt, jiffies +
3668				clock_t_to_jiffies(cfg->fc_expires));
3669	else
3670		fib6_clean_expires(rt);
3671
3672	if (cfg->fc_protocol == RTPROT_UNSPEC)
3673		cfg->fc_protocol = RTPROT_BOOT;
3674	rt->fib6_protocol = cfg->fc_protocol;
3675
3676	rt->fib6_table = table;
3677	rt->fib6_metric = cfg->fc_metric;
3678	rt->fib6_type = cfg->fc_type ? : RTN_UNICAST;
3679	rt->fib6_flags = cfg->fc_flags & ~RTF_GATEWAY;
3680
3681	ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
3682	rt->fib6_dst.plen = cfg->fc_dst_len;
3683
3684#ifdef CONFIG_IPV6_SUBTREES
3685	ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
3686	rt->fib6_src.plen = cfg->fc_src_len;
3687#endif
3688	if (nh) {
3689		if (rt->fib6_src.plen) {
3690			NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing");
3691			goto out;
3692		}
3693		if (!nexthop_get(nh)) {
3694			NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
3695			goto out;
3696		}
3697		rt->nh = nh;
3698		fib6_nh = nexthop_fib6_nh(rt->nh);
3699	} else {
3700		err = fib6_nh_init(net, rt->fib6_nh, cfg, gfp_flags, extack);
3701		if (err)
3702			goto out;
3703
3704		fib6_nh = rt->fib6_nh;
3705
3706		/* We cannot add true routes via loopback here; they would
3707		 * result in kernel looping. Promote them to reject routes.
3708		 */
3709		addr_type = ipv6_addr_type(&cfg->fc_dst);
3710		if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh->fib_nh_dev,
3711				   addr_type))
3712			rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP;
3713	}
3714
3715	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
3716		struct net_device *dev = fib6_nh->fib_nh_dev;
3717
3718		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
3719			NL_SET_ERR_MSG(extack, "Invalid source address");
3720			err = -EINVAL;
3721			goto out;
3722		}
3723		rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
3724		rt->fib6_prefsrc.plen = 128;
3725	} else
3726		rt->fib6_prefsrc.plen = 0;
3727
3728	return rt;
3729out:
3730	fib6_info_release(rt);
3731	return ERR_PTR(err);
3732}
3733
3734int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
3735		  struct netlink_ext_ack *extack)
3736{
3737	struct fib6_info *rt;
3738	int err;
3739
3740	rt = ip6_route_info_create(cfg, gfp_flags, extack);
3741	if (IS_ERR(rt))
3742		return PTR_ERR(rt);
3743
3744	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
3745	fib6_info_release(rt);
3746
3747	return err;
3748}
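
/* Illustrative sketch of an in-kernel caller (field values are
 * assumptions for the example, not taken from this file):
 *
 *	struct fib6_config cfg = {
 *		.fc_table	= RT6_TABLE_MAIN,
 *		.fc_ifindex	= dev->ifindex,
 *		.fc_dst		= prefix,	// e.g. 2001:db8::
 *		.fc_dst_len	= 64,
 *		.fc_gateway	= gw,		// e.g. fe80::1
 *		.fc_flags	= RTF_UP | RTF_GATEWAY,
 *		.fc_nlinfo.nl_net = net,
 *	};
 *	err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
 */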
3749
3750static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
3751{
3752	struct net *net = info->nl_net;
3753	struct fib6_table *table;
3754	int err;
3755
3756	if (rt == net->ipv6.fib6_null_entry) {
3757		err = -ENOENT;
3758		goto out;
3759	}
3760
3761	table = rt->fib6_table;
3762	spin_lock_bh(&table->tb6_lock);
3763	err = fib6_del(rt, info);
3764	spin_unlock_bh(&table->tb6_lock);
3765
3766out:
3767	fib6_info_release(rt);
3768	return err;
3769}
3770
3771int ip6_del_rt(struct net *net, struct fib6_info *rt, bool skip_notify)
3772{
3773	struct nl_info info = {
3774		.nl_net = net,
3775		.skip_notify = skip_notify
3776	};
3777
3778	return __ip6_del_rt(rt, &info);
3779}
3780
3781static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
3782{
3783	struct nl_info *info = &cfg->fc_nlinfo;
3784	struct net *net = info->nl_net;
3785	struct sk_buff *skb = NULL;
3786	struct fib6_table *table;
3787	int err = -ENOENT;
3788
3789	if (rt == net->ipv6.fib6_null_entry)
3790		goto out_put;
3791	table = rt->fib6_table;
3792	spin_lock_bh(&table->tb6_lock);
3793
3794	if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
3795		struct fib6_info *sibling, *next_sibling;
3796		struct fib6_node *fn;
3797
3798		/* prefer to send a single notification with all hops */
3799		skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3800		if (skb) {
3801			u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3802
3803			if (rt6_fill_node(net, skb, rt, NULL,
3804					  NULL, NULL, 0, RTM_DELROUTE,
3805					  info->portid, seq, 0) < 0) {
3806				kfree_skb(skb);
3807				skb = NULL;
3808			} else
3809				info->skip_notify = 1;
3810		}
3811
3812		/* 'rt' points to the first sibling route. If it is not the
3813		 * leaf, then we do not need to send a notification. Otherwise,
3814		 * we need to check if the last sibling has a next route or not
3815		 * and emit a replace or delete notification, respectively.
3816		 */
3817		info->skip_notify_kernel = 1;
3818		fn = rcu_dereference_protected(rt->fib6_node,
3819					    lockdep_is_held(&table->tb6_lock));
3820		if (rcu_access_pointer(fn->leaf) == rt) {
3821			struct fib6_info *last_sibling, *replace_rt;
3822
3823			last_sibling = list_last_entry(&rt->fib6_siblings,
3824						       struct fib6_info,
3825						       fib6_siblings);
3826			replace_rt = rcu_dereference_protected(
3827					    last_sibling->fib6_next,
3828					    lockdep_is_held(&table->tb6_lock));
3829			if (replace_rt)
3830				call_fib6_entry_notifiers_replace(net,
3831								  replace_rt);
3832			else
3833				call_fib6_multipath_entry_notifiers(net,
3834						       FIB_EVENT_ENTRY_DEL,
3835						       rt, rt->fib6_nsiblings,
3836						       NULL);
3837		}
3838		list_for_each_entry_safe(sibling, next_sibling,
3839					 &rt->fib6_siblings,
3840					 fib6_siblings) {
3841			err = fib6_del(sibling, info);
3842			if (err)
3843				goto out_unlock;
3844		}
3845	}
3846
3847	err = fib6_del(rt, info);
3848out_unlock:
3849	spin_unlock_bh(&table->tb6_lock);
3850out_put:
3851	fib6_info_release(rt);
3852
3853	if (skb) {
3854		rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3855			    info->nlh, gfp_any());
3856	}
3857	return err;
3858}
3859
3860static int __ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
3861{
3862	int rc = -ESRCH;
3863
3864	if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
3865		goto out;
3866
3867	if (cfg->fc_flags & RTF_GATEWAY &&
3868	    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
3869		goto out;
3870
3871	rc = rt6_remove_exception_rt(rt);
3872out:
3873	return rc;
3874}
3875
3876static int ip6_del_cached_rt(struct fib6_config *cfg, struct fib6_info *rt,
3877			     struct fib6_nh *nh)
3878{
3879	struct fib6_result res = {
3880		.f6i = rt,
3881		.nh = nh,
3882	};
3883	struct rt6_info *rt_cache;
3884
3885	rt_cache = rt6_find_cached_rt(&res, &cfg->fc_dst, &cfg->fc_src);
3886	if (rt_cache)
3887		return __ip6_del_cached_rt(rt_cache, cfg);
3888
3889	return 0;
3890}
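/* Cached (exception) routes take this path when the request carries
 * RTM_F_CLONED, which rtm_to_fib6_config() below translates to RTF_CACHE.
 * As an illustration (iproute2 behaviour assumed, it may vary by version):
 *
 *	ip -6 route flush cache
 *
 * dumps the cloned entries and deletes them one by one, each deletion
 * ending up in rt6_remove_exception_rt() via __ip6_del_cached_rt().
 */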
3891
3892struct fib6_nh_del_cached_rt_arg {
3893	struct fib6_config *cfg;
3894	struct fib6_info *f6i;
3895};
3896
3897static int fib6_nh_del_cached_rt(struct fib6_nh *nh, void *_arg)
3898{
3899	struct fib6_nh_del_cached_rt_arg *arg = _arg;
3900	int rc;
3901
3902	rc = ip6_del_cached_rt(arg->cfg, arg->f6i, nh);
3903	return rc != -ESRCH ? rc : 0;
3904}
3905
3906static int ip6_del_cached_rt_nh(struct fib6_config *cfg, struct fib6_info *f6i)
3907{
3908	struct fib6_nh_del_cached_rt_arg arg = {
3909		.cfg = cfg,
3910		.f6i = f6i
3911	};
3912
3913	return nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_del_cached_rt, &arg);
3914}
3915
3916static int ip6_route_del(struct fib6_config *cfg,
3917			 struct netlink_ext_ack *extack)
3918{
3919	struct fib6_table *table;
3920	struct fib6_info *rt;
3921	struct fib6_node *fn;
3922	int err = -ESRCH;
3923
3924	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
3925	if (!table) {
3926		NL_SET_ERR_MSG(extack, "FIB table does not exist");
3927		return err;
3928	}
3929
3930	rcu_read_lock();
3931
3932	fn = fib6_locate(&table->tb6_root,
3933			 &cfg->fc_dst, cfg->fc_dst_len,
3934			 &cfg->fc_src, cfg->fc_src_len,
3935			 !(cfg->fc_flags & RTF_CACHE));
3936
3937	if (fn) {
3938		for_each_fib6_node_rt_rcu(fn) {
3939			struct fib6_nh *nh;
3940
3941			if (rt->nh && cfg->fc_nh_id &&
3942			    rt->nh->id != cfg->fc_nh_id)
3943				continue;
3944
3945			if (cfg->fc_flags & RTF_CACHE) {
3946				int rc = 0;
3947
3948				if (rt->nh) {
3949					rc = ip6_del_cached_rt_nh(cfg, rt);
3950				} else if (cfg->fc_nh_id) {
3951					continue;
3952				} else {
3953					nh = rt->fib6_nh;
3954					rc = ip6_del_cached_rt(cfg, rt, nh);
3955				}
3956				if (rc != -ESRCH) {
3957					rcu_read_unlock();
3958					return rc;
3959				}
3960				continue;
3961			}
3962
3963			if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
3964				continue;
3965			if (cfg->fc_protocol &&
3966			    cfg->fc_protocol != rt->fib6_protocol)
3967				continue;
3968
3969			if (rt->nh) {
3970				if (!fib6_info_hold_safe(rt))
3971					continue;
3972				rcu_read_unlock();
3973
3974				return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3975			}
3976			if (cfg->fc_nh_id)
3977				continue;
3978
3979			nh = rt->fib6_nh;
3980			if (cfg->fc_ifindex &&
3981			    (!nh->fib_nh_dev ||
3982			     nh->fib_nh_dev->ifindex != cfg->fc_ifindex))
3983				continue;
3984			if (cfg->fc_flags & RTF_GATEWAY &&
3985			    !ipv6_addr_equal(&cfg->fc_gateway, &nh->fib_nh_gw6))
3986				continue;
3987			if (!fib6_info_hold_safe(rt))
3988				continue;
3989			rcu_read_unlock();
3990
3991			/* if a gateway was specified, delete only that one hop */
3992			if (cfg->fc_flags & RTF_GATEWAY)
3993				return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3994
3995			return __ip6_del_rt_siblings(rt, cfg);
3996		}
3997	}
3998	rcu_read_unlock();
3999
4000	return err;
4001}
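/* A hedged sketch of a kernel-internal deletion request; the field values
 * below are hypothetical:
 *
 *	struct fib6_config cfg = {
 *		.fc_table	  = RT6_TABLE_MAIN,
 *		.fc_dst_len	  = 64,			<- e.g. 2001:db8::/64
 *		.fc_ifindex	  = 2,
 *		.fc_flags	  = RTF_UP | RTF_GATEWAY,
 *		.fc_nlinfo.nl_net = net,
 *	};
 *
 *	cfg.fc_dst = prefix;
 *	cfg.fc_gateway = gw;
 *	err = ip6_route_del(&cfg, NULL);
 *
 * With RTF_GATEWAY set, only the one matching hop is removed via
 * __ip6_del_rt(); without it, __ip6_del_rt_siblings() handles the route and,
 * when fc_delete_all_nh is set, all of its siblings in one go.
 */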
4002
4003static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
4004{
4005	struct netevent_redirect netevent;
4006	struct rt6_info *rt, *nrt = NULL;
4007	struct fib6_result res = {};
4008	struct ndisc_options ndopts;
4009	struct inet6_dev *in6_dev;
4010	struct neighbour *neigh;
4011	struct rd_msg *msg;
4012	int optlen, on_link;
4013	u8 *lladdr;
4014
4015	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
4016	optlen -= sizeof(*msg);
4017
4018	if (optlen < 0) {
4019		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
4020		return;
4021	}
4022
4023	msg = (struct rd_msg *)icmp6_hdr(skb);
4024
4025	if (ipv6_addr_is_multicast(&msg->dest)) {
4026		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
4027		return;
4028	}
4029
4030	on_link = 0;
4031	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
4032		on_link = 1;
4033	} else if (ipv6_addr_type(&msg->target) !=
4034		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
4035		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
4036		return;
4037	}
4038
4039	in6_dev = __in6_dev_get(skb->dev);
4040	if (!in6_dev)
4041		return;
4042	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
4043		return;
4044
4045	/* RFC2461 8.1:
4046	 *	The IP source address of the Redirect MUST be the same as the current
4047	 *	first-hop router for the specified ICMP Destination Address.
4048	 */
4049
4050	if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
4051		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
4052		return;
4053	}
4054
4055	lladdr = NULL;
4056	if (ndopts.nd_opts_tgt_lladdr) {
4057		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
4058					     skb->dev);
4059		if (!lladdr) {
4060			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
4061			return;
4062		}
4063	}
4064
4065	rt = (struct rt6_info *) dst;
4066	if (rt->rt6i_flags & RTF_REJECT) {
4067		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
4068		return;
4069	}
4070
4071	/* Redirect received -> path was valid.
4072	 * Look, redirects are sent only in response to data packets,
4073	 * so that this nexthop apparently is reachable. --ANK
4074	 */
4075	dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
4076
4077	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
4078	if (!neigh)
4079		return;
4080
4081	/*
4082	 *	We have finally decided to accept it.
4083	 */
4084
4085	ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
4086		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
4087		     NEIGH_UPDATE_F_OVERRIDE|
4088		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
4089				     NEIGH_UPDATE_F_ISROUTER)),
4090		     NDISC_REDIRECT, &ndopts);
4091
4092	rcu_read_lock();
4093	res.f6i = rcu_dereference(rt->from);
4094	if (!res.f6i)
4095		goto out;
4096
4097	if (res.f6i->nh) {
4098		struct fib6_nh_match_arg arg = {
4099			.dev = dst->dev,
4100			.gw = &rt->rt6i_gateway,
4101		};
4102
4103		nexthop_for_each_fib6_nh(res.f6i->nh,
4104					 fib6_nh_find_match, &arg);
4105
4106		/* fib6_info uses a nexthop that does not have a fib6_nh
4107		 * using dst->dev. Should be impossible.
4108		 */
4109		if (!arg.match)
4110			goto out;
4111		res.nh = arg.match;
4112	} else {
4113		res.nh = res.f6i->fib6_nh;
4114	}
4115
4116	res.fib6_flags = res.f6i->fib6_flags;
4117	res.fib6_type = res.f6i->fib6_type;
4118	nrt = ip6_rt_cache_alloc(&res, &msg->dest, NULL);
4119	if (!nrt)
4120		goto out;
4121
4122	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
4123	if (on_link)
4124		nrt->rt6i_flags &= ~RTF_GATEWAY;
4125
4126	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
4127
4128	/* rt6_insert_exception() will take care of duplicated exceptions */
4129	if (rt6_insert_exception(nrt, &res)) {
4130		dst_release_immediate(&nrt->dst);
4131		goto out;
4132	}
4133
4134	netevent.old = &rt->dst;
4135	netevent.new = &nrt->dst;
4136	netevent.daddr = &msg->dest;
4137	netevent.neigh = neigh;
4138	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
4139
4140out:
4141	rcu_read_unlock();
4142	neigh_release(neigh);
4143}
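/* The net effect of an accepted redirect is a new RTF_CACHE exception route
 * pinned to the new gateway. On a typical system this can be inspected with
 * (illustrative commands, output omitted):
 *
 *	ip -6 route get 2001:db8::1
 *	ip -6 route show cache
 *
 * where the cached entry carries the RTF_DYNAMIC/RTF_GATEWAY flags set
 * above and ages out like any other exception.
 */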
4144
4145#ifdef CONFIG_IPV6_ROUTE_INFO
4146static struct fib6_info *rt6_get_route_info(struct net *net,
4147					   const struct in6_addr *prefix, int prefixlen,
4148					   const struct in6_addr *gwaddr,
4149					   struct net_device *dev)
4150{
4151	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
4152	int ifindex = dev->ifindex;
4153	struct fib6_node *fn;
4154	struct fib6_info *rt = NULL;
4155	struct fib6_table *table;
4156
4157	table = fib6_get_table(net, tb_id);
4158	if (!table)
4159		return NULL;
4160
4161	rcu_read_lock();
4162	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
4163	if (!fn)
4164		goto out;
4165
4166	for_each_fib6_node_rt_rcu(fn) {
4167		/* these routes do not use nexthops */
4168		if (rt->nh)
4169			continue;
4170		if (rt->fib6_nh->fib_nh_dev->ifindex != ifindex)
4171			continue;
4172		if (!(rt->fib6_flags & RTF_ROUTEINFO) ||
4173		    !rt->fib6_nh->fib_nh_gw_family)
4174			continue;
4175		if (!ipv6_addr_equal(&rt->fib6_nh->fib_nh_gw6, gwaddr))
4176			continue;
4177		if (!fib6_info_hold_safe(rt))
4178			continue;
4179		break;
4180	}
4181out:
4182	rcu_read_unlock();
4183	return rt;
4184}
4185
4186static struct fib6_info *rt6_add_route_info(struct net *net,
4187					   const struct in6_addr *prefix, int prefixlen,
4188					   const struct in6_addr *gwaddr,
4189					   struct net_device *dev,
4190					   unsigned int pref)
4191{
4192	struct fib6_config cfg = {
4193		.fc_metric	= IP6_RT_PRIO_USER,
4194		.fc_ifindex	= dev->ifindex,
4195		.fc_dst_len	= prefixlen,
4196		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
4197				  RTF_UP | RTF_PREF(pref),
4198		.fc_protocol = RTPROT_RA,
4199		.fc_type = RTN_UNICAST,
4200		.fc_nlinfo.portid = 0,
4201		.fc_nlinfo.nlh = NULL,
4202		.fc_nlinfo.nl_net = net,
4203	};
4204
4205	cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
4206	cfg.fc_dst = *prefix;
4207	cfg.fc_gateway = *gwaddr;
4208
4209	/* We should treat it as a default route if prefix length is 0. */
4210	if (!prefixlen)
4211		cfg.fc_flags |= RTF_DEFAULT;
4212
4213	ip6_route_add(&cfg, GFP_ATOMIC, NULL);
4214
4215	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
4216}
4217#endif
4218
4219struct fib6_info *rt6_get_dflt_router(struct net *net,
4220				     const struct in6_addr *addr,
4221				     struct net_device *dev)
4222{
4223	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
4224	struct fib6_info *rt;
4225	struct fib6_table *table;
4226
4227	table = fib6_get_table(net, tb_id);
4228	if (!table)
4229		return NULL;
4230
4231	rcu_read_lock();
4232	for_each_fib6_node_rt_rcu(&table->tb6_root) {
4233		struct fib6_nh *nh;
4234
4235		/* RA routes do not use nexthops */
4236		if (rt->nh)
4237			continue;
4238
4239		nh = rt->fib6_nh;
4240		if (dev == nh->fib_nh_dev &&
4241		    ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
4242		    ipv6_addr_equal(&nh->fib_nh_gw6, addr))
4243			break;
4244	}
4245	if (rt && !fib6_info_hold_safe(rt))
4246		rt = NULL;
4247	rcu_read_unlock();
4248	return rt;
4249}
4250
4251struct fib6_info *rt6_add_dflt_router(struct net *net,
4252				     const struct in6_addr *gwaddr,
4253				     struct net_device *dev,
4254				     unsigned int pref)
4255{
4256	struct fib6_config cfg = {
4257		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
4258		.fc_metric	= IP6_RT_PRIO_USER,
4259		.fc_ifindex	= dev->ifindex,
4260		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
4261				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
4262		.fc_protocol = RTPROT_RA,
4263		.fc_type = RTN_UNICAST,
4264		.fc_nlinfo.portid = 0,
4265		.fc_nlinfo.nlh = NULL,
4266		.fc_nlinfo.nl_net = net,
4267	};
4268
4269	cfg.fc_gateway = *gwaddr;
4270
4271	if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
4272		struct fib6_table *table;
4273
4274		table = fib6_get_table(dev_net(dev), cfg.fc_table);
4275		if (table)
4276			table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
4277	}
4278
4279	return rt6_get_dflt_router(net, gwaddr, dev);
4280}
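/* Sketch of the RA path: when ndisc processes a Router Advertisement with a
 * non-zero router lifetime it calls rt6_add_dflt_router(), and the result is
 * the familiar default route (illustrative rendering):
 *
 *	default via fe80::1 dev eth0 proto ra metric 1024 expires 1776sec pref medium
 *
 * RTF_EXPIRES ties the entry to the advertised lifetime, and RTF_PREF()
 * encodes the RFC 4191 router preference carried in the RA.
 */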
4281
4282static void __rt6_purge_dflt_routers(struct net *net,
4283				     struct fib6_table *table)
4284{
4285	struct fib6_info *rt;
4286
4287restart:
4288	rcu_read_lock();
4289	for_each_fib6_node_rt_rcu(&table->tb6_root) {
4290		struct net_device *dev = fib6_info_nh_dev(rt);
4291		struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
4292
4293		if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
4294		    (!idev || idev->cnf.accept_ra != 2) &&
4295		    fib6_info_hold_safe(rt)) {
4296			rcu_read_unlock();
4297			ip6_del_rt(net, rt, false);
4298			goto restart;
4299		}
4300	}
4301	rcu_read_unlock();
4302
4303	table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
4304}
4305
4306void rt6_purge_dflt_routers(struct net *net)
4307{
4308	struct fib6_table *table;
4309	struct hlist_head *head;
4310	unsigned int h;
4311
4312	rcu_read_lock();
4313
4314	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
4315		head = &net->ipv6.fib_table_hash[h];
4316		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
4317			if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
4318				__rt6_purge_dflt_routers(net, table);
4319		}
4320	}
4321
4322	rcu_read_unlock();
4323}
4324
4325static void rtmsg_to_fib6_config(struct net *net,
4326				 struct in6_rtmsg *rtmsg,
4327				 struct fib6_config *cfg)
4328{
4329	*cfg = (struct fib6_config){
4330		.fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
4331			 : RT6_TABLE_MAIN,
4332		.fc_ifindex = rtmsg->rtmsg_ifindex,
4333		.fc_metric = rtmsg->rtmsg_metric ? : IP6_RT_PRIO_USER,
4334		.fc_expires = rtmsg->rtmsg_info,
4335		.fc_dst_len = rtmsg->rtmsg_dst_len,
4336		.fc_src_len = rtmsg->rtmsg_src_len,
4337		.fc_flags = rtmsg->rtmsg_flags,
4338		.fc_type = rtmsg->rtmsg_type,
4339
4340		.fc_nlinfo.nl_net = net,
4341
4342		.fc_dst = rtmsg->rtmsg_dst,
4343		.fc_src = rtmsg->rtmsg_src,
4344		.fc_gateway = rtmsg->rtmsg_gateway,
4345	};
4346}
4347
4348int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg)
4349{
4350	struct fib6_config cfg;
4351	int err;
4352
4353	if (cmd != SIOCADDRT && cmd != SIOCDELRT)
4354		return -EINVAL;
4355	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
4356		return -EPERM;
4357
4358	rtmsg_to_fib6_config(net, rtmsg, &cfg);
4359
4360	rtnl_lock();
4361	switch (cmd) {
4362	case SIOCADDRT:
4363		err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
4364		break;
4365	case SIOCDELRT:
4366		err = ip6_route_del(&cfg, NULL);
4367		break;
4368	}
4369	rtnl_unlock();
4370	return err;
4371}
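/* For illustration, the legacy ioctl interface is driven from userspace with
 * a struct in6_rtmsg on an AF_INET6 socket; a minimal, untested sketch:
 *
 *	#include <linux/ipv6_route.h>	(struct in6_rtmsg, RTF_* flags)
 *	#include <net/if.h>
 *	#include <sys/ioctl.h>
 *
 *	struct in6_rtmsg rt = { 0 };
 *
 *	inet_pton(AF_INET6, "2001:db8::", &rt.rtmsg_dst);
 *	rt.rtmsg_dst_len = 64;
 *	rt.rtmsg_ifindex = if_nametoindex("eth0");
 *	rt.rtmsg_metric  = 1;
 *	rt.rtmsg_flags	 = RTF_UP;
 *
 *	int fd = socket(AF_INET6, SOCK_DGRAM, 0);
 *	ioctl(fd, SIOCADDRT, &rt);	<- needs CAP_NET_ADMIN, see above
 *
 * New code should prefer rtnetlink (RTM_NEWROUTE/RTM_DELROUTE) instead.
 */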
4372
4373/*
4374 *	Drop the packet on the floor
4375 */
4376
4377static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
4378{
4379	struct dst_entry *dst = skb_dst(skb);
4380	struct net *net = dev_net(dst->dev);
4381	struct inet6_dev *idev;
4382	int type;
4383
4384	if (netif_is_l3_master(skb->dev) &&
4385	    dst->dev == net->loopback_dev)
4386		idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
4387	else
4388		idev = ip6_dst_idev(dst);
4389
4390	switch (ipstats_mib_noroutes) {
4391	case IPSTATS_MIB_INNOROUTES:
4392		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
4393		if (type == IPV6_ADDR_ANY) {
4394			IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
4395			break;
4396		}
4397		fallthrough;
4398	case IPSTATS_MIB_OUTNOROUTES:
4399		IP6_INC_STATS(net, idev, ipstats_mib_noroutes);
4400		break;
4401	}
4402
4403		/* Start over by dropping the dst for the l3mdev case */
4404	if (netif_is_l3_master(skb->dev))
4405		skb_dst_drop(skb);
4406
4407	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
4408	kfree_skb(skb);
4409	return 0;
4410}
4411
4412static int ip6_pkt_discard(struct sk_buff *skb)
4413{
4414	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
4415}
4416
4417static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
4418{
4419	skb->dev = skb_dst(skb)->dev;
4420	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
4421}
4422
4423static int ip6_pkt_prohibit(struct sk_buff *skb)
4424{
4425	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
4426}
4427
4428static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
4429{
4430	skb->dev = skb_dst(skb)->dev;
4431	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
4432}
4433
4434/*
4435 *	Allocate a dst for local (unicast / anycast) address.
4436 */
4437
4438struct fib6_info *addrconf_f6i_alloc(struct net *net,
4439				     struct inet6_dev *idev,
4440				     const struct in6_addr *addr,
4441				     bool anycast, gfp_t gfp_flags)
4442{
4443	struct fib6_config cfg = {
4444		.fc_table = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL,
4445		.fc_ifindex = idev->dev->ifindex,
4446		.fc_flags = RTF_UP | RTF_NONEXTHOP,
4447		.fc_dst = *addr,
4448		.fc_dst_len = 128,
4449		.fc_protocol = RTPROT_KERNEL,
4450		.fc_nlinfo.nl_net = net,
4451		.fc_ignore_dev_down = true,
4452	};
4453	struct fib6_info *f6i;
4454
4455	if (anycast) {
4456		cfg.fc_type = RTN_ANYCAST;
4457		cfg.fc_flags |= RTF_ANYCAST;
4458	} else {
4459		cfg.fc_type = RTN_LOCAL;
4460		cfg.fc_flags |= RTF_LOCAL;
4461	}
4462
4463	f6i = ip6_route_info_create(&cfg, gfp_flags, NULL);
4464	if (!IS_ERR(f6i))
4465		f6i->dst_nocount = true;
4466	return f6i;
4467}
4468
4469/* remove a deleted IP from prefsrc entries */
4470struct arg_dev_net_ip {
4471	struct net_device *dev;
4472	struct net *net;
4473	struct in6_addr *addr;
4474};
4475
4476static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
4477{
4478	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
4479	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
4480	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
4481
4482	if (!rt->nh &&
4483	    ((void *)rt->fib6_nh->fib_nh_dev == dev || !dev) &&
4484	    rt != net->ipv6.fib6_null_entry &&
4485	    ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
4486		spin_lock_bh(&rt6_exception_lock);
4487		/* remove prefsrc entry */
4488		rt->fib6_prefsrc.plen = 0;
4489		spin_unlock_bh(&rt6_exception_lock);
4490	}
4491	return 0;
4492}
4493
4494void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
4495{
4496	struct net *net = dev_net(ifp->idev->dev);
4497	struct arg_dev_net_ip adni = {
4498		.dev = ifp->idev->dev,
4499		.net = net,
4500		.addr = &ifp->addr,
4501	};
4502	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
4503}
4504
4505#define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT)
4506
4507/* Remove routers and update dst entries when a gateway turns into a host. */
4508static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
4509{
4510	struct in6_addr *gateway = (struct in6_addr *)arg;
4511	struct fib6_nh *nh;
4512
4513	/* RA routes do not use nexthops */
4514	if (rt->nh)
4515		return 0;
4516
4517	nh = rt->fib6_nh;
4518	if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
4519	    nh->fib_nh_gw_family && ipv6_addr_equal(gateway, &nh->fib_nh_gw6))
4520		return -1;
4521
4522	/* Further clean up cached routes in exception table.
4523	 * This is needed because cached route may have a different
4524	 * gateway than its 'parent' in the case of an ip redirect.
4525	 */
4526	fib6_nh_exceptions_clean_tohost(nh, gateway);
4527
4528	return 0;
4529}
4530
4531void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
4532{
4533	fib6_clean_all(net, fib6_clean_tohost, gateway);
4534}
4535
4536struct arg_netdev_event {
4537	const struct net_device *dev;
4538	union {
4539		unsigned char nh_flags;
4540		unsigned long event;
4541	};
4542};
4543
4544static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
4545{
4546	struct fib6_info *iter;
4547	struct fib6_node *fn;
4548
4549	fn = rcu_dereference_protected(rt->fib6_node,
4550			lockdep_is_held(&rt->fib6_table->tb6_lock));
4551	iter = rcu_dereference_protected(fn->leaf,
4552			lockdep_is_held(&rt->fib6_table->tb6_lock));
4553	while (iter) {
4554		if (iter->fib6_metric == rt->fib6_metric &&
4555		    rt6_qualify_for_ecmp(iter))
4556			return iter;
4557		iter = rcu_dereference_protected(iter->fib6_next,
4558				lockdep_is_held(&rt->fib6_table->tb6_lock));
4559	}
4560
4561	return NULL;
4562}
4563
4564/* only called for fib entries with builtin fib6_nh */
4565static bool rt6_is_dead(const struct fib6_info *rt)
4566{
4567	if (rt->fib6_nh->fib_nh_flags & RTNH_F_DEAD ||
4568	    (rt->fib6_nh->fib_nh_flags & RTNH_F_LINKDOWN &&
4569	     ip6_ignore_linkdown(rt->fib6_nh->fib_nh_dev)))
4570		return true;
4571
4572	return false;
4573}
4574
4575static int rt6_multipath_total_weight(const struct fib6_info *rt)
4576{
4577	struct fib6_info *iter;
4578	int total = 0;
4579
4580	if (!rt6_is_dead(rt))
4581		total += rt->fib6_nh->fib_nh_weight;
4582
4583	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
4584		if (!rt6_is_dead(iter))
4585			total += iter->fib6_nh->fib_nh_weight;
4586	}
4587
4588	return total;
4589}
4590
4591static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
4592{
4593	int upper_bound = -1;
4594
4595	if (!rt6_is_dead(rt)) {
4596		*weight += rt->fib6_nh->fib_nh_weight;
4597		upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
4598						    total) - 1;
4599	}
4600	atomic_set(&rt->fib6_nh->fib_nh_upper_bound, upper_bound);
4601}
4602
4603static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
4604{
4605	struct fib6_info *iter;
4606	int weight = 0;
4607
4608	rt6_upper_bound_set(rt, &weight, total);
4609
4610	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4611		rt6_upper_bound_set(iter, &weight, total);
4612}
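/* Worked example with assumed weights: for three live siblings of weight
 * 1, 2 and 1, total = 4 and the running *weight after each call is 1, 3, 4,
 * so the stored upper bounds are
 *
 *	(1 << 31) * 1 / 4 - 1 = 0x1fffffff
 *	(1 << 31) * 3 / 4 - 1 = 0x5fffffff
 *	(1 << 31) * 4 / 4 - 1 = 0x7fffffff
 *
 * A flow hash in [0, 2^31) is compared against these bounds in order, so
 * the nexthops are selected in proportion 1:2:1. A dead nexthop keeps the
 * bound -1 and is never chosen.
 */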
4613
4614void rt6_multipath_rebalance(struct fib6_info *rt)
4615{
4616	struct fib6_info *first;
4617	int total;
4618
4619	/* In case the entire multipath route was marked for flushing,
4620	 * then there is no need to rebalance upon the removal of every
4621	 * sibling route.
4622	 */
4623	if (!rt->fib6_nsiblings || rt->should_flush)
4624		return;
4625
4626	/* During lookup routes are evaluated in order, so we need to
4627	 * make sure upper bounds are assigned from the first sibling
4628	 * onwards.
4629	 */
4630	first = rt6_multipath_first_sibling(rt);
4631	if (WARN_ON_ONCE(!first))
4632		return;
4633
4634	total = rt6_multipath_total_weight(first);
4635	rt6_multipath_upper_bound_set(first, total);
4636}
4637
4638static int fib6_ifup(struct fib6_info *rt, void *p_arg)
4639{
4640	const struct arg_netdev_event *arg = p_arg;
4641	struct net *net = dev_net(arg->dev);
4642
4643	if (rt != net->ipv6.fib6_null_entry && !rt->nh &&
4644	    rt->fib6_nh->fib_nh_dev == arg->dev) {
4645		rt->fib6_nh->fib_nh_flags &= ~arg->nh_flags;
4646		fib6_update_sernum_upto_root(net, rt);
4647		rt6_multipath_rebalance(rt);
4648	}
4649
4650	return 0;
4651}
4652
4653void rt6_sync_up(struct net_device *dev, unsigned char nh_flags)
4654{
4655	struct arg_netdev_event arg = {
4656		.dev = dev,
4657		{
4658			.nh_flags = nh_flags,
4659		},
4660	};
4661
4662	if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
4663		arg.nh_flags |= RTNH_F_LINKDOWN;
4664
4665	fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
4666}
4667
4668/* only called for fib entries with inline fib6_nh */
4669static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
4670				   const struct net_device *dev)
4671{
4672	struct fib6_info *iter;
4673
4674	if (rt->fib6_nh->fib_nh_dev == dev)
4675		return true;
4676	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4677		if (iter->fib6_nh->fib_nh_dev == dev)
4678			return true;
4679
4680	return false;
4681}
4682
4683static void rt6_multipath_flush(struct fib6_info *rt)
4684{
4685	struct fib6_info *iter;
4686
4687	rt->should_flush = 1;
4688	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4689		iter->should_flush = 1;
4690}
4691
4692static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
4693					     const struct net_device *down_dev)
4694{
4695	struct fib6_info *iter;
4696	unsigned int dead = 0;
4697
4698	if (rt->fib6_nh->fib_nh_dev == down_dev ||
4699	    rt->fib6_nh->fib_nh_flags & RTNH_F_DEAD)
4700		dead++;
4701	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4702		if (iter->fib6_nh->fib_nh_dev == down_dev ||
4703		    iter->fib6_nh->fib_nh_flags & RTNH_F_DEAD)
4704			dead++;
4705
4706	return dead;
4707}
4708
4709static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
4710				       const struct net_device *dev,
4711				       unsigned char nh_flags)
4712{
4713	struct fib6_info *iter;
4714
4715	if (rt->fib6_nh->fib_nh_dev == dev)
4716		rt->fib6_nh->fib_nh_flags |= nh_flags;
4717	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4718		if (iter->fib6_nh->fib_nh_dev == dev)
4719			iter->fib6_nh->fib_nh_flags |= nh_flags;
4720}
4721
4722/* called with write lock held for table with rt */
4723static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
4724{
4725	const struct arg_netdev_event *arg = p_arg;
4726	const struct net_device *dev = arg->dev;
4727	struct net *net = dev_net(dev);
4728
4729	if (rt == net->ipv6.fib6_null_entry || rt->nh)
4730		return 0;
4731
4732	switch (arg->event) {
4733	case NETDEV_UNREGISTER:
4734		return rt->fib6_nh->fib_nh_dev == dev ? -1 : 0;
4735	case NETDEV_DOWN:
4736		if (rt->should_flush)
4737			return -1;
4738		if (!rt->fib6_nsiblings)
4739			return rt->fib6_nh->fib_nh_dev == dev ? -1 : 0;
4740		if (rt6_multipath_uses_dev(rt, dev)) {
4741			unsigned int count;
4742
4743			count = rt6_multipath_dead_count(rt, dev);
4744			if (rt->fib6_nsiblings + 1 == count) {
4745				rt6_multipath_flush(rt);
4746				return -1;
4747			}
4748			rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
4749						   RTNH_F_LINKDOWN);
4750			fib6_update_sernum(net, rt);
4751			rt6_multipath_rebalance(rt);
4752		}
4753		return -2;
4754	case NETDEV_CHANGE:
4755		if (rt->fib6_nh->fib_nh_dev != dev ||
4756		    rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
4757			break;
4758		rt->fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
4759		rt6_multipath_rebalance(rt);
4760		break;
4761	}
4762
4763	return 0;
4764}
4765
4766void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
4767{
4768	struct arg_netdev_event arg = {
4769		.dev = dev,
4770		{
4771			.event = event,
4772		},
4773	};
4774	struct net *net = dev_net(dev);
4775
4776	if (net->ipv6.sysctl.skip_notify_on_dev_down)
4777		fib6_clean_all_skip_notify(net, fib6_ifdown, &arg);
4778	else
4779		fib6_clean_all(net, fib6_ifdown, &arg);
4780}
4781
4782void rt6_disable_ip(struct net_device *dev, unsigned long event)
4783{
4784	rt6_sync_down_dev(dev, event);
4785	rt6_uncached_list_flush_dev(dev_net(dev), dev);
4786	neigh_ifdown(&nd_tbl, dev);
4787}
4788
4789struct rt6_mtu_change_arg {
4790	struct net_device *dev;
4791	unsigned int mtu;
4792	struct fib6_info *f6i;
4793};
4794
4795static int fib6_nh_mtu_change(struct fib6_nh *nh, void *_arg)
4796{
4797	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *)_arg;
4798	struct fib6_info *f6i = arg->f6i;
4799
4800	/* For an administrative MTU increase, there is no way to discover
4801	 * an IPv6 PMTU increase, so the PMTU must be updated here.
4802	 * Since RFC 1981 doesn't cover administrative MTU increases,
4803	 * updating the PMTU on an increase is a MUST (e.g. for jumbo frames).
4804	 */
4805	if (nh->fib_nh_dev == arg->dev) {
4806		struct inet6_dev *idev = __in6_dev_get(arg->dev);
4807		u32 mtu = f6i->fib6_pmtu;
4808
4809		if (mtu >= arg->mtu ||
4810		    (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4811			fib6_metric_set(f6i, RTAX_MTU, arg->mtu);
4812
4813		spin_lock_bh(&rt6_exception_lock);
4814		rt6_exceptions_update_pmtu(idev, nh, arg->mtu);
4815		spin_unlock_bh(&rt6_exception_lock);
4816	}
4817
4818	return 0;
4819}
4820
4821static int rt6_mtu_change_route(struct fib6_info *f6i, void *p_arg)
4822{
4823	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
4824	struct inet6_dev *idev;
4825
4826	/* In IPv6, PMTU discovery is not optional,
4827	   so the RTAX_MTU lock cannot disable it.
4828	   We still use this lock to block changes
4829	   caused by addrconf/ndisc.
4830	*/
4831
4832	idev = __in6_dev_get(arg->dev);
4833	if (!idev)
4834		return 0;
4835
4836	if (fib6_metric_locked(f6i, RTAX_MTU))
4837		return 0;
4838
4839	arg->f6i = f6i;
4840	if (f6i->nh) {
4841		/* fib6_nh_mtu_change only returns 0, so this is safe */
4842		return nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_mtu_change,
4843						arg);
4844	}
4845
4846	return fib6_nh_mtu_change(f6i->fib6_nh, arg);
4847}
4848
4849void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
4850{
4851	struct rt6_mtu_change_arg arg = {
4852		.dev = dev,
4853		.mtu = mtu,
4854	};
4855
4856	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
4857}
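/* rt6_mtu_change() is driven from the IPv6 addrconf notifier on
 * NETDEV_CHANGEMTU, so an administrative change such as
 *
 *	ip link set dev eth0 mtu 9000
 *
 * propagates here and, per the comment in fib6_nh_mtu_change() above, also
 * raises the recorded PMTU of matching routes and cached exceptions.
 */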
4858
4859static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
4860	[RTA_UNSPEC]		= { .strict_start_type = RTA_DPORT + 1 },
4861	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
4862	[RTA_PREFSRC]		= { .len = sizeof(struct in6_addr) },
4863	[RTA_OIF]               = { .type = NLA_U32 },
4864	[RTA_IIF]		= { .type = NLA_U32 },
4865	[RTA_PRIORITY]          = { .type = NLA_U32 },
4866	[RTA_METRICS]           = { .type = NLA_NESTED },
4867	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
4868	[RTA_PREF]              = { .type = NLA_U8 },
4869	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
4870	[RTA_ENCAP]		= { .type = NLA_NESTED },
4871	[RTA_EXPIRES]		= { .type = NLA_U32 },
4872	[RTA_UID]		= { .type = NLA_U32 },
4873	[RTA_MARK]		= { .type = NLA_U32 },
4874	[RTA_TABLE]		= { .type = NLA_U32 },
4875	[RTA_IP_PROTO]		= { .type = NLA_U8 },
4876	[RTA_SPORT]		= { .type = NLA_U16 },
4877	[RTA_DPORT]		= { .type = NLA_U16 },
4878	[RTA_NH_ID]		= { .type = NLA_U32 },
4879};
4880
4881static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
4882			      struct fib6_config *cfg,
4883			      struct netlink_ext_ack *extack)
4884{
4885	struct rtmsg *rtm;
4886	struct nlattr *tb[RTA_MAX+1];
4887	unsigned int pref;
4888	int err;
4889
4890	err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
4891				     rtm_ipv6_policy, extack);
4892	if (err < 0)
4893		goto errout;
4894
4895	err = -EINVAL;
4896	rtm = nlmsg_data(nlh);
4897
4898	*cfg = (struct fib6_config){
4899		.fc_table = rtm->rtm_table,
4900		.fc_dst_len = rtm->rtm_dst_len,
4901		.fc_src_len = rtm->rtm_src_len,
4902		.fc_flags = RTF_UP,
4903		.fc_protocol = rtm->rtm_protocol,
4904		.fc_type = rtm->rtm_type,
4905
4906		.fc_nlinfo.portid = NETLINK_CB(skb).portid,
4907		.fc_nlinfo.nlh = nlh,
4908		.fc_nlinfo.nl_net = sock_net(skb->sk),
4909	};
4910
4911	if (rtm->rtm_type == RTN_UNREACHABLE ||
4912	    rtm->rtm_type == RTN_BLACKHOLE ||
4913	    rtm->rtm_type == RTN_PROHIBIT ||
4914	    rtm->rtm_type == RTN_THROW)
4915		cfg->fc_flags |= RTF_REJECT;
4916
4917	if (rtm->rtm_type == RTN_LOCAL)
4918		cfg->fc_flags |= RTF_LOCAL;
4919
4920	if (rtm->rtm_flags & RTM_F_CLONED)
4921		cfg->fc_flags |= RTF_CACHE;
4922
4923	cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4924
4925	if (tb[RTA_NH_ID]) {
4926		if (tb[RTA_GATEWAY]   || tb[RTA_OIF] ||
4927		    tb[RTA_MULTIPATH] || tb[RTA_ENCAP]) {
4928			NL_SET_ERR_MSG(extack,
4929				       "Nexthop specification and nexthop id are mutually exclusive");
4930			goto errout;
4931		}
4932		cfg->fc_nh_id = nla_get_u32(tb[RTA_NH_ID]);
4933	}
4934
4935	if (tb[RTA_GATEWAY]) {
4936		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
4937		cfg->fc_flags |= RTF_GATEWAY;
4938	}
4939	if (tb[RTA_VIA]) {
4940		NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute");
4941		goto errout;
4942	}
4943
4944	if (tb[RTA_DST]) {
4945		int plen = (rtm->rtm_dst_len + 7) >> 3;
4946
4947		if (nla_len(tb[RTA_DST]) < plen)
4948			goto errout;
4949
4950		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
4951	}
4952
4953	if (tb[RTA_SRC]) {
4954		int plen = (rtm->rtm_src_len + 7) >> 3;
4955
4956		if (nla_len(tb[RTA_SRC]) < plen)
4957			goto errout;
4958
4959		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
4960	}
4961
4962	if (tb[RTA_PREFSRC])
4963		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
4964
4965	if (tb[RTA_OIF])
4966		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4967
4968	if (tb[RTA_PRIORITY])
4969		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4970
4971	if (tb[RTA_METRICS]) {
4972		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4973		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
4974	}
4975
4976	if (tb[RTA_TABLE])
4977		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4978
4979	if (tb[RTA_MULTIPATH]) {
4980		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4981		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
4982
4983		err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
4984						     cfg->fc_mp_len, extack);
4985		if (err < 0)
4986			goto errout;
4987	}
4988
4989	if (tb[RTA_PREF]) {
4990		pref = nla_get_u8(tb[RTA_PREF]);
4991		if (pref != ICMPV6_ROUTER_PREF_LOW &&
4992		    pref != ICMPV6_ROUTER_PREF_HIGH)
4993			pref = ICMPV6_ROUTER_PREF_MEDIUM;
4994		cfg->fc_flags |= RTF_PREF(pref);
4995	}
4996
4997	if (tb[RTA_ENCAP])
4998		cfg->fc_encap = tb[RTA_ENCAP];
4999
5000	if (tb[RTA_ENCAP_TYPE]) {
5001		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
5002
5003		err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
5004		if (err < 0)
5005			goto errout;
5006	}
5007
5008	if (tb[RTA_EXPIRES]) {
5009		unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
5010
5011		if (addrconf_finite_timeout(timeout)) {
5012			cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
5013			cfg->fc_flags |= RTF_EXPIRES;
5014		}
5015	}
5016
5017	err = 0;
5018errout:
5019	return err;
5020}
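/* As an illustration of the RTA_EXPIRES handling above, iproute2 exposes
 * the attribute through the "expires" keyword (behaviour assumed from
 * common usage):
 *
 *	ip -6 route add 2001:db8::/64 via fe80::1 dev eth0 expires 60
 *
 * sends RTA_EXPIRES = 60; addrconf_timeout_fixup() converts and clamps the
 * value, and RTF_EXPIRES marks the resulting route for timed removal.
 */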
5021
5022struct rt6_nh {
5023	struct fib6_info *fib6_info;
5024	struct fib6_config r_cfg;
5025	struct list_head next;
5026};
5027
5028static int ip6_route_info_append(struct net *net,
5029				 struct list_head *rt6_nh_list,
5030				 struct fib6_info *rt,
5031				 struct fib6_config *r_cfg)
5032{
5033	struct rt6_nh *nh;
5034	int err = -EEXIST;
5035
5036	list_for_each_entry(nh, rt6_nh_list, next) {
5037		/* check if fib6_info already exists */
5038		if (rt6_duplicate_nexthop(nh->fib6_info, rt))
5039			return err;
5040	}
5041
5042	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
5043	if (!nh)
5044		return -ENOMEM;
5045	nh->fib6_info = rt;
5046	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
5047	list_add_tail(&nh->next, rt6_nh_list);
5048
5049	return 0;
5050}
5051
5052static void ip6_route_mpath_notify(struct fib6_info *rt,
5053				   struct fib6_info *rt_last,
5054				   struct nl_info *info,
5055				   __u16 nlflags)
5056{
5057	/* if this is an APPEND route, then rt points to the first route
5058	 * inserted and rt_last points to the last route inserted. Userspace
5059	 * wants a consistent dump of the route which starts at the first
5060	 * nexthop. Since sibling routes are always added at the end of
5061	 * the list, find the first sibling of the last route appended.
5062	 */
5063	if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
5064		rt = list_first_entry(&rt_last->fib6_siblings,
5065				      struct fib6_info,
5066				      fib6_siblings);
5067	}
5068
5069	if (rt)
5070		inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
5071}
5072
5073static bool ip6_route_mpath_should_notify(const struct fib6_info *rt)
5074{
5075	bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
5076	bool should_notify = false;
5077	struct fib6_info *leaf;
5078	struct fib6_node *fn;
5079
5080	rcu_read_lock();
5081	fn = rcu_dereference(rt->fib6_node);
5082	if (!fn)
5083		goto out;
5084
5085	leaf = rcu_dereference(fn->leaf);
5086	if (!leaf)
5087		goto out;
5088
5089	if (rt == leaf ||
5090	    (rt_can_ecmp && rt->fib6_metric == leaf->fib6_metric &&
5091	     rt6_qualify_for_ecmp(leaf)))
5092		should_notify = true;
5093out:
5094	rcu_read_unlock();
5095
5096	return should_notify;
5097}
5098
5099static int ip6_route_multipath_add(struct fib6_config *cfg,
5100				   struct netlink_ext_ack *extack)
5101{
5102	struct fib6_info *rt_notif = NULL, *rt_last = NULL;
5103	struct nl_info *info = &cfg->fc_nlinfo;
5104	struct fib6_config r_cfg;
5105	struct rtnexthop *rtnh;
5106	struct fib6_info *rt;
5107	struct rt6_nh *err_nh;
5108	struct rt6_nh *nh, *nh_safe;
5109	__u16 nlflags;
5110	int remaining;
5111	int attrlen;
5112	int err = 1;
5113	int nhn = 0;
5114	int replace = (cfg->fc_nlinfo.nlh &&
5115		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
5116	LIST_HEAD(rt6_nh_list);
5117
5118	nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
5119	if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
5120		nlflags |= NLM_F_APPEND;
5121
5122	remaining = cfg->fc_mp_len;
5123	rtnh = (struct rtnexthop *)cfg->fc_mp;
5124
5125	/* Parse a Multipath Entry and build a list (rt6_nh_list) with
5126	 * one fib6_info struct per nexthop
5127	 */
5128	while (rtnh_ok(rtnh, remaining)) {
5129		memcpy(&r_cfg, cfg, sizeof(*cfg));
5130		if (rtnh->rtnh_ifindex)
5131			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
5132
5133		attrlen = rtnh_attrlen(rtnh);
5134		if (attrlen > 0) {
5135			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
5136
5137			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
5138			if (nla) {
5139				r_cfg.fc_gateway = nla_get_in6_addr(nla);
5140				r_cfg.fc_flags |= RTF_GATEWAY;
5141			}
5142			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
5143			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
5144			if (nla)
5145				r_cfg.fc_encap_type = nla_get_u16(nla);
5146		}
5147
5148		r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
5149		rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
5150		if (IS_ERR(rt)) {
5151			err = PTR_ERR(rt);
5152			rt = NULL;
5153			goto cleanup;
5154		}
5155		if (!rt6_qualify_for_ecmp(rt)) {
5156			err = -EINVAL;
5157			NL_SET_ERR_MSG(extack,
5158				       "Device only routes can not be added for IPv6 using the multipath API.");
5159			fib6_info_release(rt);
5160			goto cleanup;
5161		}
5162
5163		rt->fib6_nh->fib_nh_weight = rtnh->rtnh_hops + 1;
5164
5165		err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
5166					    rt, &r_cfg);
5167		if (err) {
5168			fib6_info_release(rt);
5169			goto cleanup;
5170		}
5171
5172		rtnh = rtnh_next(rtnh, &remaining);
5173	}
5174
5175	if (list_empty(&rt6_nh_list)) {
5176		NL_SET_ERR_MSG(extack,
5177			       "Invalid nexthop configuration - no valid nexthops");
5178		return -EINVAL;
5179	}
5180
5181	/* for add and replace send one notification with all nexthops.
5182	 * Skip the notification in fib6_add_rt2node and send one with
5183	 * the full route when done
5184	 */
5185	info->skip_notify = 1;
5186
5187	/* For add and replace, send one notification with all nexthops. For
5188	 * append, send one notification with all appended nexthops.
5189	 */
5190	info->skip_notify_kernel = 1;
5191
5192	err_nh = NULL;
5193	list_for_each_entry(nh, &rt6_nh_list, next) {
5194		err = __ip6_ins_rt(nh->fib6_info, info, extack);
5195		fib6_info_release(nh->fib6_info);
5196
5197		if (!err) {
5198			/* save reference to last route successfully inserted */
5199			rt_last = nh->fib6_info;
5200
5201			/* save reference to first route for notification */
5202			if (!rt_notif)
5203				rt_notif = nh->fib6_info;
5204		}
5205
5206		/* nh->fib6_info is used or freed at this point, reset to NULL */
5207		nh->fib6_info = NULL;
5208		if (err) {
5209			if (replace && nhn)
5210				NL_SET_ERR_MSG_MOD(extack,
5211						   "multipath route replace failed (check consistency of installed routes)");
5212			err_nh = nh;
5213			goto add_errout;
5214		}
5215
5216		/* Because each route is added like a single route, we remove
5217		 * these flags after the first nexthop: if there is a collision,
5218		 * we have already failed to add the first nexthop, since
5219		 * fib6_add_rt2node() has rejected it; when replacing, the old
5220		 * nexthops have been replaced by the first new one, and the rest
5221		 * should simply be added to it.
5222		 */
5223		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
5224						     NLM_F_REPLACE);
5225		cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_CREATE;
5226		nhn++;
5227	}
5228
5229	/* An in-kernel notification should only be sent in case the new
5230	 * multipath route is added as the first route in the node, or if
5231	 * it was appended to it. We pass 'rt_notif' since it is the first
5232	 * sibling and might allow us to skip some checks in the replace case.
5233	 */
5234	if (ip6_route_mpath_should_notify(rt_notif)) {
5235		enum fib_event_type fib_event;
5236
5237		if (rt_notif->fib6_nsiblings != nhn - 1)
5238			fib_event = FIB_EVENT_ENTRY_APPEND;
5239		else
5240			fib_event = FIB_EVENT_ENTRY_REPLACE;
5241
5242		err = call_fib6_multipath_entry_notifiers(info->nl_net,
5243							  fib_event, rt_notif,
5244							  nhn - 1, extack);
5245		if (err) {
5246			/* Delete all the siblings that were just added */
5247			err_nh = NULL;
5248			goto add_errout;
5249		}
5250	}
5251
5252	/* success ... tell user about new route */
5253	ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
5254	goto cleanup;
5255
5256add_errout:
5257	/* send notification for routes that were added so that
5258	 * the delete notifications sent by ip6_route_del are
5259	 * coherent
5260	 */
5261	if (rt_notif)
5262		ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
5263
5264	/* Delete routes that were already added */
5265	list_for_each_entry(nh, &rt6_nh_list, next) {
5266		if (err_nh == nh)
5267			break;
5268		ip6_route_del(&nh->r_cfg, extack);
5269	}
5270
5271cleanup:
5272	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
5273		if (nh->fib6_info)
5274			fib6_info_release(nh->fib6_info);
5275		list_del(&nh->next);
5276		kfree(nh);
5277	}
5278
5279	return err;
5280}
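/* The multipath add path corresponds to a netlink request carrying
 * RTA_MULTIPATH, e.g. this illustrative iproute2 invocation:
 *
 *	ip -6 route add 2001:db8::/64 \
 *		nexthop via fe80::1 dev eth0 weight 1 \
 *		nexthop via fe80::2 dev eth1 weight 2
 *
 * Each rtnexthop becomes one fib6_info on rt6_nh_list; userspace encodes
 * the weight as rtnh_hops = weight - 1, which is why fib_nh_weight is set
 * to rtnh_hops + 1 in the parse loop above.
 */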
5281
5282static int ip6_route_multipath_del(struct fib6_config *cfg,
5283				   struct netlink_ext_ack *extack)
5284{
5285	struct fib6_config r_cfg;
5286	struct rtnexthop *rtnh;
5287	int remaining;
5288	int attrlen;
5289	int err = 1, last_err = 0;
5290
5291	remaining = cfg->fc_mp_len;
5292	rtnh = (struct rtnexthop *)cfg->fc_mp;
5293
5294	/* Parse a Multipath Entry */
5295	while (rtnh_ok(rtnh, remaining)) {
5296		memcpy(&r_cfg, cfg, sizeof(*cfg));
5297		if (rtnh->rtnh_ifindex)
5298			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
5299
5300		attrlen = rtnh_attrlen(rtnh);
5301		if (attrlen > 0) {
5302			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
5303
5304			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
5305			if (nla) {
5306				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
5307				r_cfg.fc_flags |= RTF_GATEWAY;
5308			}
5309		}
5310		err = ip6_route_del(&r_cfg, extack);
5311		if (err)
5312			last_err = err;
5313
5314		rtnh = rtnh_next(rtnh, &remaining);
5315	}
5316
5317	return last_err;
5318}
5319
5320static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
5321			      struct netlink_ext_ack *extack)
5322{
5323	struct fib6_config cfg;
5324	int err;
5325
5326	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
5327	if (err < 0)
5328		return err;
5329
5330	if (cfg.fc_nh_id &&
5331	    !nexthop_find_by_id(sock_net(skb->sk), cfg.fc_nh_id)) {
5332		NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
5333		return -EINVAL;
5334	}
5335
5336	if (cfg.fc_mp)
5337		return ip6_route_multipath_del(&cfg, extack);
5338	else {
5339		cfg.fc_delete_all_nh = 1;
5340		return ip6_route_del(&cfg, extack);
5341	}
5342}
5343
5344static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
5345			      struct netlink_ext_ack *extack)
5346{
5347	struct fib6_config cfg;
5348	int err;
5349
5350	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
5351	if (err < 0)
5352		return err;
5353
5354	if (cfg.fc_metric == 0)
5355		cfg.fc_metric = IP6_RT_PRIO_USER;
5356
5357	if (cfg.fc_mp)
5358		return ip6_route_multipath_add(&cfg, extack);
5359	else
5360		return ip6_route_add(&cfg, GFP_KERNEL, extack);
5361}
5362
5363/* add the overhead of this fib6_nh to nexthop_len */
5364static int rt6_nh_nlmsg_size(struct fib6_nh *nh, void *arg)
5365{
5366	int *nexthop_len = arg;
5367
5368	*nexthop_len += nla_total_size(0)	 /* RTA_MULTIPATH */
5369		     + NLA_ALIGN(sizeof(struct rtnexthop))
5370		     + nla_total_size(16); /* RTA_GATEWAY */
5371
5372	if (nh->fib_nh_lws) {
5373		/* RTA_ENCAP_TYPE */
5374		*nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
5375		/* RTA_ENCAP */
5376		*nexthop_len += nla_total_size(2);
5377	}
5378
5379	return 0;
5380}
5381
5382static size_t rt6_nlmsg_size(struct fib6_info *f6i)
5383{
5384	int nexthop_len;
5385
5386	if (f6i->nh) {
5387		nexthop_len = nla_total_size(4); /* RTA_NH_ID */
5388		nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_nlmsg_size,
5389					 &nexthop_len);
5390	} else {
5391		struct fib6_nh *nh = f6i->fib6_nh;
5392
5393		nexthop_len = 0;
5394		if (f6i->fib6_nsiblings) {
5395			nexthop_len = nla_total_size(0)	 /* RTA_MULTIPATH */
5396				    + NLA_ALIGN(sizeof(struct rtnexthop))
5397				    + nla_total_size(16) /* RTA_GATEWAY */
5398				    + lwtunnel_get_encap_size(nh->fib_nh_lws);
5399
5400			nexthop_len *= f6i->fib6_nsiblings;
5401		}
5402		nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
5403	}
5404
5405	return NLMSG_ALIGN(sizeof(struct rtmsg))
5406	       + nla_total_size(16) /* RTA_SRC */
5407	       + nla_total_size(16) /* RTA_DST */
5408	       + nla_total_size(16) /* RTA_GATEWAY */
5409	       + nla_total_size(16) /* RTA_PREFSRC */
5410	       + nla_total_size(4) /* RTA_TABLE */
5411	       + nla_total_size(4) /* RTA_IIF */
5412	       + nla_total_size(4) /* RTA_OIF */
5413	       + nla_total_size(4) /* RTA_PRIORITY */
5414	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
5415	       + nla_total_size(sizeof(struct rta_cacheinfo))
5416	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
5417	       + nla_total_size(1) /* RTA_PREF */
5418	       + nexthop_len;
5419}
5420
5421static int rt6_fill_node_nexthop(struct sk_buff *skb, struct nexthop *nh,
5422				 unsigned char *flags)
5423{
5424	if (nexthop_is_multipath(nh)) {
5425		struct nlattr *mp;
5426
5427		mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
5428		if (!mp)
5429			goto nla_put_failure;
5430
5431		if (nexthop_mpath_fill_node(skb, nh, AF_INET6))
5432			goto nla_put_failure;
5433
5434		nla_nest_end(skb, mp);
5435	} else {
5436		struct fib6_nh *fib6_nh;
5437
5438		fib6_nh = nexthop_fib6_nh(nh);
5439		if (fib_nexthop_info(skb, &fib6_nh->nh_common, AF_INET6,
5440				     flags, false) < 0)
5441			goto nla_put_failure;
5442	}
5443
5444	return 0;
5445
5446nla_put_failure:
5447	return -EMSGSIZE;
5448}
5449
5450static int rt6_fill_node(struct net *net, struct sk_buff *skb,
5451			 struct fib6_info *rt, struct dst_entry *dst,
5452			 struct in6_addr *dest, struct in6_addr *src,
5453			 int iif, int type, u32 portid, u32 seq,
5454			 unsigned int flags)
5455{
5456	struct rt6_info *rt6 = (struct rt6_info *)dst;
5457	struct rt6key *rt6_dst, *rt6_src;
5458	u32 *pmetrics, table, rt6_flags;
5459	unsigned char nh_flags = 0;
5460	struct nlmsghdr *nlh;
5461	struct rtmsg *rtm;
5462	long expires = 0;
5463
5464	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
5465	if (!nlh)
5466		return -EMSGSIZE;
5467
5468	if (rt6) {
5469		rt6_dst = &rt6->rt6i_dst;
5470		rt6_src = &rt6->rt6i_src;
5471		rt6_flags = rt6->rt6i_flags;
5472	} else {
5473		rt6_dst = &rt->fib6_dst;
5474		rt6_src = &rt->fib6_src;
5475		rt6_flags = rt->fib6_flags;
5476	}
5477
5478	rtm = nlmsg_data(nlh);
5479	rtm->rtm_family = AF_INET6;
5480	rtm->rtm_dst_len = rt6_dst->plen;
5481	rtm->rtm_src_len = rt6_src->plen;
5482	rtm->rtm_tos = 0;
5483	if (rt->fib6_table)
5484		table = rt->fib6_table->tb6_id;
5485	else
5486		table = RT6_TABLE_UNSPEC;
5487	rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT;
5488	if (nla_put_u32(skb, RTA_TABLE, table))
5489		goto nla_put_failure;
5490
5491	rtm->rtm_type = rt->fib6_type;
5492	rtm->rtm_flags = 0;
5493	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
5494	rtm->rtm_protocol = rt->fib6_protocol;
5495
5496	if (rt6_flags & RTF_CACHE)
5497		rtm->rtm_flags |= RTM_F_CLONED;
5498
5499	if (dest) {
5500		if (nla_put_in6_addr(skb, RTA_DST, dest))
5501			goto nla_put_failure;
5502		rtm->rtm_dst_len = 128;
5503	} else if (rtm->rtm_dst_len)
5504		if (nla_put_in6_addr(skb, RTA_DST, &rt6_dst->addr))
5505			goto nla_put_failure;
5506#ifdef CONFIG_IPV6_SUBTREES
5507	if (src) {
5508		if (nla_put_in6_addr(skb, RTA_SRC, src))
5509			goto nla_put_failure;
5510		rtm->rtm_src_len = 128;
5511	} else if (rtm->rtm_src_len &&
5512		   nla_put_in6_addr(skb, RTA_SRC, &rt6_src->addr))
5513		goto nla_put_failure;
5514#endif
5515	if (iif) {
5516#ifdef CONFIG_IPV6_MROUTE
5517		if (ipv6_addr_is_multicast(&rt6_dst->addr)) {
5518			int err = ip6mr_get_route(net, skb, rtm, portid);
5519
5520			if (err == 0)
5521				return 0;
5522			if (err < 0)
5523				goto nla_put_failure;
5524		} else
5525#endif
5526			if (nla_put_u32(skb, RTA_IIF, iif))
5527				goto nla_put_failure;
5528	} else if (dest) {
5529		struct in6_addr saddr_buf;
5530		if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
5531		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
5532			goto nla_put_failure;
5533	}
5534
5535	if (rt->fib6_prefsrc.plen) {
5536		struct in6_addr saddr_buf;
5537		saddr_buf = rt->fib6_prefsrc.addr;
5538		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
5539			goto nla_put_failure;
5540	}
5541
5542	pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
5543	if (rtnetlink_put_metrics(skb, pmetrics) < 0)
5544		goto nla_put_failure;
5545
5546	if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
5547		goto nla_put_failure;
5548
5549	/* For multipath routes, walk the siblings list and add
5550	 * each as a nexthop within RTA_MULTIPATH.
5551	 */
5552	if (rt6) {
5553		if (rt6_flags & RTF_GATEWAY &&
5554		    nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway))
5555			goto nla_put_failure;
5556
5557		if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex))
5558			goto nla_put_failure;
5559	} else if (rt->fib6_nsiblings) {
5560		struct fib6_info *sibling, *next_sibling;
5561		struct nlattr *mp;
5562
5563		mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
5564		if (!mp)
5565			goto nla_put_failure;
5566
5567		if (fib_add_nexthop(skb, &rt->fib6_nh->nh_common,
5568				    rt->fib6_nh->fib_nh_weight, AF_INET6) < 0)
5569			goto nla_put_failure;
5570
5571		list_for_each_entry_safe(sibling, next_sibling,
5572					 &rt->fib6_siblings, fib6_siblings) {
5573			if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common,
5574					    sibling->fib6_nh->fib_nh_weight,
5575					    AF_INET6) < 0)
5576				goto nla_put_failure;
5577		}
5578
5579		nla_nest_end(skb, mp);
5580	} else if (rt->nh) {
5581		if (nla_put_u32(skb, RTA_NH_ID, rt->nh->id))
5582			goto nla_put_failure;
5583
5584		if (nexthop_is_blackhole(rt->nh))
5585			rtm->rtm_type = RTN_BLACKHOLE;
5586
5587		if (net->ipv4.sysctl_nexthop_compat_mode &&
5588		    rt6_fill_node_nexthop(skb, rt->nh, &nh_flags) < 0)
5589			goto nla_put_failure;
5590
5591		rtm->rtm_flags |= nh_flags;
5592	} else {
5593		if (fib_nexthop_info(skb, &rt->fib6_nh->nh_common, AF_INET6,
5594				     &nh_flags, false) < 0)
5595			goto nla_put_failure;
5596
5597		rtm->rtm_flags |= nh_flags;
5598	}
5599
5600	if (rt6_flags & RTF_EXPIRES) {
5601		expires = dst ? dst->expires : rt->expires;
5602		expires -= jiffies;
5603	}
5604
5605	if (!dst) {
5606		if (rt->offload)
5607			rtm->rtm_flags |= RTM_F_OFFLOAD;
5608		if (rt->trap)
5609			rtm->rtm_flags |= RTM_F_TRAP;
5610	}
5611
5612	if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
5613		goto nla_put_failure;
5614
5615	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt6_flags)))
5616		goto nla_put_failure;
5617
5618
5619	nlmsg_end(skb, nlh);
5620	return 0;
5621
5622nla_put_failure:
5623	nlmsg_cancel(skb, nlh);
5624	return -EMSGSIZE;
5625}
5626
5627static int fib6_info_nh_uses_dev(struct fib6_nh *nh, void *arg)
5628{
5629	const struct net_device *dev = arg;
5630
5631	if (nh->fib_nh_dev == dev)
5632		return 1;
5633
5634	return 0;
5635}
5636
5637static bool fib6_info_uses_dev(const struct fib6_info *f6i,
5638			       const struct net_device *dev)
5639{
5640	if (f6i->nh) {
5641		struct net_device *_dev = (struct net_device *)dev;
5642
5643		return !!nexthop_for_each_fib6_nh(f6i->nh,
5644						  fib6_info_nh_uses_dev,
5645						  _dev);
5646	}
5647
5648	if (f6i->fib6_nh->fib_nh_dev == dev)
5649		return true;
5650
5651	if (f6i->fib6_nsiblings) {
5652		struct fib6_info *sibling, *next_sibling;
5653
5654		list_for_each_entry_safe(sibling, next_sibling,
5655					 &f6i->fib6_siblings, fib6_siblings) {
5656			if (sibling->fib6_nh->fib_nh_dev == dev)
5657				return true;
5658		}
5659	}
5660
5661	return false;
5662}
5663
5664struct fib6_nh_exception_dump_walker {
5665	struct rt6_rtnl_dump_arg *dump;
5666	struct fib6_info *rt;
5667	unsigned int flags;
5668	unsigned int skip;
5669	unsigned int count;
5670};
5671
5672static int rt6_nh_dump_exceptions(struct fib6_nh *nh, void *arg)
5673{
5674	struct fib6_nh_exception_dump_walker *w = arg;
5675	struct rt6_rtnl_dump_arg *dump = w->dump;
5676	struct rt6_exception_bucket *bucket;
5677	struct rt6_exception *rt6_ex;
5678	int i, err;
5679
5680	bucket = fib6_nh_get_excptn_bucket(nh, NULL);
5681	if (!bucket)
5682		return 0;
5683
5684	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
5685		hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
5686			if (w->skip) {
5687				w->skip--;
5688				continue;
5689			}
5690
5691			/* Expiration of entries doesn't bump sernum, insertion
5692			 * does. Removal is triggered by insertion, so we can
5693			 * rely on the fact that if entries change between two
5694			 * partial dumps, this node is scanned again completely,
5695			 * see rt6_insert_exception() and fib6_dump_table().
5696			 *
5697			 * Count expired entries we go through as handled
5698			 * entries that we'll skip next time, in case of partial
5699			 * node dump. Otherwise, if entries expire meanwhile,
5700			 * we'll skip the wrong amount.
5701			 */
5702			if (rt6_check_expired(rt6_ex->rt6i)) {
5703				w->count++;
5704				continue;
5705			}
5706
5707			err = rt6_fill_node(dump->net, dump->skb, w->rt,
5708					    &rt6_ex->rt6i->dst, NULL, NULL, 0,
5709					    RTM_NEWROUTE,
5710					    NETLINK_CB(dump->cb->skb).portid,
5711					    dump->cb->nlh->nlmsg_seq, w->flags);
5712			if (err)
5713				return err;
5714
5715			w->count++;
5716		}
5717		bucket++;
5718	}
5719
5720	return 0;
5721}
5722
5723/* Return -1 if done with the node, or the number of handled routes on a partial dump */
5724int rt6_dump_route(struct fib6_info *rt, void *p_arg, unsigned int skip)
5725{
5726	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
5727	struct fib_dump_filter *filter = &arg->filter;
5728	unsigned int flags = NLM_F_MULTI;
5729	struct net *net = arg->net;
5730	int count = 0;
5731
5732	if (rt == net->ipv6.fib6_null_entry)
5733		return -1;
5734
5735	if ((filter->flags & RTM_F_PREFIX) &&
5736	    !(rt->fib6_flags & RTF_PREFIX_RT)) {
5737		/* success since this is not a prefix route */
5738		return -1;
5739	}
5740	if (filter->filter_set &&
5741	    ((filter->rt_type  && rt->fib6_type != filter->rt_type) ||
5742	     (filter->dev      && !fib6_info_uses_dev(rt, filter->dev)) ||
5743	     (filter->protocol && rt->fib6_protocol != filter->protocol))) {
5744		return -1;
5745	}
5746
5747	if (filter->filter_set ||
5748	    !filter->dump_routes || !filter->dump_exceptions) {
5749		flags |= NLM_F_DUMP_FILTERED;
5750	}
5751
5752	if (filter->dump_routes) {
5753		if (skip) {
5754			skip--;
5755		} else {
5756			if (rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL,
5757					  0, RTM_NEWROUTE,
5758					  NETLINK_CB(arg->cb->skb).portid,
5759					  arg->cb->nlh->nlmsg_seq, flags)) {
5760				return 0;
5761			}
5762			count++;
5763		}
5764	}
5765
5766	if (filter->dump_exceptions) {
5767		struct fib6_nh_exception_dump_walker w = { .dump = arg,
5768							   .rt = rt,
5769							   .flags = flags,
5770							   .skip = skip,
5771							   .count = 0 };
5772		int err;
5773
5774		rcu_read_lock();
5775		if (rt->nh) {
5776			err = nexthop_for_each_fib6_nh(rt->nh,
5777						       rt6_nh_dump_exceptions,
5778						       &w);
5779		} else {
5780			err = rt6_nh_dump_exceptions(rt->fib6_nh, &w);
5781		}
5782		rcu_read_unlock();
5783
5784		if (err)
5785			return count += w.count;
5786	}
5787
5788	return -1;
5789}
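/* Worked example of the skip/count bookkeeping (hypothetical numbers): say
 * a node holds one route plus three cached exceptions and a previous
 * partial dump stopped after two entries, so this pass arrives with
 * skip == 2. The route itself consumes one unit of skip, the exception
 * walker starts with w.skip == 1 and skips the first exception, and the
 * remaining two exceptions are filled in. Only when everything fits is -1
 * ("done with node") returned; otherwise the number of handled entries is
 * returned so the next pass can resume where this one stopped.
 */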
5790
5791static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
5792					const struct nlmsghdr *nlh,
5793					struct nlattr **tb,
5794					struct netlink_ext_ack *extack)
5795{
5796	struct rtmsg *rtm;
5797	int i, err;
5798
5799	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
5800		NL_SET_ERR_MSG_MOD(extack,
5801				   "Invalid header for get route request");
5802		return -EINVAL;
5803	}
5804
5805	if (!netlink_strict_get_check(skb))
5806		return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
5807					      rtm_ipv6_policy, extack);
5808
5809	rtm = nlmsg_data(nlh);
5810	if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
5811	    (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
5812	    rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope ||
5813	    rtm->rtm_type) {
5814		NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request");
5815		return -EINVAL;
5816	}
5817	if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) {
5818		NL_SET_ERR_MSG_MOD(extack,
5819				   "Invalid flags for get route request");
5820		return -EINVAL;
5821	}
5822
5823	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
5824					    rtm_ipv6_policy, extack);
5825	if (err)
5826		return err;
5827
5828	if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
5829	    (tb[RTA_DST] && !rtm->rtm_dst_len)) {
5830		NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
5831		return -EINVAL;
5832	}
5833
5834	for (i = 0; i <= RTA_MAX; i++) {
5835		if (!tb[i])
5836			continue;
5837
5838		switch (i) {
5839		case RTA_SRC:
5840		case RTA_DST:
5841		case RTA_IIF:
5842		case RTA_OIF:
5843		case RTA_MARK:
5844		case RTA_UID:
5845		case RTA_SPORT:
5846		case RTA_DPORT:
5847		case RTA_IP_PROTO:
5848			break;
5849		default:
5850			NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
5851			return -EINVAL;
5852		}
5853	}
5854
5855	return 0;
5856}
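/* Under strict checking, a minimal request that passes the validation
 * above looks roughly like this (userspace sketch; padding, error
 * handling and send() omitted):
 *
 *	struct {
 *		struct nlmsghdr nlh;
 *		struct rtmsg rtm;
 *		struct rtattr rta;
 *		struct in6_addr dst;
 *	} req = {
 *		.nlh.nlmsg_len   = sizeof(req),
 *		.nlh.nlmsg_type  = RTM_GETROUTE,
 *		.nlh.nlmsg_flags = NLM_F_REQUEST,
 *		.rtm.rtm_family  = AF_INET6,
 *		.rtm.rtm_dst_len = 128,	// must be 128 when RTA_DST is set
 *		.rta.rta_type    = RTA_DST,
 *		.rta.rta_len     = RTA_LENGTH(sizeof(struct in6_addr)),
 *	};
 *
 * Any nonzero rtm_table/rtm_protocol/rtm_scope/rtm_type, any flag other
 * than RTM_F_FIB_MATCH, or any attribute outside the switch above is
 * rejected with -EINVAL.
 */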
5857
5858static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
5859			      struct netlink_ext_ack *extack)
5860{
5861	struct net *net = sock_net(in_skb->sk);
5862	struct nlattr *tb[RTA_MAX+1];
5863	int err, iif = 0, oif = 0;
5864	struct fib6_info *from;
5865	struct dst_entry *dst;
5866	struct rt6_info *rt;
5867	struct sk_buff *skb;
5868	struct rtmsg *rtm;
5869	struct flowi6 fl6 = {};
5870	bool fibmatch;
5871
5872	err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
5873	if (err < 0)
5874		goto errout;
5875
5876	err = -EINVAL;
5877	rtm = nlmsg_data(nlh);
5878	fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
5879	fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
5880
5881	if (tb[RTA_SRC]) {
5882		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
5883			goto errout;
5884
5885		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
5886	}
5887
5888	if (tb[RTA_DST]) {
5889		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
5890			goto errout;
5891
5892		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
5893	}
5894
5895	if (tb[RTA_IIF])
5896		iif = nla_get_u32(tb[RTA_IIF]);
5897
5898	if (tb[RTA_OIF])
5899		oif = nla_get_u32(tb[RTA_OIF]);
5900
5901	if (tb[RTA_MARK])
5902		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
5903
5904	if (tb[RTA_UID])
5905		fl6.flowi6_uid = make_kuid(current_user_ns(),
5906					   nla_get_u32(tb[RTA_UID]));
5907	else
5908		fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
5909
5910	if (tb[RTA_SPORT])
5911		fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]);
5912
5913	if (tb[RTA_DPORT])
5914		fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]);
5915
5916	if (tb[RTA_IP_PROTO]) {
5917		err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
5918						  &fl6.flowi6_proto, AF_INET6,
5919						  extack);
5920		if (err)
5921			goto errout;
5922	}
5923
5924	if (iif) {
5925		struct net_device *dev;
5926		int flags = 0;
5927
5928		rcu_read_lock();
5929
5930		dev = dev_get_by_index_rcu(net, iif);
5931		if (!dev) {
5932			rcu_read_unlock();
5933			err = -ENODEV;
5934			goto errout;
5935		}
5936
5937		fl6.flowi6_iif = iif;
5938
5939		if (!ipv6_addr_any(&fl6.saddr))
5940			flags |= RT6_LOOKUP_F_HAS_SADDR;
5941
5942		dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
5943
5944		rcu_read_unlock();
5945	} else {
5946		fl6.flowi6_oif = oif;
5947
5948		dst = ip6_route_output(net, NULL, &fl6);
5949	}
5950
5951
5952	rt = container_of(dst, struct rt6_info, dst);
5953	if (rt->dst.error) {
5954		err = rt->dst.error;
5955		ip6_rt_put(rt);
5956		goto errout;
5957	}
5958
5959	if (rt == net->ipv6.ip6_null_entry) {
5960		err = rt->dst.error;
5961		ip6_rt_put(rt);
5962		goto errout;
5963	}
5964
5965	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
5966	if (!skb) {
5967		ip6_rt_put(rt);
5968		err = -ENOBUFS;
5969		goto errout;
5970	}
5971
5972	skb_dst_set(skb, &rt->dst);
5973
5974	rcu_read_lock();
5975	from = rcu_dereference(rt->from);
5976	if (from) {
5977		if (fibmatch)
5978			err = rt6_fill_node(net, skb, from, NULL, NULL, NULL,
5979					    iif, RTM_NEWROUTE,
5980					    NETLINK_CB(in_skb).portid,
5981					    nlh->nlmsg_seq, 0);
5982		else
5983			err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
5984					    &fl6.saddr, iif, RTM_NEWROUTE,
5985					    NETLINK_CB(in_skb).portid,
5986					    nlh->nlmsg_seq, 0);
5987	} else {
5988		err = -ENETUNREACH;
5989	}
5990	rcu_read_unlock();
5991
5992	if (err < 0) {
5993		kfree_skb(skb);
5994		goto errout;
5995	}
5996
5997	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
5998errout:
5999	return err;
6000}
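/* The fibmatch branch above backs, e.g., "ip -6 route get fibmatch
 * 2001:db8::1": with RTM_F_FIB_MATCH set, the reply describes the FIB
 * entry the lookup matched (via rt->from) rather than the resulting
 * dst entry.
 */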
6001
6002void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
6003		     unsigned int nlm_flags)
6004{
6005	struct sk_buff *skb;
6006	struct net *net = info->nl_net;
6007	u32 seq;
6008	int err;
6009
6010	err = -ENOBUFS;
6011	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
6012
6013	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
6014	if (!skb)
6015		goto errout;
6016
6017	err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
6018			    event, info->portid, seq, nlm_flags);
6019	if (err < 0) {
6020		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
6021		WARN_ON(err == -EMSGSIZE);
6022		kfree_skb(skb);
6023		goto errout;
6024	}
6025	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
6026		    info->nlh, gfp_any());
6027	return;
6028errout:
6029	if (err < 0)
6030		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
6031}
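/* Userspace receives these notifications by joining the IPv6 route
 * multicast group on an rtnetlink socket (sketch, error handling
 * omitted):
 *
 *	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
 *	struct sockaddr_nl snl = {
 *		.nl_family = AF_NETLINK,
 *		.nl_groups = RTMGRP_IPV6_ROUTE,
 *	};
 *	bind(fd, (struct sockaddr *)&snl, sizeof(snl));
 *	// recv() now yields the RTM_NEWROUTE/RTM_DELROUTE messages
 *	// built by rt6_fill_node() above
 */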
6032
6033void fib6_rt_update(struct net *net, struct fib6_info *rt,
6034		    struct nl_info *info)
6035{
6036	u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
6037	struct sk_buff *skb;
6038	int err = -ENOBUFS;
6039
6040	/* call_fib6_entry_notifiers will be removed when in-kernel notifier
6041	 * is implemented and supported for nexthop objects
6042	 */
6043	call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, rt, NULL);
6044
6045	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
6046	if (!skb)
6047		goto errout;
6048
6049	err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
6050			    RTM_NEWROUTE, info->portid, seq, NLM_F_REPLACE);
6051	if (err < 0) {
6052		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
6053		WARN_ON(err == -EMSGSIZE);
6054		kfree_skb(skb);
6055		goto errout;
6056	}
6057	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
6058		    info->nlh, gfp_any());
6059	return;
6060errout:
6061	if (err < 0)
6062		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
6063}
6064
6065static int ip6_route_dev_notify(struct notifier_block *this,
6066				unsigned long event, void *ptr)
6067{
6068	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
6069	struct net *net = dev_net(dev);
6070
6071	if (!(dev->flags & IFF_LOOPBACK))
6072		return NOTIFY_OK;
6073
6074	if (event == NETDEV_REGISTER) {
6075		net->ipv6.fib6_null_entry->fib6_nh->fib_nh_dev = dev;
6076		net->ipv6.ip6_null_entry->dst.dev = dev;
6077		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
6078#ifdef CONFIG_IPV6_MULTIPLE_TABLES
6079		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
6080		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
6081		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
6082		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
6083#endif
6084	} else if (event == NETDEV_UNREGISTER &&
6085		   dev->reg_state != NETREG_UNREGISTERED) {
6086		/* NETDEV_UNREGISTER can be fired multiple times by
6087		 * netdev_wait_allrefs(). Make sure we only call this once.
6088		 */
6089		in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
6090#ifdef CONFIG_IPV6_MULTIPLE_TABLES
6091		in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
6092		in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
6093#endif
6094	}
6095
6096	return NOTIFY_OK;
6097}
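/* The single-shot guard above relies on in6_dev_put_clear(), which
 * (roughly) drops the reference and NULLs the pointer, so a repeated
 * NETDEV_UNREGISTER finds NULL and does nothing:
 *
 *	if (*pidev) {
 *		in6_dev_put(*pidev);
 *		*pidev = NULL;
 *	}
 */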
6098
6099/*
6100 *	/proc
6101 */
6102
6103#ifdef CONFIG_PROC_FS
6104static int rt6_stats_seq_show(struct seq_file *seq, void *v)
6105{
6106	struct net *net = (struct net *)seq->private;
6107	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
6108		   net->ipv6.rt6_stats->fib_nodes,
6109		   net->ipv6.rt6_stats->fib_route_nodes,
6110		   atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
6111		   net->ipv6.rt6_stats->fib_rt_entries,
6112		   net->ipv6.rt6_stats->fib_rt_cache,
6113		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
6114		   net->ipv6.rt6_stats->fib_discarded_routes);
6115
6116	return 0;
6117	}
6118#endif	/* CONFIG_PROC_FS */
6119
6120#ifdef CONFIG_SYSCTL
6121
6122static int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
6123			      void *buffer, size_t *lenp, loff_t *ppos)
6124{
6125	struct net *net;
6126	int delay;
6127	int ret;
6128	if (!write)
6129		return -EINVAL;
6130
6131	net = (struct net *)ctl->extra1;
6132	delay = net->ipv6.sysctl.flush_delay;
6133	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
6134	if (ret)
6135		return ret;
6136
6137	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
6138	return 0;
6139}
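/* Usage example: this handler sits behind the write-only "flush" entry
 * in the table below, so a flush can be requested with e.g.:
 *
 *	echo 1 > /proc/sys/net/ipv6/route/flush
 */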
6140
6141static struct ctl_table ipv6_route_table_template[] = {
6142	{
6143		.procname	=	"flush",
6144		.data		=	&init_net.ipv6.sysctl.flush_delay,
6145		.maxlen		=	sizeof(int),
6146		.mode		=	0200,
6147		.proc_handler	=	ipv6_sysctl_rtcache_flush
6148	},
6149	{
6150		.procname	=	"gc_thresh",
6151		.data		=	&ip6_dst_ops_template.gc_thresh,
6152		.maxlen		=	sizeof(int),
6153		.mode		=	0644,
6154		.proc_handler	=	proc_dointvec,
6155	},
6156	{
6157		.procname	=	"max_size",
6158		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
6159		.maxlen		=	sizeof(int),
6160		.mode		=	0644,
6161		.proc_handler	=	proc_dointvec,
6162	},
6163	{
6164		.procname	=	"gc_min_interval",
6165		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
6166		.maxlen		=	sizeof(int),
6167		.mode		=	0644,
6168		.proc_handler	=	proc_dointvec_jiffies,
6169	},
6170	{
6171		.procname	=	"gc_timeout",
6172		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
6173		.maxlen		=	sizeof(int),
6174		.mode		=	0644,
6175		.proc_handler	=	proc_dointvec_jiffies,
6176	},
6177	{
6178		.procname	=	"gc_interval",
6179		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
6180		.maxlen		=	sizeof(int),
6181		.mode		=	0644,
6182		.proc_handler	=	proc_dointvec_jiffies,
6183	},
6184	{
6185		.procname	=	"gc_elasticity",
6186		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
6187		.maxlen		=	sizeof(int),
6188		.mode		=	0644,
6189		.proc_handler	=	proc_dointvec,
6190	},
6191	{
6192		.procname	=	"mtu_expires",
6193		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
6194		.maxlen		=	sizeof(int),
6195		.mode		=	0644,
6196		.proc_handler	=	proc_dointvec_jiffies,
6197	},
6198	{
6199		.procname	=	"min_adv_mss",
6200		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
6201		.maxlen		=	sizeof(int),
6202		.mode		=	0644,
6203		.proc_handler	=	proc_dointvec,
6204	},
6205	{
6206		.procname	=	"gc_min_interval_ms",
6207		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
6208		.maxlen		=	sizeof(int),
6209		.mode		=	0644,
6210		.proc_handler	=	proc_dointvec_ms_jiffies,
6211	},
6212	{
6213		.procname	=	"skip_notify_on_dev_down",
6214		.data		=	&init_net.ipv6.sysctl.skip_notify_on_dev_down,
6215		.maxlen		=	sizeof(int),
6216		.mode		=	0644,
6217		.proc_handler	=	proc_dointvec_minmax,
6218		.extra1		=	SYSCTL_ZERO,
6219		.extra2		=	SYSCTL_ONE,
6220	},
6221	{ }
6222};
6223
6224struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
6225{
6226	struct ctl_table *table;
6227
6228	table = kmemdup(ipv6_route_table_template,
6229			sizeof(ipv6_route_table_template),
6230			GFP_KERNEL);
6231
6232	if (table) {
6233		table[0].data = &net->ipv6.sysctl.flush_delay;
6234		table[0].extra1 = net;
6235		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
6236		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
6237		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
6238		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
6239		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
6240		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
6241		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
6242		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
6243		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
6244		table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
6245
6246		/* Don't export sysctls to unprivileged users */
6247		if (net->user_ns != &init_user_ns)
6248			table[0].procname = NULL;
6249	}
6250
6251	return table;
6252}
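/* Note that the table[N].data fix-ups above are positional and must
 * stay in step with the entry order of ipv6_route_table_template;
 * adding a template entry implies a matching line here, e.g.
 * (hypothetical knob, for illustration only):
 *
 *	table[11].data = &net->ipv6.sysctl.new_knob;
 */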
6253#endif
6254
6255static int __net_init ip6_route_net_init(struct net *net)
6256{
6257	int ret = -ENOMEM;
6258
6259	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
6260	       sizeof(net->ipv6.ip6_dst_ops));
6261
6262	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
6263		goto out_ip6_dst_ops;
6264
6265	net->ipv6.fib6_null_entry = fib6_info_alloc(GFP_KERNEL, true);
6266	if (!net->ipv6.fib6_null_entry)
6267		goto out_ip6_dst_entries;
6268	memcpy(net->ipv6.fib6_null_entry, &fib6_null_entry_template,
6269	       sizeof(*net->ipv6.fib6_null_entry));
6270
6271	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
6272					   sizeof(*net->ipv6.ip6_null_entry),
6273					   GFP_KERNEL);
6274	if (!net->ipv6.ip6_null_entry)
6275		goto out_fib6_null_entry;
6276	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
6277	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
6278			 ip6_template_metrics, true);
6279	INIT_LIST_HEAD(&net->ipv6.ip6_null_entry->rt6i_uncached);
6280
6281#ifdef CONFIG_IPV6_MULTIPLE_TABLES
6282	net->ipv6.fib6_has_custom_rules = false;
6283	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
6284					       sizeof(*net->ipv6.ip6_prohibit_entry),
6285					       GFP_KERNEL);
6286	if (!net->ipv6.ip6_prohibit_entry)
6287		goto out_ip6_null_entry;
6288	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
6289	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
6290			 ip6_template_metrics, true);
6291	INIT_LIST_HEAD(&net->ipv6.ip6_prohibit_entry->rt6i_uncached);
6292
6293	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
6294					       sizeof(*net->ipv6.ip6_blk_hole_entry),
6295					       GFP_KERNEL);
6296	if (!net->ipv6.ip6_blk_hole_entry)
6297		goto out_ip6_prohibit_entry;
6298	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
6299	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
6300			 ip6_template_metrics, true);
6301	INIT_LIST_HEAD(&net->ipv6.ip6_blk_hole_entry->rt6i_uncached);
6302#ifdef CONFIG_IPV6_SUBTREES
6303	net->ipv6.fib6_routes_require_src = 0;
6304#endif
6305#endif
6306
6307	net->ipv6.sysctl.flush_delay = 0;
6308	net->ipv6.sysctl.ip6_rt_max_size = 4096;
6309	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
6310	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
6311	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
6312	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
6313	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
6314	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
6315	net->ipv6.sysctl.skip_notify_on_dev_down = 0;
6316
6317	net->ipv6.ip6_rt_gc_expire = 30*HZ;
6318
6319	ret = 0;
6320out:
6321	return ret;
6322
6323#ifdef CONFIG_IPV6_MULTIPLE_TABLES
6324out_ip6_prohibit_entry:
6325	kfree(net->ipv6.ip6_prohibit_entry);
6326out_ip6_null_entry:
6327	kfree(net->ipv6.ip6_null_entry);
6328#endif
6329out_fib6_null_entry:
6330	kfree(net->ipv6.fib6_null_entry);
6331out_ip6_dst_entries:
6332	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
6333out_ip6_dst_ops:
6334	goto out;
6335}
6336
6337static void __net_exit ip6_route_net_exit(struct net *net)
6338{
6339	kfree(net->ipv6.fib6_null_entry);
6340	kfree(net->ipv6.ip6_null_entry);
6341#ifdef CONFIG_IPV6_MULTIPLE_TABLES
6342	kfree(net->ipv6.ip6_prohibit_entry);
6343	kfree(net->ipv6.ip6_blk_hole_entry);
6344#endif
6345	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
6346}
6347
6348static int __net_init ip6_route_net_init_late(struct net *net)
6349{
6350#ifdef CONFIG_PROC_FS
6351	proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops,
6352			sizeof(struct ipv6_route_iter));
6353	proc_create_net_single("rt6_stats", 0444, net->proc_net,
6354			rt6_stats_seq_show, NULL);
6355#endif
6356	return 0;
6357}
6358
6359static void __net_exit ip6_route_net_exit_late(struct net *net)
6360{
6361#ifdef CONFIG_PROC_FS
6362	remove_proc_entry("ipv6_route", net->proc_net);
6363	remove_proc_entry("rt6_stats", net->proc_net);
6364#endif
6365}
6366
6367static struct pernet_operations ip6_route_net_ops = {
6368	.init = ip6_route_net_init,
6369	.exit = ip6_route_net_exit,
6370};
6371
6372static int __net_init ipv6_inetpeer_init(struct net *net)
6373{
6374	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
6375
6376	if (!bp)
6377		return -ENOMEM;
6378	inet_peer_base_init(bp);
6379	net->ipv6.peers = bp;
6380	return 0;
6381}
6382
6383static void __net_exit ipv6_inetpeer_exit(struct net *net)
6384{
6385	struct inet_peer_base *bp = net->ipv6.peers;
6386
6387	net->ipv6.peers = NULL;
6388	inetpeer_invalidate_tree(bp);
6389	kfree(bp);
6390}
6391
6392static struct pernet_operations ipv6_inetpeer_ops = {
6393	.init	=	ipv6_inetpeer_init,
6394	.exit	=	ipv6_inetpeer_exit,
6395};
6396
6397static struct pernet_operations ip6_route_net_late_ops = {
6398	.init = ip6_route_net_init_late,
6399	.exit = ip6_route_net_exit_late,
6400};
6401
6402static struct notifier_block ip6_route_dev_notifier = {
6403	.notifier_call = ip6_route_dev_notify,
6404	.priority = ADDRCONF_NOTIFY_PRIORITY - 10,
6405};
6406
6407void __init ip6_route_init_special_entries(void)
6408{
6409	/* Registration of the loopback device happens before this code
6410	 * runs, so the loopback reference in rt6_info is not taken
6411	 * automatically; take it here for init_net. */
6412	init_net.ipv6.fib6_null_entry->fib6_nh->fib_nh_dev = init_net.loopback_dev;
6413	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
6414	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
6415#ifdef CONFIG_IPV6_MULTIPLE_TABLES
6416	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
6417	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
6418	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
6419	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
6420#endif
6421}
6422
6423#if IS_BUILTIN(CONFIG_IPV6)
6424#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
6425DEFINE_BPF_ITER_FUNC(ipv6_route, struct bpf_iter_meta *meta, struct fib6_info *rt)
6426
6427BTF_ID_LIST(btf_fib6_info_id)
6428BTF_ID(struct, fib6_info)
6429
6430static const struct bpf_iter_seq_info ipv6_route_seq_info = {
6431	.seq_ops		= &ipv6_route_seq_ops,
6432	.init_seq_private	= bpf_iter_init_seq_net,
6433	.fini_seq_private	= bpf_iter_fini_seq_net,
6434	.seq_priv_size		= sizeof(struct ipv6_route_iter),
6435};
6436
6437static struct bpf_iter_reg ipv6_route_reg_info = {
6438	.target			= "ipv6_route",
6439	.ctx_arg_info_size	= 1,
6440	.ctx_arg_info		= {
6441		{ offsetof(struct bpf_iter__ipv6_route, rt),
6442		  PTR_TO_BTF_ID_OR_NULL },
6443	},
6444	.seq_info		= &ipv6_route_seq_info,
6445};
6446
6447static int __init bpf_iter_register(void)
6448{
6449	ipv6_route_reg_info.ctx_arg_info[0].btf_id = *btf_fib6_info_id;
6450	return bpf_iter_reg_target(&ipv6_route_reg_info);
6451}
6452
6453static void bpf_iter_unregister(void)
6454{
6455	bpf_iter_unreg_target(&ipv6_route_reg_info);
6456}
6457#endif
6458#endif
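/* Once the "ipv6_route" target is registered above, an iterator program
 * can be attached and pinned from userspace, e.g. (assuming prog.o
 * contains an iter/ipv6_route BPF program):
 *
 *	bpftool iter pin ./prog.o /sys/fs/bpf/ipv6_route
 *	cat /sys/fs/bpf/ipv6_route
 *
 * Each read walks the FIB and passes every fib6_info to the program
 * through the bpf_iter__ipv6_route context declared above.
 */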
6459
6460int __init ip6_route_init(void)
6461{
6462	int ret;
6463	int cpu;
6464
6465	ret = -ENOMEM;
6466	ip6_dst_ops_template.kmem_cachep =
6467		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
6468				  SLAB_HWCACHE_ALIGN, NULL);
6469	if (!ip6_dst_ops_template.kmem_cachep)
6470		goto out;
6471
6472	ret = dst_entries_init(&ip6_dst_blackhole_ops);
6473	if (ret)
6474		goto out_kmem_cache;
6475
6476	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
6477	if (ret)
6478		goto out_dst_entries;
6479
6480	ret = register_pernet_subsys(&ip6_route_net_ops);
6481	if (ret)
6482		goto out_register_inetpeer;
6483
6484	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
6485
6486	ret = fib6_init();
6487	if (ret)
6488		goto out_register_subsys;
6489
6490	ret = xfrm6_init();
6491	if (ret)
6492		goto out_fib6_init;
6493
6494	ret = fib6_rules_init();
6495	if (ret)
6496		goto xfrm6_init;
6497
6498	ret = register_pernet_subsys(&ip6_route_net_late_ops);
6499	if (ret)
6500		goto fib6_rules_init;
6501
6502	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
6503				   inet6_rtm_newroute, NULL, 0);
6504	if (ret < 0)
6505		goto out_register_late_subsys;
6506
6507	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
6508				   inet6_rtm_delroute, NULL, 0);
6509	if (ret < 0)
6510		goto out_register_late_subsys;
6511
6512	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
6513				   inet6_rtm_getroute, NULL,
6514				   RTNL_FLAG_DOIT_UNLOCKED);
6515	if (ret < 0)
6516		goto out_register_late_subsys;
6517
6518	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
6519	if (ret)
6520		goto out_register_late_subsys;
6521
6522#if IS_BUILTIN(CONFIG_IPV6)
6523#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
6524	ret = bpf_iter_register();
6525	if (ret)
6526		goto out_register_late_subsys;
6527#endif
6528#endif
6529
6530	for_each_possible_cpu(cpu) {
6531		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
6532
6533		INIT_LIST_HEAD(&ul->head);
6534		spin_lock_init(&ul->lock);
6535	}
6536
6537out:
6538	return ret;
6539
6540out_register_late_subsys:
6541	rtnl_unregister_all(PF_INET6);
6542	unregister_pernet_subsys(&ip6_route_net_late_ops);
6543fib6_rules_init:
6544	fib6_rules_cleanup();
6545xfrm6_init:
6546	xfrm6_fini();
6547out_fib6_init:
6548	fib6_gc_cleanup();
6549out_register_subsys:
6550	unregister_pernet_subsys(&ip6_route_net_ops);
6551out_register_inetpeer:
6552	unregister_pernet_subsys(&ipv6_inetpeer_ops);
6553out_dst_entries:
6554	dst_entries_destroy(&ip6_dst_blackhole_ops);
6555out_kmem_cache:
6556	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
6557	goto out;
6558}
6559
6560void ip6_route_cleanup(void)
6561{
6562#if IS_BUILTIN(CONFIG_IPV6)
6563#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
6564	bpf_iter_unregister();
6565#endif
6566#endif
6567	unregister_netdevice_notifier(&ip6_route_dev_notifier);
6568	unregister_pernet_subsys(&ip6_route_net_late_ops);
6569	fib6_rules_cleanup();
6570	xfrm6_fini();
6571	fib6_gc_cleanup();
6572	unregister_pernet_subsys(&ipv6_inetpeer_ops);
6573	unregister_pernet_subsys(&ip6_route_net_ops);
6574	dst_entries_destroy(&ip6_dst_blackhole_ops);
6575	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
6576}