net/ipv4/ip_tunnel.c (Linux v4.17)
 
   1/*
   2 * Copyright (c) 2013 Nicira, Inc.
   3 *
   4 * This program is free software; you can redistribute it and/or
   5 * modify it under the terms of version 2 of the GNU General Public
   6 * License as published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope that it will be useful, but
   9 * WITHOUT ANY WARRANTY; without even the implied warranty of
  10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11 * General Public License for more details.
  12 *
  13 * You should have received a copy of the GNU General Public License
  14 * along with this program; if not, write to the Free Software
  15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  16 * 02110-1301, USA
  17 */
  18
  19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  20
  21#include <linux/capability.h>
  22#include <linux/module.h>
  23#include <linux/types.h>
  24#include <linux/kernel.h>
  25#include <linux/slab.h>
  26#include <linux/uaccess.h>
  27#include <linux/skbuff.h>
  28#include <linux/netdevice.h>
  29#include <linux/in.h>
  30#include <linux/tcp.h>
  31#include <linux/udp.h>
  32#include <linux/if_arp.h>
  33#include <linux/init.h>
  34#include <linux/in6.h>
  35#include <linux/inetdevice.h>
  36#include <linux/igmp.h>
  37#include <linux/netfilter_ipv4.h>
  38#include <linux/etherdevice.h>
  39#include <linux/if_ether.h>
  40#include <linux/if_vlan.h>
  41#include <linux/rculist.h>
  42#include <linux/err.h>
  43
  44#include <net/sock.h>
  45#include <net/ip.h>
  46#include <net/icmp.h>
  47#include <net/protocol.h>
  48#include <net/ip_tunnels.h>
  49#include <net/arp.h>
  50#include <net/checksum.h>
  51#include <net/dsfield.h>
  52#include <net/inet_ecn.h>
  53#include <net/xfrm.h>
  54#include <net/net_namespace.h>
  55#include <net/netns/generic.h>
  56#include <net/rtnetlink.h>
  57#include <net/udp.h>
  58#include <net/dst_metadata.h>
  59
  60#if IS_ENABLED(CONFIG_IPV6)
  61#include <net/ipv6.h>
  62#include <net/ip6_fib.h>
  63#include <net/ip6_route.h>
  64#endif
  65
  66static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
  67{
  68	return hash_32((__force u32)key ^ (__force u32)remote,
  69			 IP_TNL_HASH_BITS);
  70}
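
The bucket index comes from hash_32(), the kernel's multiplicative hash. Below is a minimal userspace sketch of the same computation; GOLDEN_RATIO_32 mirrors include/linux/hash.h and IP_TNL_HASH_BITS = 7 (a 128-bucket table) mirrors include/net/ip_tunnels.h, both stated here as assumptions rather than copied definitions.

	#include <stdint.h>
	#include <stdio.h>

	#define GOLDEN_RATIO_32  0x61C88647u	/* assumed, per include/linux/hash.h */
	#define IP_TNL_HASH_BITS 7		/* assumed, per include/net/ip_tunnels.h */

	/* Multiplicative hash: multiply, then keep the top `bits` bits. */
	static unsigned int hash_32(uint32_t val, unsigned int bits)
	{
		return (val * GOLDEN_RATIO_32) >> (32 - bits);
	}

	int main(void)
	{
		uint32_t key = 100, remote = 0x0a000001;	/* 10.0.0.1 */

		printf("bucket %u of %u\n",
		       hash_32(key ^ remote, IP_TNL_HASH_BITS),
		       1u << IP_TNL_HASH_BITS);
		return 0;
	}
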
  71
  72static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
  73				__be16 flags, __be32 key)
  74{
  75	if (p->i_flags & TUNNEL_KEY) {
  76		if (flags & TUNNEL_KEY)
  77			return key == p->i_key;
  78		else
  79			/* key expected, none present */
  80			return false;
  81	} else
  82		return !(flags & TUNNEL_KEY);
  83}
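
The matcher reduces to a small truth table: a keyed tunnel accepts only keyed packets carrying its key, and a keyless tunnel accepts only keyless packets. A self-contained sketch of that rule (the TUNNEL_KEY value follows include/uapi/linux/if_tunnel.h, with byte order elided for simplicity):

	#include <assert.h>
	#include <stdbool.h>
	#include <stdint.h>

	#define TUNNEL_KEY 0x04		/* assumed value; byte order elided */

	struct parms { uint16_t i_flags; uint32_t i_key; };

	/* Same decision as ip_tunnel_key_match() above. */
	static bool key_match(const struct parms *p, uint16_t flags, uint32_t key)
	{
		if (p->i_flags & TUNNEL_KEY)
			return (flags & TUNNEL_KEY) ? key == p->i_key : false;
		return !(flags & TUNNEL_KEY);
	}

	int main(void)
	{
		struct parms keyed = { TUNNEL_KEY, 42 }, keyless = { 0, 0 };

		assert(key_match(&keyed, TUNNEL_KEY, 42));	/* same key */
		assert(!key_match(&keyed, TUNNEL_KEY, 7));	/* wrong key */
		assert(!key_match(&keyed, 0, 0));		/* key expected, none present */
		assert(key_match(&keyless, 0, 0));		/* both keyless */
		assert(!key_match(&keyless, TUNNEL_KEY, 42));	/* unexpected key */
		return 0;
	}
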
  84
  85/* Fallback tunnel: no source, no destination, no key, no options
  86
  87   Tunnel hash table:
   88   We require an exact key match, i.e. if a key is present in the packet
   89   it will match only a tunnel with the same key; if no key is present,
   90   it will match only a keyless tunnel.
   91
   92   All keyless packets, if not matched against a configured keyless
   93   tunnel, will match the fallback tunnel.
   94   Given src, dst and key, find the appropriate tunnel for the input.
  95*/
  96struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
  97				   int link, __be16 flags,
  98				   __be32 remote, __be32 local,
  99				   __be32 key)
 100{
 101	unsigned int hash;
 102	struct ip_tunnel *t, *cand = NULL;
 103	struct hlist_head *head;
 104
 105	hash = ip_tunnel_hash(key, remote);
 106	head = &itn->tunnels[hash];
 107
 108	hlist_for_each_entry_rcu(t, head, hash_node) {
 109		if (local != t->parms.iph.saddr ||
 110		    remote != t->parms.iph.daddr ||
 111		    !(t->dev->flags & IFF_UP))
 112			continue;
 113
 114		if (!ip_tunnel_key_match(&t->parms, flags, key))
 115			continue;
 116
 117		if (t->parms.link == link)
 118			return t;
 119		else
 120			cand = t;
 121	}
 122
 123	hlist_for_each_entry_rcu(t, head, hash_node) {
 124		if (remote != t->parms.iph.daddr ||
 125		    t->parms.iph.saddr != 0 ||
 126		    !(t->dev->flags & IFF_UP))
 127			continue;
 128
 129		if (!ip_tunnel_key_match(&t->parms, flags, key))
 130			continue;
 131
 132		if (t->parms.link == link)
 133			return t;
 134		else if (!cand)
 135			cand = t;
 136	}
 137
 138	hash = ip_tunnel_hash(key, 0);
 139	head = &itn->tunnels[hash];
 140
 141	hlist_for_each_entry_rcu(t, head, hash_node) {
 142		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
 143		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
 144			continue;
 145
 146		if (!(t->dev->flags & IFF_UP))
 147			continue;
 148
 149		if (!ip_tunnel_key_match(&t->parms, flags, key))
 150			continue;
 151
 152		if (t->parms.link == link)
 153			return t;
 154		else if (!cand)
 155			cand = t;
 156	}
 157
 158	if (flags & TUNNEL_NO_KEY)
 159		goto skip_key_lookup;
 160
 161	hlist_for_each_entry_rcu(t, head, hash_node) {
 162		if (t->parms.i_key != key ||
 163		    t->parms.iph.saddr != 0 ||
 164		    t->parms.iph.daddr != 0 ||
 165		    !(t->dev->flags & IFF_UP))
 166			continue;
 167
 168		if (t->parms.link == link)
 169			return t;
 170		else if (!cand)
 171			cand = t;
 172	}
 173
 174skip_key_lookup:
 175	if (cand)
 176		return cand;
 177
 178	t = rcu_dereference(itn->collect_md_tun);
 179	if (t && t->dev->flags & IFF_UP)
 180		return t;
 181
 182	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
 183		return netdev_priv(itn->fb_tunnel_dev);
 184
 185	return NULL;
 186}
 187EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
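
Reading the four loops together, the lookup tries progressively looser matches and remembers the best candidate whose link index did not match:

	1. exact (local, remote) address pair in the bucket for (key, remote);
	2. remote-only match (wildcard source) in the same bucket;
	3. in the (key, 0) bucket: local-only match, or a multicast local
	   address equal to the tunnel's destination;
	4. key-only match with both addresses wildcarded (skipped when the
	   packet carries TUNNEL_NO_KEY).

Failing all four, it falls back to the link-mismatched candidate, then the collect_md device, then the per-netns fallback device (e.g. gre0), and finally NULL.
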
 188
 189static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
 190				    struct ip_tunnel_parm *parms)
 191{
 192	unsigned int h;
 193	__be32 remote;
 194	__be32 i_key = parms->i_key;
 195
 196	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
 197		remote = parms->iph.daddr;
 198	else
 199		remote = 0;
 200
 201	if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
 202		i_key = 0;
 203
 204	h = ip_tunnel_hash(i_key, remote);
 205	return &itn->tunnels[h];
 206}
 207
 208static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
 209{
 210	struct hlist_head *head = ip_bucket(itn, &t->parms);
 211
 212	if (t->collect_md)
 213		rcu_assign_pointer(itn->collect_md_tun, t);
 214	hlist_add_head_rcu(&t->hash_node, head);
 215}
 216
 217static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
 218{
 219	if (t->collect_md)
 220		rcu_assign_pointer(itn->collect_md_tun, NULL);
 221	hlist_del_init_rcu(&t->hash_node);
 222}
 223
 224static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
 225					struct ip_tunnel_parm *parms,
 226					int type)
 227{
 228	__be32 remote = parms->iph.daddr;
 229	__be32 local = parms->iph.saddr;
 230	__be32 key = parms->i_key;
 231	__be16 flags = parms->i_flags;
 232	int link = parms->link;
 233	struct ip_tunnel *t = NULL;
 234	struct hlist_head *head = ip_bucket(itn, parms);
 235
 236	hlist_for_each_entry_rcu(t, head, hash_node) {
 237		if (local == t->parms.iph.saddr &&
 238		    remote == t->parms.iph.daddr &&
 239		    link == t->parms.link &&
 240		    type == t->dev->type &&
 241		    ip_tunnel_key_match(&t->parms, flags, key))
 242			break;
 243	}
 244	return t;
 245}
 246
 247static struct net_device *__ip_tunnel_create(struct net *net,
 248					     const struct rtnl_link_ops *ops,
 249					     struct ip_tunnel_parm *parms)
 250{
 251	int err;
 252	struct ip_tunnel *tunnel;
 253	struct net_device *dev;
 254	char name[IFNAMSIZ];
 255
 256	err = -E2BIG;
 257	if (parms->name[0]) {
 258		if (!dev_valid_name(parms->name))
 259			goto failed;
 260		strlcpy(name, parms->name, IFNAMSIZ);
 261	} else {
 262		if (strlen(ops->kind) > (IFNAMSIZ - 3))
 263			goto failed;
 264		strlcpy(name, ops->kind, IFNAMSIZ);
 265		strncat(name, "%d", 2);
 266	}
 267
 268	ASSERT_RTNL();
 269	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
 270	if (!dev) {
 271		err = -ENOMEM;
 272		goto failed;
 273	}
 274	dev_net_set(dev, net);
 275
 276	dev->rtnl_link_ops = ops;
 277
 278	tunnel = netdev_priv(dev);
 279	tunnel->parms = *parms;
 280	tunnel->net = net;
 281
 282	err = register_netdevice(dev);
 283	if (err)
 284		goto failed_free;
 285
 286	return dev;
 287
 288failed_free:
 289	free_netdev(dev);
 290failed:
 291	return ERR_PTR(err);
 292}
 293
 294static int ip_tunnel_bind_dev(struct net_device *dev)
 295{
 296	struct net_device *tdev = NULL;
 297	struct ip_tunnel *tunnel = netdev_priv(dev);
 298	const struct iphdr *iph;
 299	int hlen = LL_MAX_HEADER;
 300	int mtu = ETH_DATA_LEN;
 301	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
 302
 303	iph = &tunnel->parms.iph;
 304
 305	/* Guess output device to choose reasonable mtu and needed_headroom */
 306	if (iph->daddr) {
 307		struct flowi4 fl4;
 308		struct rtable *rt;
 309
 310		ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
 311				    iph->saddr, tunnel->parms.o_key,
 312				    RT_TOS(iph->tos), tunnel->parms.link,
 313				    tunnel->fwmark);
 314		rt = ip_route_output_key(tunnel->net, &fl4);
 315
 316		if (!IS_ERR(rt)) {
 317			tdev = rt->dst.dev;
 318			ip_rt_put(rt);
 319		}
 320		if (dev->type != ARPHRD_ETHER)
 321			dev->flags |= IFF_POINTOPOINT;
 322
 323		dst_cache_reset(&tunnel->dst_cache);
 324	}
 325
 326	if (!tdev && tunnel->parms.link)
 327		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
 328
 329	if (tdev) {
 330		hlen = tdev->hard_header_len + tdev->needed_headroom;
 331		mtu = min(tdev->mtu, IP_MAX_MTU);
 332	}
 333
 334	dev->needed_headroom = t_hlen + hlen;
 335	mtu -= (dev->hard_header_len + t_hlen);
 336
 337	if (mtu < IPV4_MIN_MTU)
 338		mtu = IPV4_MIN_MTU;
 339
 340	return mtu;
 341}
 342
 343static struct ip_tunnel *ip_tunnel_create(struct net *net,
 344					  struct ip_tunnel_net *itn,
 345					  struct ip_tunnel_parm *parms)
 346{
 347	struct ip_tunnel *nt;
 348	struct net_device *dev;
 349	int t_hlen;
 350	int mtu;
 351	int err;
 352
 353	dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
 354	if (IS_ERR(dev))
 355		return ERR_CAST(dev);
 356
 357	mtu = ip_tunnel_bind_dev(dev);
 358	err = dev_set_mtu(dev, mtu);
 359	if (err)
 360		goto err_dev_set_mtu;
 361
 362	nt = netdev_priv(dev);
 363	t_hlen = nt->hlen + sizeof(struct iphdr);
 364	dev->min_mtu = ETH_MIN_MTU;
 365	dev->max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen;
 366	ip_tunnel_add(itn, nt);
 367	return nt;
 368
 369err_dev_set_mtu:
 370	unregister_netdevice(dev);
 371	return ERR_PTR(err);
 372}
 373
 374int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
 375		  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
 376		  bool log_ecn_error)
 377{
 378	struct pcpu_sw_netstats *tstats;
 379	const struct iphdr *iph = ip_hdr(skb);
 380	int err;
 381
 382#ifdef CONFIG_NET_IPGRE_BROADCAST
 383	if (ipv4_is_multicast(iph->daddr)) {
 384		tunnel->dev->stats.multicast++;
 385		skb->pkt_type = PACKET_BROADCAST;
 386	}
 387#endif
 388
 389	if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
 390	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
 391		tunnel->dev->stats.rx_crc_errors++;
 392		tunnel->dev->stats.rx_errors++;
 393		goto drop;
 394	}
 395
 396	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
 397		if (!(tpi->flags&TUNNEL_SEQ) ||
 398		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
 399			tunnel->dev->stats.rx_fifo_errors++;
 400			tunnel->dev->stats.rx_errors++;
 401			goto drop;
 402		}
 403		tunnel->i_seqno = ntohl(tpi->seq) + 1;
 404	}
 405
 406	skb_reset_network_header(skb);
 407
 408	err = IP_ECN_decapsulate(iph, skb);
 409	if (unlikely(err)) {
 410		if (log_ecn_error)
 411			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
 412					&iph->saddr, iph->tos);
 413		if (err > 1) {
 414			++tunnel->dev->stats.rx_frame_errors;
 415			++tunnel->dev->stats.rx_errors;
 416			goto drop;
 417		}
 418	}
 419
 420	tstats = this_cpu_ptr(tunnel->dev->tstats);
 421	u64_stats_update_begin(&tstats->syncp);
 422	tstats->rx_packets++;
 423	tstats->rx_bytes += skb->len;
 424	u64_stats_update_end(&tstats->syncp);
 425
 426	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
 427
 428	if (tunnel->dev->type == ARPHRD_ETHER) {
 429		skb->protocol = eth_type_trans(skb, tunnel->dev);
 430		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
 431	} else {
 432		skb->dev = tunnel->dev;
 433	}
 434
 435	if (tun_dst)
 436		skb_dst_set(skb, (struct dst_entry *)tun_dst);
 437
 438	gro_cells_receive(&tunnel->gro_cells, skb);
 439	return 0;
 440
 441drop:
 442	if (tun_dst)
 443		dst_release((struct dst_entry *)tun_dst);
 444	kfree_skb(skb);
 445	return 0;
 446}
 447EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
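
The TUNNEL_SEQ branch drops out-of-order packets with serial-number arithmetic: the unsigned difference is cast to s32, so the test stays correct when the 32-bit counter wraps. A minimal sketch of the acceptance test:

	#include <assert.h>
	#include <stdint.h>

	/* Accept seq unless it is strictly behind the expected value;
	 * the signed cast makes the comparison wraparound-safe. */
	static int seq_ok(uint32_t seq, uint32_t i_seqno)
	{
		return !i_seqno || (int32_t)(seq - i_seqno) >= 0;
	}

	int main(void)
	{
		assert(seq_ok(100, 100));		/* in order */
		assert(!seq_ok(99, 100));		/* old/replayed */
		assert(seq_ok(5, 0xFFFFFFF0u));		/* wrapped, still ahead */
		assert(seq_ok(12345, 0));		/* first packet */
		return 0;
	}
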
 448
 449int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
 450			    unsigned int num)
 451{
 452	if (num >= MAX_IPTUN_ENCAP_OPS)
 453		return -ERANGE;
 454
 455	return !cmpxchg((const struct ip_tunnel_encap_ops **)
 456			&iptun_encaps[num],
 457			NULL, ops) ? 0 : -1;
 458}
 459EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
 460
 461int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
 462			    unsigned int num)
 463{
 464	int ret;
 465
 466	if (num >= MAX_IPTUN_ENCAP_OPS)
 467		return -ERANGE;
 468
 469	ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
 470		       &iptun_encaps[num],
 471		       ops, NULL) == ops) ? 0 : -1;
 472
 473	synchronize_net();
 474
 475	return ret;
 476}
 477EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
 478
 479int ip_tunnel_encap_setup(struct ip_tunnel *t,
 480			  struct ip_tunnel_encap *ipencap)
 481{
 482	int hlen;
 483
 484	memset(&t->encap, 0, sizeof(t->encap));
 485
 486	hlen = ip_encap_hlen(ipencap);
 487	if (hlen < 0)
 488		return hlen;
 489
 490	t->encap.type = ipencap->type;
 491	t->encap.sport = ipencap->sport;
 492	t->encap.dport = ipencap->dport;
 493	t->encap.flags = ipencap->flags;
 494
 495	t->encap_hlen = hlen;
 496	t->hlen = t->encap_hlen + t->tun_hlen;
 497
 498	return 0;
 499}
 500EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
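
t->hlen is the tunnel header plus whatever the encapsulation mode adds, with ip_encap_hlen() (include/net/ip_tunnels.h) supplying the latter. A back-of-the-envelope sketch; the per-mode sizes are assumptions for illustration, not copies of the kernel's tables:

	#include <stdio.h>

	enum { ENCAP_NONE, ENCAP_FOU, ENCAP_GUE };

	/* Extra outer bytes per encapsulation mode (assumed sizes). */
	static int encap_hlen(int type)
	{
		const int udp = 8, gue = 4;

		switch (type) {
		case ENCAP_NONE: return 0;
		case ENCAP_FOU:  return udp;		/* UDP header only */
		case ENCAP_GUE:  return udp + gue;	/* UDP + GUE header */
		default:         return -1;		/* -EINVAL in the kernel */
		}
	}

	int main(void)
	{
		int tun_hlen = 4;	/* e.g. a minimal GRE header */

		/* t->hlen = t->encap_hlen + t->tun_hlen, as set above */
		printf("GRE over GUE: hlen = %d\n", encap_hlen(ENCAP_GUE) + tun_hlen);
		return 0;
	}
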
 501
 502static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
 503			    struct rtable *rt, __be16 df,
 504			    const struct iphdr *inner_iph)
 505{
 506	struct ip_tunnel *tunnel = netdev_priv(dev);
 507	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
 508	int mtu;
 509
 510	if (df)
 511		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
 512					- sizeof(struct iphdr) - tunnel->hlen;
 513	else
 514		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
 515
 516	skb_dst_update_pmtu(skb, mtu);
 517
 518	if (skb->protocol == htons(ETH_P_IP)) {
 519		if (!skb_is_gso(skb) &&
 520		    (inner_iph->frag_off & htons(IP_DF)) &&
 521		    mtu < pkt_size) {
 522			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
 523			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
 524			return -E2BIG;
 525		}
 526	}
 527#if IS_ENABLED(CONFIG_IPV6)
 528	else if (skb->protocol == htons(ETH_P_IPV6)) {
 529		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
 530
 531		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
 532			   mtu >= IPV6_MIN_MTU) {
 533			if ((tunnel->parms.iph.daddr &&
 534			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
 535			    rt6->rt6i_dst.plen == 128) {
 536				rt6->rt6i_flags |= RTF_MODIFIED;
 537				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
 538			}
 539		}
 540
 541		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
 542					mtu < pkt_size) {
 543			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 544			return -E2BIG;
 545		}
 546	}
 547#endif
 548	return 0;
 549}
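
As a worked example: a plain GRE tunnel adds a 20-byte outer IPv4 header and a 4-byte GRE header, so across a 1500-byte path with DF set the usable inner MTU is 1476; larger DF-marked packets trigger the ICMP errors above. A sketch of the arithmetic:

	#include <stdio.h>

	int main(void)
	{
		int path_mtu = 1500;	/* dst_mtu(&rt->dst) */
		int outer_ip = 20;	/* sizeof(struct iphdr) */
		int tun_hlen = 4;	/* minimal GRE, no key/csum/seq */

		/* 1476 here; a keyed GRE header (8 bytes) would give 1472 */
		printf("inner MTU = %d\n", path_mtu - outer_ip - tun_hlen);
		return 0;
	}
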
 550
 551void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
 552{
 553	struct ip_tunnel *tunnel = netdev_priv(dev);
 554	u32 headroom = sizeof(struct iphdr);
 555	struct ip_tunnel_info *tun_info;
 556	const struct ip_tunnel_key *key;
 557	const struct iphdr *inner_iph;
 558	struct rtable *rt;
 559	struct flowi4 fl4;
 560	__be16 df = 0;
 561	u8 tos, ttl;
 562
 563	tun_info = skb_tunnel_info(skb);
 564	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
 565		     ip_tunnel_info_af(tun_info) != AF_INET))
 566		goto tx_error;
 567	key = &tun_info->key;
 568	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 569	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
 570	tos = key->tos;
 571	if (tos == 1) {
 572		if (skb->protocol == htons(ETH_P_IP))
 573			tos = inner_iph->tos;
 574		else if (skb->protocol == htons(ETH_P_IPV6))
 575			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
 576	}
 577	ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
 578			    RT_TOS(tos), tunnel->parms.link, tunnel->fwmark);
 579	if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
 580		goto tx_error;
 581	rt = ip_route_output_key(tunnel->net, &fl4);
 582	if (IS_ERR(rt)) {
 583		dev->stats.tx_carrier_errors++;
 584		goto tx_error;
 585	}
 586	if (rt->dst.dev == dev) {
 587		ip_rt_put(rt);
 588		dev->stats.collisions++;
 589		goto tx_error;
 590	}
 591	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
 592	ttl = key->ttl;
 593	if (ttl == 0) {
 594		if (skb->protocol == htons(ETH_P_IP))
 595			ttl = inner_iph->ttl;
 596		else if (skb->protocol == htons(ETH_P_IPV6))
 597			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
 598		else
 599			ttl = ip4_dst_hoplimit(&rt->dst);
 600	}
 601	if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
 602		df = htons(IP_DF);
 603	else if (skb->protocol == htons(ETH_P_IP))
 604		df = inner_iph->frag_off & htons(IP_DF);
 605	headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
 606	if (headroom > dev->needed_headroom)
 607		dev->needed_headroom = headroom;
 608
 609	if (skb_cow_head(skb, dev->needed_headroom)) {
 610		ip_rt_put(rt);
 611		goto tx_dropped;
 612	}
 613	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
 614		      df, !net_eq(tunnel->net, dev_net(dev)));
 615	return;
 616tx_error:
 617	dev->stats.tx_errors++;
 618	goto kfree;
 619tx_dropped:
 620	dev->stats.tx_dropped++;
 621kfree:
 622	kfree_skb(skb);
 623}
 624EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);
 625
 626void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 627		    const struct iphdr *tnl_params, u8 protocol)
 628{
 629	struct ip_tunnel *tunnel = netdev_priv(dev);
 630	const struct iphdr *inner_iph;
 631	struct flowi4 fl4;
 632	u8     tos, ttl;
 633	__be16 df;
 634	struct rtable *rt;		/* Route to the other host */
 635	unsigned int max_headroom;	/* The extra header space needed */
 636	__be32 dst;
 637	bool connected;
 638
 639	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
 640	connected = (tunnel->parms.iph.daddr != 0);
 641
 642	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 643
 644	dst = tnl_params->daddr;
 645	if (dst == 0) {
 646		/* NBMA tunnel */
 647
 648		if (!skb_dst(skb)) {
 649			dev->stats.tx_fifo_errors++;
 650			goto tx_error;
 651		}
 652
 653		if (skb->protocol == htons(ETH_P_IP)) {
 654			rt = skb_rtable(skb);
 655			dst = rt_nexthop(rt, inner_iph->daddr);
 656		}
 657#if IS_ENABLED(CONFIG_IPV6)
 658		else if (skb->protocol == htons(ETH_P_IPV6)) {
 659			const struct in6_addr *addr6;
 660			struct neighbour *neigh;
 661			bool do_tx_error_icmp;
 662			int addr_type;
 663
 664			neigh = dst_neigh_lookup(skb_dst(skb),
 665						 &ipv6_hdr(skb)->daddr);
 666			if (!neigh)
 667				goto tx_error;
 668
 669			addr6 = (const struct in6_addr *)&neigh->primary_key;
 670			addr_type = ipv6_addr_type(addr6);
 671
 672			if (addr_type == IPV6_ADDR_ANY) {
 673				addr6 = &ipv6_hdr(skb)->daddr;
 674				addr_type = ipv6_addr_type(addr6);
 675			}
 676
 677			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
 678				do_tx_error_icmp = true;
 679			else {
 680				do_tx_error_icmp = false;
 681				dst = addr6->s6_addr32[3];
 682			}
 683			neigh_release(neigh);
 684			if (do_tx_error_icmp)
 685				goto tx_error_icmp;
 686		}
 687#endif
 688		else
 689			goto tx_error;
 690
 691		connected = false;
 692	}
 693
 694	tos = tnl_params->tos;
 695	if (tos & 0x1) {
 696		tos &= ~0x1;
 697		if (skb->protocol == htons(ETH_P_IP)) {
 698			tos = inner_iph->tos;
 699			connected = false;
 700		} else if (skb->protocol == htons(ETH_P_IPV6)) {
 701			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
 702			connected = false;
 703		}
 704	}
 705
 706	ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
 707			    tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
 708			    tunnel->fwmark);
 709
 710	if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
 711		goto tx_error;
 712
 713	rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr) :
 714			 NULL;
 715
 716	if (!rt) {
 717		rt = ip_route_output_key(tunnel->net, &fl4);
 718
 719		if (IS_ERR(rt)) {
 720			dev->stats.tx_carrier_errors++;
 721			goto tx_error;
 722		}
 723		if (connected)
 724			dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
 725					  fl4.saddr);
 726	}
 727
 728	if (rt->dst.dev == dev) {
 729		ip_rt_put(rt);
 730		dev->stats.collisions++;
 731		goto tx_error;
 732	}
 733
 734	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) {
 735		ip_rt_put(rt);
 736		goto tx_error;
 737	}
 738
 739	if (tunnel->err_count > 0) {
 740		if (time_before(jiffies,
 741				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
 742			tunnel->err_count--;
 743
 744			dst_link_failure(skb);
 745		} else
 746			tunnel->err_count = 0;
 747	}
 748
 749	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
 750	ttl = tnl_params->ttl;
 751	if (ttl == 0) {
 752		if (skb->protocol == htons(ETH_P_IP))
 753			ttl = inner_iph->ttl;
 754#if IS_ENABLED(CONFIG_IPV6)
 755		else if (skb->protocol == htons(ETH_P_IPV6))
 756			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
 757#endif
 758		else
 759			ttl = ip4_dst_hoplimit(&rt->dst);
 760	}
 761
 762	df = tnl_params->frag_off;
 763	if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
 764		df |= (inner_iph->frag_off&htons(IP_DF));
 765
 766	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
 767			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
 768	if (max_headroom > dev->needed_headroom)
 769		dev->needed_headroom = max_headroom;
 770
 771	if (skb_cow_head(skb, dev->needed_headroom)) {
 772		ip_rt_put(rt);
 773		dev->stats.tx_dropped++;
 774		kfree_skb(skb);
 775		return;
 776	}
 777
 778	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
 779		      df, !net_eq(tunnel->net, dev_net(dev)));
 780	return;
 781
 782#if IS_ENABLED(CONFIG_IPV6)
 783tx_error_icmp:
 784	dst_link_failure(skb);
 785#endif
 786tx_error:
 787	dev->stats.tx_errors++;
 788	kfree_skb(skb);
 789}
 790EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
 791
 792static void ip_tunnel_update(struct ip_tunnel_net *itn,
 793			     struct ip_tunnel *t,
 794			     struct net_device *dev,
 795			     struct ip_tunnel_parm *p,
 796			     bool set_mtu,
 797			     __u32 fwmark)
 798{
 799	ip_tunnel_del(itn, t);
 800	t->parms.iph.saddr = p->iph.saddr;
 801	t->parms.iph.daddr = p->iph.daddr;
 802	t->parms.i_key = p->i_key;
 803	t->parms.o_key = p->o_key;
 804	if (dev->type != ARPHRD_ETHER) {
 805		memcpy(dev->dev_addr, &p->iph.saddr, 4);
 806		memcpy(dev->broadcast, &p->iph.daddr, 4);
 807	}
 808	ip_tunnel_add(itn, t);
 809
 810	t->parms.iph.ttl = p->iph.ttl;
 811	t->parms.iph.tos = p->iph.tos;
 812	t->parms.iph.frag_off = p->iph.frag_off;
 813
 814	if (t->parms.link != p->link || t->fwmark != fwmark) {
 815		int mtu;
 816
 817		t->parms.link = p->link;
 818		t->fwmark = fwmark;
 819		mtu = ip_tunnel_bind_dev(dev);
 820		if (set_mtu)
 821			dev->mtu = mtu;
 822	}
 823	dst_cache_reset(&t->dst_cache);
 824	netdev_state_change(dev);
 825}
 826
 827int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
 828{
 829	int err = 0;
 830	struct ip_tunnel *t = netdev_priv(dev);
 831	struct net *net = t->net;
 832	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
 833
 834	switch (cmd) {
 835	case SIOCGETTUNNEL:
 836		if (dev == itn->fb_tunnel_dev) {
 837			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
 838			if (!t)
 839				t = netdev_priv(dev);
 840		}
 841		memcpy(p, &t->parms, sizeof(*p));
 842		break;
 843
 844	case SIOCADDTUNNEL:
 845	case SIOCCHGTUNNEL:
 846		err = -EPERM;
 847		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 848			goto done;
 849		if (p->iph.ttl)
 850			p->iph.frag_off |= htons(IP_DF);
 851		if (!(p->i_flags & VTI_ISVTI)) {
 852			if (!(p->i_flags & TUNNEL_KEY))
 853				p->i_key = 0;
 854			if (!(p->o_flags & TUNNEL_KEY))
 855				p->o_key = 0;
 856		}
 857
 858		t = ip_tunnel_find(itn, p, itn->type);
 859
 860		if (cmd == SIOCADDTUNNEL) {
 861			if (!t) {
 862				t = ip_tunnel_create(net, itn, p);
 863				err = PTR_ERR_OR_ZERO(t);
 864				break;
 865			}
 866
 867			err = -EEXIST;
 868			break;
 869		}
 870		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
 871			if (t) {
 872				if (t->dev != dev) {
 873					err = -EEXIST;
 874					break;
 875				}
 876			} else {
 877				unsigned int nflags = 0;
 878
 879				if (ipv4_is_multicast(p->iph.daddr))
 880					nflags = IFF_BROADCAST;
 881				else if (p->iph.daddr)
 882					nflags = IFF_POINTOPOINT;
 883
 884				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
 885					err = -EINVAL;
 886					break;
 887				}
 888
 889				t = netdev_priv(dev);
 890			}
 891		}
 892
 893		if (t) {
 894			err = 0;
 895			ip_tunnel_update(itn, t, dev, p, true, 0);
 896		} else {
 897			err = -ENOENT;
 898		}
 899		break;
 900
 901	case SIOCDELTUNNEL:
 902		err = -EPERM;
 903		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 904			goto done;
 905
 906		if (dev == itn->fb_tunnel_dev) {
 907			err = -ENOENT;
 908			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
 909			if (!t)
 910				goto done;
 911			err = -EPERM;
 912			if (t == netdev_priv(itn->fb_tunnel_dev))
 913				goto done;
 914			dev = t->dev;
 915		}
 916		unregister_netdevice(dev);
 917		err = 0;
 918		break;
 919
 920	default:
 921		err = -EINVAL;
 922	}
 923
 924done:
 925	return err;
 926}
 927EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
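
Userspace reaches these commands through an ioctl issued against the per-netns fallback device; this is how the legacy `ip tunnel` interface works. A minimal sketch that creates a GRE tunnel (error handling trimmed; addresses and device names are illustrative):

	#include <stdio.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <sys/socket.h>
	#include <arpa/inet.h>
	#include <netinet/in.h>		/* IPPROTO_GRE */
	#include <linux/if_tunnel.h>	/* struct ip_tunnel_parm, SIOCADDTUNNEL */

	int main(void)
	{
		struct ip_tunnel_parm p = { 0 };
		struct ifreq ifr = { 0 };
		int fd;

		strncpy(p.name, "gre1", IFNAMSIZ - 1);
		p.iph.version  = 4;
		p.iph.ihl      = 5;
		p.iph.protocol = IPPROTO_GRE;
		p.iph.ttl      = 64;	/* nonzero ttl forces DF, as in the code above */
		inet_pton(AF_INET, "192.0.2.1", &p.iph.saddr);
		inet_pton(AF_INET, "192.0.2.2", &p.iph.daddr);

		/* The request is addressed to the fallback device, here gre0. */
		strncpy(ifr.ifr_name, "gre0", IFNAMSIZ - 1);
		ifr.ifr_ifru.ifru_data = (void *)&p;

		fd = socket(AF_INET, SOCK_DGRAM, 0);
		if (fd < 0 || ioctl(fd, SIOCADDTUNNEL, &ifr) < 0)
			perror("SIOCADDTUNNEL");
		return 0;
	}
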
 928
 929int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
 930{
 931	struct ip_tunnel *tunnel = netdev_priv(dev);
 932	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
 933	int max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen;
 934
 935	if (new_mtu < ETH_MIN_MTU)
 936		return -EINVAL;
 937
 938	if (new_mtu > max_mtu) {
 939		if (strict)
 940			return -EINVAL;
 941
 942		new_mtu = max_mtu;
 943	}
 944
 945	dev->mtu = new_mtu;
 946	return 0;
 947}
 948EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
 949
 950int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
 951{
 952	return __ip_tunnel_change_mtu(dev, new_mtu, true);
 953}
 954EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
 955
 956static void ip_tunnel_dev_free(struct net_device *dev)
 957{
 958	struct ip_tunnel *tunnel = netdev_priv(dev);
 959
 960	gro_cells_destroy(&tunnel->gro_cells);
 961	dst_cache_destroy(&tunnel->dst_cache);
 962	free_percpu(dev->tstats);
 963}
 964
 965void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
 966{
 967	struct ip_tunnel *tunnel = netdev_priv(dev);
 968	struct ip_tunnel_net *itn;
 969
 970	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
 971
 972	if (itn->fb_tunnel_dev != dev) {
 973		ip_tunnel_del(itn, netdev_priv(dev));
 974		unregister_netdevice_queue(dev, head);
 975	}
 976}
 977EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
 978
 979struct net *ip_tunnel_get_link_net(const struct net_device *dev)
 980{
 981	struct ip_tunnel *tunnel = netdev_priv(dev);
 982
 983	return tunnel->net;
 984}
 985EXPORT_SYMBOL(ip_tunnel_get_link_net);
 986
 987int ip_tunnel_get_iflink(const struct net_device *dev)
 988{
 989	struct ip_tunnel *tunnel = netdev_priv(dev);
 990
 991	return tunnel->parms.link;
 992}
 993EXPORT_SYMBOL(ip_tunnel_get_iflink);
 994
 995int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
 996				  struct rtnl_link_ops *ops, char *devname)
 997{
 998	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
 999	struct ip_tunnel_parm parms;
1000	unsigned int i;
1001
1002	itn->rtnl_link_ops = ops;
1003	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
1004		INIT_HLIST_HEAD(&itn->tunnels[i]);
1005
1006	if (!ops || !net_has_fallback_tunnels(net)) {
1007		struct ip_tunnel_net *it_init_net;
1008
1009		it_init_net = net_generic(&init_net, ip_tnl_net_id);
1010		itn->type = it_init_net->type;
1011		itn->fb_tunnel_dev = NULL;
1012		return 0;
1013	}
1014
1015	memset(&parms, 0, sizeof(parms));
1016	if (devname)
1017		strlcpy(parms.name, devname, IFNAMSIZ);
1018
1019	rtnl_lock();
1020	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
 1021	/* The FB netdevice is special: there is one, and only one, per netns.
 1022	 * Allowing it to be moved to another netns is clearly unsafe.
1023	 */
1024	if (!IS_ERR(itn->fb_tunnel_dev)) {
1025		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
1026		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
1027		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
1028		itn->type = itn->fb_tunnel_dev->type;
1029	}
1030	rtnl_unlock();
1031
1032	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
1033}
1034EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
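
A tunnel driver wires this into its pernet operations; the sketch below follows the shape of the GRE driver's registration in ip_gre.c, with the my_tunnel_* identifiers standing in as illustrative placeholders:

	#include <net/ip_tunnels.h>
	#include <net/rtnetlink.h>

	static unsigned int my_tunnel_net_id __read_mostly;
	static struct rtnl_link_ops my_tunnel_link_ops;	/* .kind/.setup set elsewhere */

	static int __net_init my_tunnel_init_net(struct net *net)
	{
		/* Builds the per-netns hash table and the fallback device. */
		return ip_tunnel_init_net(net, my_tunnel_net_id,
					  &my_tunnel_link_ops, "mytun0");
	}

	static void __net_exit my_tunnel_exit_batch_net(struct list_head *list_net)
	{
		ip_tunnel_delete_nets(list_net, my_tunnel_net_id,
				      &my_tunnel_link_ops);
	}

	static struct pernet_operations my_tunnel_net_ops = {
		.init		= my_tunnel_init_net,
		.exit_batch	= my_tunnel_exit_batch_net,
		.id		= &my_tunnel_net_id,
		.size		= sizeof(struct ip_tunnel_net),
	};
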
1035
1036static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
1037			      struct list_head *head,
1038			      struct rtnl_link_ops *ops)
1039{
1040	struct net_device *dev, *aux;
1041	int h;
1042
1043	for_each_netdev_safe(net, dev, aux)
1044		if (dev->rtnl_link_ops == ops)
1045			unregister_netdevice_queue(dev, head);
1046
1047	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
1048		struct ip_tunnel *t;
1049		struct hlist_node *n;
1050		struct hlist_head *thead = &itn->tunnels[h];
1051
1052		hlist_for_each_entry_safe(t, n, thead, hash_node)
1053			/* If dev is in the same netns, it has already
1054			 * been added to the list by the previous loop.
1055			 */
1056			if (!net_eq(dev_net(t->dev), net))
1057				unregister_netdevice_queue(t->dev, head);
1058	}
1059}
1060
1061void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
1062			   struct rtnl_link_ops *ops)
1063{
1064	struct ip_tunnel_net *itn;
1065	struct net *net;
1066	LIST_HEAD(list);
1067
1068	rtnl_lock();
1069	list_for_each_entry(net, net_list, exit_list) {
1070		itn = net_generic(net, id);
1071		ip_tunnel_destroy(net, itn, &list, ops);
1072	}
1073	unregister_netdevice_many(&list);
1074	rtnl_unlock();
1075}
1076EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);
1077
1078int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
1079		      struct ip_tunnel_parm *p, __u32 fwmark)
1080{
1081	struct ip_tunnel *nt;
1082	struct net *net = dev_net(dev);
1083	struct ip_tunnel_net *itn;
1084	int mtu;
1085	int err;
1086
1087	nt = netdev_priv(dev);
1088	itn = net_generic(net, nt->ip_tnl_net_id);
1089
1090	if (nt->collect_md) {
1091		if (rtnl_dereference(itn->collect_md_tun))
1092			return -EEXIST;
1093	} else {
1094		if (ip_tunnel_find(itn, p, dev->type))
1095			return -EEXIST;
1096	}
1097
1098	nt->net = net;
1099	nt->parms = *p;
1100	nt->fwmark = fwmark;
1101	err = register_netdevice(dev);
1102	if (err)
1103		goto err_register_netdevice;
1104
1105	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1106		eth_hw_addr_random(dev);
1107
1108	mtu = ip_tunnel_bind_dev(dev);
1109	if (tb[IFLA_MTU]) {
1110		unsigned int max = IP_MAX_MTU - dev->hard_header_len - nt->hlen;
1111
1112		mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU,
1113			    (unsigned int)(max - sizeof(struct iphdr)));
1114	}
1115
1116	err = dev_set_mtu(dev, mtu);
1117	if (err)
1118		goto err_dev_set_mtu;
1119
1120	ip_tunnel_add(itn, nt);
1121	return 0;
1122
1123err_dev_set_mtu:
1124	unregister_netdevice(dev);
1125err_register_netdevice:
1126	return err;
1127}
1128EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1129
1130int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1131			 struct ip_tunnel_parm *p, __u32 fwmark)
1132{
1133	struct ip_tunnel *t;
1134	struct ip_tunnel *tunnel = netdev_priv(dev);
1135	struct net *net = tunnel->net;
1136	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1137
1138	if (dev == itn->fb_tunnel_dev)
1139		return -EINVAL;
1140
1141	t = ip_tunnel_find(itn, p, dev->type);
1142
1143	if (t) {
1144		if (t->dev != dev)
1145			return -EEXIST;
1146	} else {
1147		t = tunnel;
1148
1149		if (dev->type != ARPHRD_ETHER) {
1150			unsigned int nflags = 0;
1151
1152			if (ipv4_is_multicast(p->iph.daddr))
1153				nflags = IFF_BROADCAST;
1154			else if (p->iph.daddr)
1155				nflags = IFF_POINTOPOINT;
1156
1157			if ((dev->flags ^ nflags) &
1158			    (IFF_POINTOPOINT | IFF_BROADCAST))
1159				return -EINVAL;
1160		}
1161	}
1162
1163	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark);
1164	return 0;
1165}
1166EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1167
1168int ip_tunnel_init(struct net_device *dev)
1169{
1170	struct ip_tunnel *tunnel = netdev_priv(dev);
1171	struct iphdr *iph = &tunnel->parms.iph;
1172	int err;
1173
1174	dev->needs_free_netdev = true;
1175	dev->priv_destructor = ip_tunnel_dev_free;
1176	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1177	if (!dev->tstats)
1178		return -ENOMEM;
1179
1180	err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
1181	if (err) {
1182		free_percpu(dev->tstats);
1183		return err;
1184	}
1185
1186	err = gro_cells_init(&tunnel->gro_cells, dev);
1187	if (err) {
1188		dst_cache_destroy(&tunnel->dst_cache);
1189		free_percpu(dev->tstats);
1190		return err;
1191	}
1192
1193	tunnel->dev = dev;
1194	tunnel->net = dev_net(dev);
1195	strcpy(tunnel->parms.name, dev->name);
1196	iph->version		= 4;
1197	iph->ihl		= 5;
1198
1199	if (tunnel->collect_md) {
1200		dev->features |= NETIF_F_NETNS_LOCAL;
1201		netif_keep_dst(dev);
1202	}
1203	return 0;
1204}
1205EXPORT_SYMBOL_GPL(ip_tunnel_init);
1206
1207void ip_tunnel_uninit(struct net_device *dev)
1208{
1209	struct ip_tunnel *tunnel = netdev_priv(dev);
1210	struct net *net = tunnel->net;
1211	struct ip_tunnel_net *itn;
1212
1213	itn = net_generic(net, tunnel->ip_tnl_net_id);
1214	/* fb_tunnel_dev will be unregisted in net-exit call. */
1215	if (itn->fb_tunnel_dev != dev)
1216		ip_tunnel_del(itn, netdev_priv(dev));
1217
1218	dst_cache_reset(&tunnel->dst_cache);
1219}
1220EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1221
 1222/* Do the least required initialization; the rest is done in the tunnel_init call */
1223void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
1224{
1225	struct ip_tunnel *tunnel = netdev_priv(dev);
1226	tunnel->ip_tnl_net_id = net_id;
1227}
1228EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1229
1230MODULE_LICENSE("GPL");
net/ipv4/ip_tunnel.c (Linux v6.2)
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (c) 2013 Nicira, Inc.
   4 */
   5
   6#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
   7
   8#include <linux/capability.h>
   9#include <linux/module.h>
  10#include <linux/types.h>
  11#include <linux/kernel.h>
  12#include <linux/slab.h>
  13#include <linux/uaccess.h>
  14#include <linux/skbuff.h>
  15#include <linux/netdevice.h>
  16#include <linux/in.h>
  17#include <linux/tcp.h>
  18#include <linux/udp.h>
  19#include <linux/if_arp.h>
  20#include <linux/init.h>
  21#include <linux/in6.h>
  22#include <linux/inetdevice.h>
  23#include <linux/igmp.h>
  24#include <linux/netfilter_ipv4.h>
  25#include <linux/etherdevice.h>
  26#include <linux/if_ether.h>
  27#include <linux/if_vlan.h>
  28#include <linux/rculist.h>
  29#include <linux/err.h>
  30
  31#include <net/sock.h>
  32#include <net/ip.h>
  33#include <net/icmp.h>
  34#include <net/protocol.h>
  35#include <net/ip_tunnels.h>
  36#include <net/arp.h>
  37#include <net/checksum.h>
  38#include <net/dsfield.h>
  39#include <net/inet_ecn.h>
  40#include <net/xfrm.h>
  41#include <net/net_namespace.h>
  42#include <net/netns/generic.h>
  43#include <net/rtnetlink.h>
  44#include <net/udp.h>
  45#include <net/dst_metadata.h>
  46
  47#if IS_ENABLED(CONFIG_IPV6)
  48#include <net/ipv6.h>
  49#include <net/ip6_fib.h>
  50#include <net/ip6_route.h>
  51#endif
  52
  53static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
  54{
  55	return hash_32((__force u32)key ^ (__force u32)remote,
  56			 IP_TNL_HASH_BITS);
  57}
  58
  59static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
  60				__be16 flags, __be32 key)
  61{
  62	if (p->i_flags & TUNNEL_KEY) {
  63		if (flags & TUNNEL_KEY)
  64			return key == p->i_key;
  65		else
  66			/* key expected, none present */
  67			return false;
  68	} else
  69		return !(flags & TUNNEL_KEY);
  70}
  71
  72/* Fallback tunnel: no source, no destination, no key, no options
  73
  74   Tunnel hash table:
   75   We require an exact key match, i.e. if a key is present in the packet
   76   it will match only a tunnel with the same key; if no key is present,
   77   it will match only a keyless tunnel.
   78
   79   All keyless packets, if not matched against a configured keyless
   80   tunnel, will match the fallback tunnel.
   81   Given src, dst and key, find the appropriate tunnel for the input.
  82*/
  83struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
  84				   int link, __be16 flags,
  85				   __be32 remote, __be32 local,
  86				   __be32 key)
  87{
  88	struct ip_tunnel *t, *cand = NULL;
  89	struct hlist_head *head;
  90	struct net_device *ndev;
  91	unsigned int hash;
  92
  93	hash = ip_tunnel_hash(key, remote);
  94	head = &itn->tunnels[hash];
  95
  96	hlist_for_each_entry_rcu(t, head, hash_node) {
  97		if (local != t->parms.iph.saddr ||
  98		    remote != t->parms.iph.daddr ||
  99		    !(t->dev->flags & IFF_UP))
 100			continue;
 101
 102		if (!ip_tunnel_key_match(&t->parms, flags, key))
 103			continue;
 104
 105		if (t->parms.link == link)
 106			return t;
 107		else
 108			cand = t;
 109	}
 110
 111	hlist_for_each_entry_rcu(t, head, hash_node) {
 112		if (remote != t->parms.iph.daddr ||
 113		    t->parms.iph.saddr != 0 ||
 114		    !(t->dev->flags & IFF_UP))
 115			continue;
 116
 117		if (!ip_tunnel_key_match(&t->parms, flags, key))
 118			continue;
 119
 120		if (t->parms.link == link)
 121			return t;
 122		else if (!cand)
 123			cand = t;
 124	}
 125
 126	hash = ip_tunnel_hash(key, 0);
 127	head = &itn->tunnels[hash];
 128
 129	hlist_for_each_entry_rcu(t, head, hash_node) {
 130		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
 131		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
 132			continue;
 133
 134		if (!(t->dev->flags & IFF_UP))
 135			continue;
 136
 137		if (!ip_tunnel_key_match(&t->parms, flags, key))
 138			continue;
 139
 140		if (t->parms.link == link)
 141			return t;
 142		else if (!cand)
 143			cand = t;
 144	}
 145
 146	hlist_for_each_entry_rcu(t, head, hash_node) {
 147		if ((!(flags & TUNNEL_NO_KEY) && t->parms.i_key != key) ||
 148		    t->parms.iph.saddr != 0 ||
 149		    t->parms.iph.daddr != 0 ||
 150		    !(t->dev->flags & IFF_UP))
 151			continue;
 152
 153		if (t->parms.link == link)
 154			return t;
 155		else if (!cand)
 156			cand = t;
 157	}
 158
 159	if (cand)
 160		return cand;
 161
 162	t = rcu_dereference(itn->collect_md_tun);
 163	if (t && t->dev->flags & IFF_UP)
 164		return t;
 165
 166	ndev = READ_ONCE(itn->fb_tunnel_dev);
 167	if (ndev && ndev->flags & IFF_UP)
 168		return netdev_priv(ndev);
 169
 170	return NULL;
 171}
 172EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
 173
 174static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
 175				    struct ip_tunnel_parm *parms)
 176{
 177	unsigned int h;
 178	__be32 remote;
 179	__be32 i_key = parms->i_key;
 180
 181	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
 182		remote = parms->iph.daddr;
 183	else
 184		remote = 0;
 185
 186	if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
 187		i_key = 0;
 188
 189	h = ip_tunnel_hash(i_key, remote);
 190	return &itn->tunnels[h];
 191}
 192
 193static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
 194{
 195	struct hlist_head *head = ip_bucket(itn, &t->parms);
 196
 197	if (t->collect_md)
 198		rcu_assign_pointer(itn->collect_md_tun, t);
 199	hlist_add_head_rcu(&t->hash_node, head);
 200}
 201
 202static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
 203{
 204	if (t->collect_md)
 205		rcu_assign_pointer(itn->collect_md_tun, NULL);
 206	hlist_del_init_rcu(&t->hash_node);
 207}
 208
 209static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
 210					struct ip_tunnel_parm *parms,
 211					int type)
 212{
 213	__be32 remote = parms->iph.daddr;
 214	__be32 local = parms->iph.saddr;
 215	__be32 key = parms->i_key;
 216	__be16 flags = parms->i_flags;
 217	int link = parms->link;
 218	struct ip_tunnel *t = NULL;
 219	struct hlist_head *head = ip_bucket(itn, parms);
 220
 221	hlist_for_each_entry_rcu(t, head, hash_node) {
 222		if (local == t->parms.iph.saddr &&
 223		    remote == t->parms.iph.daddr &&
 224		    link == t->parms.link &&
 225		    type == t->dev->type &&
 226		    ip_tunnel_key_match(&t->parms, flags, key))
 227			break;
 228	}
 229	return t;
 230}
 231
 232static struct net_device *__ip_tunnel_create(struct net *net,
 233					     const struct rtnl_link_ops *ops,
 234					     struct ip_tunnel_parm *parms)
 235{
 236	int err;
 237	struct ip_tunnel *tunnel;
 238	struct net_device *dev;
 239	char name[IFNAMSIZ];
 240
 241	err = -E2BIG;
 242	if (parms->name[0]) {
 243		if (!dev_valid_name(parms->name))
 244			goto failed;
 245		strscpy(name, parms->name, IFNAMSIZ);
 246	} else {
 247		if (strlen(ops->kind) > (IFNAMSIZ - 3))
 248			goto failed;
 249		strcpy(name, ops->kind);
 250		strcat(name, "%d");
 251	}
 252
 253	ASSERT_RTNL();
 254	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
 255	if (!dev) {
 256		err = -ENOMEM;
 257		goto failed;
 258	}
 259	dev_net_set(dev, net);
 260
 261	dev->rtnl_link_ops = ops;
 262
 263	tunnel = netdev_priv(dev);
 264	tunnel->parms = *parms;
 265	tunnel->net = net;
 266
 267	err = register_netdevice(dev);
 268	if (err)
 269		goto failed_free;
 270
 271	return dev;
 272
 273failed_free:
 274	free_netdev(dev);
 275failed:
 276	return ERR_PTR(err);
 277}
 278
 279static int ip_tunnel_bind_dev(struct net_device *dev)
 280{
 281	struct net_device *tdev = NULL;
 282	struct ip_tunnel *tunnel = netdev_priv(dev);
 283	const struct iphdr *iph;
 284	int hlen = LL_MAX_HEADER;
 285	int mtu = ETH_DATA_LEN;
 286	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
 287
 288	iph = &tunnel->parms.iph;
 289
 290	/* Guess output device to choose reasonable mtu and needed_headroom */
 291	if (iph->daddr) {
 292		struct flowi4 fl4;
 293		struct rtable *rt;
 294
 295		ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
 296				    iph->saddr, tunnel->parms.o_key,
 297				    RT_TOS(iph->tos), dev_net(dev),
 298				    tunnel->parms.link, tunnel->fwmark, 0, 0);
 299		rt = ip_route_output_key(tunnel->net, &fl4);
 300
 301		if (!IS_ERR(rt)) {
 302			tdev = rt->dst.dev;
 303			ip_rt_put(rt);
 304		}
 305		if (dev->type != ARPHRD_ETHER)
 306			dev->flags |= IFF_POINTOPOINT;
 307
 308		dst_cache_reset(&tunnel->dst_cache);
 309	}
 310
 311	if (!tdev && tunnel->parms.link)
 312		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
 313
 314	if (tdev) {
 315		hlen = tdev->hard_header_len + tdev->needed_headroom;
 316		mtu = min(tdev->mtu, IP_MAX_MTU);
 317	}
 318
 319	dev->needed_headroom = t_hlen + hlen;
 320	mtu -= t_hlen + (dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0);
 321
 322	if (mtu < IPV4_MIN_MTU)
 323		mtu = IPV4_MIN_MTU;
 324
 325	return mtu;
 326}
 327
 328static struct ip_tunnel *ip_tunnel_create(struct net *net,
 329					  struct ip_tunnel_net *itn,
 330					  struct ip_tunnel_parm *parms)
 331{
 332	struct ip_tunnel *nt;
 333	struct net_device *dev;
 334	int t_hlen;
 335	int mtu;
 336	int err;
 337
 338	dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
 339	if (IS_ERR(dev))
 340		return ERR_CAST(dev);
 341
 342	mtu = ip_tunnel_bind_dev(dev);
 343	err = dev_set_mtu(dev, mtu);
 344	if (err)
 345		goto err_dev_set_mtu;
 346
 347	nt = netdev_priv(dev);
 348	t_hlen = nt->hlen + sizeof(struct iphdr);
 349	dev->min_mtu = ETH_MIN_MTU;
 350	dev->max_mtu = IP_MAX_MTU - t_hlen;
 351	if (dev->type == ARPHRD_ETHER)
 352		dev->max_mtu -= dev->hard_header_len;
 353
 354	ip_tunnel_add(itn, nt);
 355	return nt;
 356
 357err_dev_set_mtu:
 358	unregister_netdevice(dev);
 359	return ERR_PTR(err);
 360}
 361
 362int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
 363		  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
 364		  bool log_ecn_error)
 365{
 366	const struct iphdr *iph = ip_hdr(skb);
 367	int err;
 368
 369#ifdef CONFIG_NET_IPGRE_BROADCAST
 370	if (ipv4_is_multicast(iph->daddr)) {
 371		DEV_STATS_INC(tunnel->dev, multicast);
 372		skb->pkt_type = PACKET_BROADCAST;
 373	}
 374#endif
 375
 376	if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
 377	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
 378		DEV_STATS_INC(tunnel->dev, rx_crc_errors);
 379		DEV_STATS_INC(tunnel->dev, rx_errors);
 380		goto drop;
 381	}
 382
 383	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
 384		if (!(tpi->flags&TUNNEL_SEQ) ||
 385		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
 386			DEV_STATS_INC(tunnel->dev, rx_fifo_errors);
 387			DEV_STATS_INC(tunnel->dev, rx_errors);
 388			goto drop;
 389		}
 390		tunnel->i_seqno = ntohl(tpi->seq) + 1;
 391	}
 392
 393	skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0);
 394
 395	err = IP_ECN_decapsulate(iph, skb);
 396	if (unlikely(err)) {
 397		if (log_ecn_error)
 398			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
 399					&iph->saddr, iph->tos);
 400		if (err > 1) {
 401			DEV_STATS_INC(tunnel->dev, rx_frame_errors);
 402			DEV_STATS_INC(tunnel->dev, rx_errors);
 403			goto drop;
 404		}
 405	}
 406
 407	dev_sw_netstats_rx_add(tunnel->dev, skb->len);
 408	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
 409
 410	if (tunnel->dev->type == ARPHRD_ETHER) {
 411		skb->protocol = eth_type_trans(skb, tunnel->dev);
 412		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
 413	} else {
 414		skb->dev = tunnel->dev;
 415	}
 416
 417	if (tun_dst)
 418		skb_dst_set(skb, (struct dst_entry *)tun_dst);
 419
 420	gro_cells_receive(&tunnel->gro_cells, skb);
 421	return 0;
 422
 423drop:
 424	if (tun_dst)
 425		dst_release((struct dst_entry *)tun_dst);
 426	kfree_skb(skb);
 427	return 0;
 428}
 429EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
 430
 431int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
 432			    unsigned int num)
 433{
 434	if (num >= MAX_IPTUN_ENCAP_OPS)
 435		return -ERANGE;
 436
 437	return !cmpxchg((const struct ip_tunnel_encap_ops **)
 438			&iptun_encaps[num],
 439			NULL, ops) ? 0 : -1;
 440}
 441EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
 442
 443int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
 444			    unsigned int num)
 445{
 446	int ret;
 447
 448	if (num >= MAX_IPTUN_ENCAP_OPS)
 449		return -ERANGE;
 450
 451	ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
 452		       &iptun_encaps[num],
 453		       ops, NULL) == ops) ? 0 : -1;
 454
 455	synchronize_net();
 456
 457	return ret;
 458}
 459EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
 460
 461int ip_tunnel_encap_setup(struct ip_tunnel *t,
 462			  struct ip_tunnel_encap *ipencap)
 463{
 464	int hlen;
 465
 466	memset(&t->encap, 0, sizeof(t->encap));
 467
 468	hlen = ip_encap_hlen(ipencap);
 469	if (hlen < 0)
 470		return hlen;
 471
 472	t->encap.type = ipencap->type;
 473	t->encap.sport = ipencap->sport;
 474	t->encap.dport = ipencap->dport;
 475	t->encap.flags = ipencap->flags;
 476
 477	t->encap_hlen = hlen;
 478	t->hlen = t->encap_hlen + t->tun_hlen;
 479
 480	return 0;
 481}
 482EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
 483
 484static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
 485			    struct rtable *rt, __be16 df,
 486			    const struct iphdr *inner_iph,
 487			    int tunnel_hlen, __be32 dst, bool md)
 488{
 489	struct ip_tunnel *tunnel = netdev_priv(dev);
 490	int pkt_size;
 491	int mtu;
 492
 493	tunnel_hlen = md ? tunnel_hlen : tunnel->hlen;
 494	pkt_size = skb->len - tunnel_hlen;
 495	pkt_size -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
 496
 497	if (df) {
 498		mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel_hlen);
 499		mtu -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
 500	} else {
 501		mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
 502	}
 503
 504	if (skb_valid_dst(skb))
 505		skb_dst_update_pmtu_no_confirm(skb, mtu);
 506
 507	if (skb->protocol == htons(ETH_P_IP)) {
 508		if (!skb_is_gso(skb) &&
 509		    (inner_iph->frag_off & htons(IP_DF)) &&
 510		    mtu < pkt_size) {
 511			icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
 512			return -E2BIG;
 513		}
 514	}
 515#if IS_ENABLED(CONFIG_IPV6)
 516	else if (skb->protocol == htons(ETH_P_IPV6)) {
 517		struct rt6_info *rt6;
 518		__be32 daddr;
 519
 520		rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) :
 521					   NULL;
 522		daddr = md ? dst : tunnel->parms.iph.daddr;
 523
 524		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
 525			   mtu >= IPV6_MIN_MTU) {
 526			if ((daddr && !ipv4_is_multicast(daddr)) ||
 527			    rt6->rt6i_dst.plen == 128) {
 528				rt6->rt6i_flags |= RTF_MODIFIED;
 529				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
 530			}
 531		}
 532
 533		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
 534					mtu < pkt_size) {
 535			icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 536			return -E2BIG;
 537		}
 538	}
 539#endif
 540	return 0;
 541}
 542
 543void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 544		       u8 proto, int tunnel_hlen)
 545{
 546	struct ip_tunnel *tunnel = netdev_priv(dev);
 547	u32 headroom = sizeof(struct iphdr);
 548	struct ip_tunnel_info *tun_info;
 549	const struct ip_tunnel_key *key;
 550	const struct iphdr *inner_iph;
 551	struct rtable *rt = NULL;
 552	struct flowi4 fl4;
 553	__be16 df = 0;
 554	u8 tos, ttl;
 555	bool use_cache;
 556
 557	tun_info = skb_tunnel_info(skb);
 558	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
 559		     ip_tunnel_info_af(tun_info) != AF_INET))
 560		goto tx_error;
 561	key = &tun_info->key;
 562	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 563	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
 564	tos = key->tos;
 565	if (tos == 1) {
 566		if (skb->protocol == htons(ETH_P_IP))
 567			tos = inner_iph->tos;
 568		else if (skb->protocol == htons(ETH_P_IPV6))
 569			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
 570	}
 571	ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
 572			    tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
 573			    dev_net(dev), 0, skb->mark, skb_get_hash(skb),
 574			    key->flow_flags);
 575	if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
 576		goto tx_error;
 577
 578	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
 579	if (use_cache)
 580		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl4.saddr);
 581	if (!rt) {
 582		rt = ip_route_output_key(tunnel->net, &fl4);
 583		if (IS_ERR(rt)) {
 584			DEV_STATS_INC(dev, tx_carrier_errors);
 585			goto tx_error;
 586		}
 587		if (use_cache)
 588			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
 589					  fl4.saddr);
 590	}
 591	if (rt->dst.dev == dev) {
 592		ip_rt_put(rt);
 593		DEV_STATS_INC(dev, collisions);
 594		goto tx_error;
 595	}
 596
 597	if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
 598		df = htons(IP_DF);
 599	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen,
 600			    key->u.ipv4.dst, true)) {
 601		ip_rt_put(rt);
 602		goto tx_error;
 603	}
 604
 605	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
 606	ttl = key->ttl;
 607	if (ttl == 0) {
 608		if (skb->protocol == htons(ETH_P_IP))
 609			ttl = inner_iph->ttl;
 610		else if (skb->protocol == htons(ETH_P_IPV6))
 611			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
 612		else
 613			ttl = ip4_dst_hoplimit(&rt->dst);
 614	}
 615
 616	headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
 617	if (headroom > dev->needed_headroom)
 618		dev->needed_headroom = headroom;
 619
 620	if (skb_cow_head(skb, dev->needed_headroom)) {
 621		ip_rt_put(rt);
 622		goto tx_dropped;
 623	}
 624	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
 625		      df, !net_eq(tunnel->net, dev_net(dev)));
 626	return;
 627tx_error:
 628	DEV_STATS_INC(dev, tx_errors);
 629	goto kfree;
 630tx_dropped:
 631	DEV_STATS_INC(dev, tx_dropped);
 632kfree:
 633	kfree_skb(skb);
 634}
 635EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);
 636
 637void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 638		    const struct iphdr *tnl_params, u8 protocol)
 639{
 640	struct ip_tunnel *tunnel = netdev_priv(dev);
 641	struct ip_tunnel_info *tun_info = NULL;
 642	const struct iphdr *inner_iph;
 643	unsigned int max_headroom;	/* The extra header space needed */
 644	struct rtable *rt = NULL;		/* Route to the other host */
 645	__be16 payload_protocol;
 646	bool use_cache = false;
 647	struct flowi4 fl4;
 648	bool md = false;
 649	bool connected;
 650	u8 tos, ttl;
 651	__be32 dst;
 652	__be16 df;
 653
 654	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
 655	connected = (tunnel->parms.iph.daddr != 0);
 656	payload_protocol = skb_protocol(skb, true);
 657
 658	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 659
 660	dst = tnl_params->daddr;
 661	if (dst == 0) {
 662		/* NBMA tunnel */
 663
 664		if (!skb_dst(skb)) {
 665			DEV_STATS_INC(dev, tx_fifo_errors);
 666			goto tx_error;
 667		}
 668
 669		tun_info = skb_tunnel_info(skb);
 670		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) &&
 671		    ip_tunnel_info_af(tun_info) == AF_INET &&
 672		    tun_info->key.u.ipv4.dst) {
 673			dst = tun_info->key.u.ipv4.dst;
 674			md = true;
 675			connected = true;
 676		} else if (payload_protocol == htons(ETH_P_IP)) {
 677			rt = skb_rtable(skb);
 678			dst = rt_nexthop(rt, inner_iph->daddr);
 679		}
 680#if IS_ENABLED(CONFIG_IPV6)
 681		else if (payload_protocol == htons(ETH_P_IPV6)) {
 682			const struct in6_addr *addr6;
 683			struct neighbour *neigh;
 684			bool do_tx_error_icmp;
 685			int addr_type;
 686
 687			neigh = dst_neigh_lookup(skb_dst(skb),
 688						 &ipv6_hdr(skb)->daddr);
 689			if (!neigh)
 690				goto tx_error;
 691
 692			addr6 = (const struct in6_addr *)&neigh->primary_key;
 693			addr_type = ipv6_addr_type(addr6);
 694
 695			if (addr_type == IPV6_ADDR_ANY) {
 696				addr6 = &ipv6_hdr(skb)->daddr;
 697				addr_type = ipv6_addr_type(addr6);
 698			}
 699
 700			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
 701				do_tx_error_icmp = true;
 702			else {
 703				do_tx_error_icmp = false;
 704				dst = addr6->s6_addr32[3];
 705			}
 706			neigh_release(neigh);
 707			if (do_tx_error_icmp)
 708				goto tx_error_icmp;
 709		}
 710#endif
 711		else
 712			goto tx_error;
 713
 714		if (!md)
 715			connected = false;
 716	}
 717
 718	tos = tnl_params->tos;
 719	if (tos & 0x1) {
 720		tos &= ~0x1;
 721		if (payload_protocol == htons(ETH_P_IP)) {
 722			tos = inner_iph->tos;
 723			connected = false;
 724		} else if (payload_protocol == htons(ETH_P_IPV6)) {
 725			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
 726			connected = false;
 727		}
 728	}
 729
 730	ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
 731			    tunnel->parms.o_key, RT_TOS(tos),
 732			    dev_net(dev), tunnel->parms.link,
 733			    tunnel->fwmark, skb_get_hash(skb), 0);
 734
 735	if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
 736		goto tx_error;
 737
 738	if (connected && md) {
 739		use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
 740		if (use_cache)
 741			rt = dst_cache_get_ip4(&tun_info->dst_cache,
 742					       &fl4.saddr);
 743	} else {
 744		rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache,
 745						&fl4.saddr) : NULL;
 746	}
 747
 748	if (!rt) {
 749		rt = ip_route_output_key(tunnel->net, &fl4);
 750
 751		if (IS_ERR(rt)) {
 752			DEV_STATS_INC(dev, tx_carrier_errors);
 753			goto tx_error;
 754		}
 755		if (use_cache)
 756			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
 757					  fl4.saddr);
 758		else if (!md && connected)
 759			dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
 760					  fl4.saddr);
 761	}
 762
 763	if (rt->dst.dev == dev) {
 764		ip_rt_put(rt);
 765		DEV_STATS_INC(dev, collisions);
 766		goto tx_error;
 767	}
 768
 769	df = tnl_params->frag_off;
 770	if (payload_protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
 771		df |= (inner_iph->frag_off & htons(IP_DF));
 772
 773	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, 0, 0, false)) {
 774		ip_rt_put(rt);
 775		goto tx_error;
 776	}
 777
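	/* While ICMP-reported tunnel errors are pending (err_count), notify
	 * the inner sender via dst_link_failure(); the packet itself is
	 * still transmitted below.
	 */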
 778	if (tunnel->err_count > 0) {
 779		if (time_before(jiffies,
 780				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
 781			tunnel->err_count--;
 782
 783			dst_link_failure(skb);
 784		} else
 785			tunnel->err_count = 0;
 786	}
 787
 788	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
 789	ttl = tnl_params->ttl;
 790	if (ttl == 0) {
 791		if (payload_protocol == htons(ETH_P_IP))
 792			ttl = inner_iph->ttl;
 793#if IS_ENABLED(CONFIG_IPV6)
 794		else if (payload_protocol == htons(ETH_P_IPV6))
 795			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
 796#endif
 797		else
 798			ttl = ip4_dst_hoplimit(&rt->dst);
 799	}
 800
 801	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
 802			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
 803	if (max_headroom > dev->needed_headroom)
 804		dev->needed_headroom = max_headroom;
 805
 806	if (skb_cow_head(skb, dev->needed_headroom)) {
 807		ip_rt_put(rt);
 808		DEV_STATS_INC(dev, tx_dropped);
 809		kfree_skb(skb);
 810		return;
 811	}
 812
 813	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
 814		      df, !net_eq(tunnel->net, dev_net(dev)));
 815	return;
 816
 817#if IS_ENABLED(CONFIG_IPV6)
 818tx_error_icmp:
 819	dst_link_failure(skb);
 820#endif
 821tx_error:
 822	DEV_STATS_INC(dev, tx_errors);
 823	kfree_skb(skb);
 824}
 825EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
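
/* A minimal sketch, not part of this file, of how a tunnel driver such as
 * ipip typically hands packets to ip_tunnel_xmit() from its ndo_start_xmit
 * hook. The foo_* name is hypothetical and the usual protocol checks and
 * error paths are elided.
 */
static netdev_tx_t foo_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *tiph = &tunnel->parms.iph;

	/* tnl_params come from the configured tunnel; ip_tunnel_xmit()
	 * consumes the skb on both the success and the error paths.
	 */
	ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP);
	return NETDEV_TX_OK;
}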
 826
 827static void ip_tunnel_update(struct ip_tunnel_net *itn,
 828			     struct ip_tunnel *t,
 829			     struct net_device *dev,
 830			     struct ip_tunnel_parm *p,
 831			     bool set_mtu,
 832			     __u32 fwmark)
 833{
 834	ip_tunnel_del(itn, t);
 835	t->parms.iph.saddr = p->iph.saddr;
 836	t->parms.iph.daddr = p->iph.daddr;
 837	t->parms.i_key = p->i_key;
 838	t->parms.o_key = p->o_key;
 839	if (dev->type != ARPHRD_ETHER) {
 840		__dev_addr_set(dev, &p->iph.saddr, 4);
 841		memcpy(dev->broadcast, &p->iph.daddr, 4);
 842	}
 843	ip_tunnel_add(itn, t);
 844
 845	t->parms.iph.ttl = p->iph.ttl;
 846	t->parms.iph.tos = p->iph.tos;
 847	t->parms.iph.frag_off = p->iph.frag_off;
 848
 849	if (t->parms.link != p->link || t->fwmark != fwmark) {
 850		int mtu;
 851
 852		t->parms.link = p->link;
 853		t->fwmark = fwmark;
 854		mtu = ip_tunnel_bind_dev(dev);
 855		if (set_mtu)
 856			dev->mtu = mtu;
 857	}
 858	dst_cache_reset(&t->dst_cache);
 859	netdev_state_change(dev);
 860}
 861
 862int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
 863{
 864	int err = 0;
 865	struct ip_tunnel *t = netdev_priv(dev);
 866	struct net *net = t->net;
 867	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
 868
 869	switch (cmd) {
 870	case SIOCGETTUNNEL:
 871		if (dev == itn->fb_tunnel_dev) {
 872			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
 873			if (!t)
 874				t = netdev_priv(dev);
 875		}
 876		memcpy(p, &t->parms, sizeof(*p));
 877		break;
 878
 879	case SIOCADDTUNNEL:
 880	case SIOCCHGTUNNEL:
 881		err = -EPERM;
 882		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 883			goto done;
 884		if (p->iph.ttl)
 885			p->iph.frag_off |= htons(IP_DF);
 886		if (!(p->i_flags & VTI_ISVTI)) {
 887			if (!(p->i_flags & TUNNEL_KEY))
 888				p->i_key = 0;
 889			if (!(p->o_flags & TUNNEL_KEY))
 890				p->o_key = 0;
 891		}
 892
 893		t = ip_tunnel_find(itn, p, itn->type);
 894
 895		if (cmd == SIOCADDTUNNEL) {
 896			if (!t) {
 897				t = ip_tunnel_create(net, itn, p);
 898				err = PTR_ERR_OR_ZERO(t);
 899				break;
 900			}
 901
 902			err = -EEXIST;
 903			break;
 904		}
 905		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
 906			if (t) {
 907				if (t->dev != dev) {
 908					err = -EEXIST;
 909					break;
 910				}
 911			} else {
 912				unsigned int nflags = 0;
 913
 914				if (ipv4_is_multicast(p->iph.daddr))
 915					nflags = IFF_BROADCAST;
 916				else if (p->iph.daddr)
 917					nflags = IFF_POINTOPOINT;
 918
  919				if ((dev->flags ^ nflags) & (IFF_POINTOPOINT | IFF_BROADCAST)) {
 920					err = -EINVAL;
 921					break;
 922				}
 923
 924				t = netdev_priv(dev);
 925			}
 926		}
 927
 928		if (t) {
 929			err = 0;
 930			ip_tunnel_update(itn, t, dev, p, true, 0);
 931		} else {
 932			err = -ENOENT;
 933		}
 934		break;
 935
 936	case SIOCDELTUNNEL:
 937		err = -EPERM;
 938		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 939			goto done;
 940
 941		if (dev == itn->fb_tunnel_dev) {
 942			err = -ENOENT;
 943			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
 944			if (!t)
 945				goto done;
 946			err = -EPERM;
 947			if (t == netdev_priv(itn->fb_tunnel_dev))
 948				goto done;
 949			dev = t->dev;
 950		}
 951		unregister_netdevice(dev);
 952		err = 0;
 953		break;
 954
 955	default:
 956		err = -EINVAL;
 957	}
 958
 959done:
 960	return err;
 961}
 962EXPORT_SYMBOL_GPL(ip_tunnel_ctl);
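
/* A sketch, pattern taken from ipip, of how a driver layers its own
 * parameter validation over ip_tunnel_ctl() and exposes the result as its
 * ndo_tunnel_ctl hook; foo_* is hypothetical and the exact checks vary
 * per tunnel type.
 */
static int foo_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p,
			  int cmd)
{
	if (p->iph.version != 4 || p->iph.ihl != 5 ||
	    p->iph.protocol != IPPROTO_IPIP)
		return -EINVAL;

	return ip_tunnel_ctl(dev, p, cmd);
}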
 963
 964int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
 965			     void __user *data, int cmd)
 966{
 967	struct ip_tunnel_parm p;
 968	int err;
 969
 970	if (copy_from_user(&p, data, sizeof(p)))
 971		return -EFAULT;
 972	err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd);
 973	if (!err && copy_to_user(data, &p, sizeof(p)))
 974		return -EFAULT;
 975	return err;
 976}
 977EXPORT_SYMBOL_GPL(ip_tunnel_siocdevprivate);
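
/* From userspace these ioctls travel over an ordinary AF_INET socket with
 * ifr_ifru.ifru_data pointing at a struct ip_tunnel_parm, which is what
 * ip_tunnel_siocdevprivate() copies in and out above. A rough sketch of
 * what a tool like iproute2 does (error handling elided, device name
 * illustrative):
 *
 *	struct ip_tunnel_parm p = { };
 *	struct ifreq ifr = { };
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	strncpy(p.name, "tunl0", IFNAMSIZ);
 *	strncpy(ifr.ifr_name, "tunl0", IFNAMSIZ);
 *	ifr.ifr_ifru.ifru_data = (void *)&p;
 *	ioctl(fd, SIOCGETTUNNEL, &ifr);	 // p now holds the tunnel parms
 */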
 978
 979int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
 980{
 981	struct ip_tunnel *tunnel = netdev_priv(dev);
 982	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
 983	int max_mtu = IP_MAX_MTU - t_hlen;
 984
 985	if (dev->type == ARPHRD_ETHER)
 986		max_mtu -= dev->hard_header_len;
 987
 988	if (new_mtu < ETH_MIN_MTU)
 989		return -EINVAL;
 990
 991	if (new_mtu > max_mtu) {
 992		if (strict)
 993			return -EINVAL;
 994
 995		new_mtu = max_mtu;
 996	}
 997
 998	dev->mtu = new_mtu;
 999	return 0;
1000}
1001EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
1002
1003int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1004{
1005	return __ip_tunnel_change_mtu(dev, new_mtu, true);
1006}
1007EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
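
/* Worked example for the MTU bounds above, assuming a plain IPIP tunnel
 * (tunnel->hlen == 0, dev->type != ARPHRD_ETHER): t_hlen is just the
 * 20-byte outer IPv4 header, so max_mtu = IP_MAX_MTU - 20 = 65535 - 20
 * = 65515. A requested MTU of 70000 then fails with -EINVAL under strict
 * checking and is silently clamped to 65515 otherwise.
 */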
1008
1009static void ip_tunnel_dev_free(struct net_device *dev)
1010{
1011	struct ip_tunnel *tunnel = netdev_priv(dev);
1012
1013	gro_cells_destroy(&tunnel->gro_cells);
1014	dst_cache_destroy(&tunnel->dst_cache);
1015	free_percpu(dev->tstats);
1016}
1017
1018void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
1019{
1020	struct ip_tunnel *tunnel = netdev_priv(dev);
1021	struct ip_tunnel_net *itn;
1022
1023	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
1024
1025	if (itn->fb_tunnel_dev != dev) {
1026		ip_tunnel_del(itn, netdev_priv(dev));
1027		unregister_netdevice_queue(dev, head);
1028	}
1029}
1030EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
1031
1032struct net *ip_tunnel_get_link_net(const struct net_device *dev)
1033{
1034	struct ip_tunnel *tunnel = netdev_priv(dev);
1035
1036	return tunnel->net;
1037}
1038EXPORT_SYMBOL(ip_tunnel_get_link_net);
1039
1040int ip_tunnel_get_iflink(const struct net_device *dev)
1041{
1042	struct ip_tunnel *tunnel = netdev_priv(dev);
1043
1044	return tunnel->parms.link;
1045}
1046EXPORT_SYMBOL(ip_tunnel_get_iflink);
1047
1048int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
1049				  struct rtnl_link_ops *ops, char *devname)
1050{
1051	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
1052	struct ip_tunnel_parm parms;
1053	unsigned int i;
1054
1055	itn->rtnl_link_ops = ops;
1056	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
1057		INIT_HLIST_HEAD(&itn->tunnels[i]);
1058
1059	if (!ops || !net_has_fallback_tunnels(net)) {
1060		struct ip_tunnel_net *it_init_net;
1061
1062		it_init_net = net_generic(&init_net, ip_tnl_net_id);
1063		itn->type = it_init_net->type;
1064		itn->fb_tunnel_dev = NULL;
1065		return 0;
1066	}
1067
1068	memset(&parms, 0, sizeof(parms));
1069	if (devname)
1070		strscpy(parms.name, devname, IFNAMSIZ);
1071
1072	rtnl_lock();
1073	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
1074	/* FB netdevice is special: we have one, and only one per netns.
 1075	 * Allowing it to be moved to another netns is clearly unsafe.
1076	 */
1077	if (!IS_ERR(itn->fb_tunnel_dev)) {
1078		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
1079		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
1080		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
1081		itn->type = itn->fb_tunnel_dev->type;
1082	}
1083	rtnl_unlock();
1084
1085	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
1086}
1087EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
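
/* A minimal sketch, modelled on ipip, of the per-netns state a tunnel
 * module registers around ip_tunnel_init_net(); foo_net_id, foo_link_ops
 * and the "foo0" fallback device name are illustrative, not part of this
 * file.
 */
static unsigned int foo_net_id __read_mostly;
static struct rtnl_link_ops foo_link_ops;

static int __net_init foo_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, foo_net_id, &foo_link_ops, "foo0");
}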
1088
1089static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
1090			      struct list_head *head,
1091			      struct rtnl_link_ops *ops)
1092{
1093	struct net_device *dev, *aux;
1094	int h;
1095
1096	for_each_netdev_safe(net, dev, aux)
1097		if (dev->rtnl_link_ops == ops)
1098			unregister_netdevice_queue(dev, head);
1099
1100	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
1101		struct ip_tunnel *t;
1102		struct hlist_node *n;
1103		struct hlist_head *thead = &itn->tunnels[h];
1104
1105		hlist_for_each_entry_safe(t, n, thead, hash_node)
1106			/* If dev is in the same netns, it has already
1107			 * been added to the list by the previous loop.
1108			 */
1109			if (!net_eq(dev_net(t->dev), net))
1110				unregister_netdevice_queue(t->dev, head);
1111	}
1112}
1113
1114void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
1115			   struct rtnl_link_ops *ops)
1116{
1117	struct ip_tunnel_net *itn;
1118	struct net *net;
1119	LIST_HEAD(list);
1120
1121	rtnl_lock();
1122	list_for_each_entry(net, net_list, exit_list) {
1123		itn = net_generic(net, id);
1124		ip_tunnel_destroy(net, itn, &list, ops);
1125	}
1126	unregister_netdevice_many(&list);
1127	rtnl_unlock();
1128}
1129EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);
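
/* Continuing the illustrative foo_* sketch above: the matching batched
 * exit path, and the pernet_operations that tie init and exit together
 * (the .size slot reserves the struct ip_tunnel_net for each netns).
 */
static void __net_exit foo_exit_batch_net(struct list_head *list_net)
{
	ip_tunnel_delete_nets(list_net, foo_net_id, &foo_link_ops);
}

static struct pernet_operations foo_net_ops = {
	.init = foo_init_net,
	.exit_batch = foo_exit_batch_net,
	.id = &foo_net_id,
	.size = sizeof(struct ip_tunnel_net),
};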
1130
1131int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
1132		      struct ip_tunnel_parm *p, __u32 fwmark)
1133{
1134	struct ip_tunnel *nt;
1135	struct net *net = dev_net(dev);
1136	struct ip_tunnel_net *itn;
1137	int mtu;
1138	int err;
1139
1140	nt = netdev_priv(dev);
1141	itn = net_generic(net, nt->ip_tnl_net_id);
1142
1143	if (nt->collect_md) {
1144		if (rtnl_dereference(itn->collect_md_tun))
1145			return -EEXIST;
1146	} else {
1147		if (ip_tunnel_find(itn, p, dev->type))
1148			return -EEXIST;
1149	}
1150
1151	nt->net = net;
1152	nt->parms = *p;
1153	nt->fwmark = fwmark;
1154	err = register_netdevice(dev);
1155	if (err)
1156		goto err_register_netdevice;
1157
1158	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1159		eth_hw_addr_random(dev);
1160
1161	mtu = ip_tunnel_bind_dev(dev);
1162	if (tb[IFLA_MTU]) {
1163		unsigned int max = IP_MAX_MTU - (nt->hlen + sizeof(struct iphdr));
1164
1165		if (dev->type == ARPHRD_ETHER)
1166			max -= dev->hard_header_len;
1167
1168		mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, max);
1169	}
1170
1171	err = dev_set_mtu(dev, mtu);
1172	if (err)
1173		goto err_dev_set_mtu;
1174
1175	ip_tunnel_add(itn, nt);
1176	return 0;
1177
1178err_dev_set_mtu:
1179	unregister_netdevice(dev);
1180err_register_netdevice:
1181	return err;
1182}
1183EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
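
/* A sketch of an rtnl_link_ops ->newlink built on ip_tunnel_newlink();
 * foo_newlink is hypothetical, and a real driver would parse data[] into
 * the parms (and fwmark) instead of the fixed values below.
 */
static int foo_newlink(struct net *src_net, struct net_device *dev,
		       struct nlattr *tb[], struct nlattr *data[],
		       struct netlink_ext_ack *extack)
{
	struct ip_tunnel_parm p;

	memset(&p, 0, sizeof(p));
	p.iph.version = 4;
	p.iph.ihl = 5;
	p.iph.protocol = IPPROTO_IPIP;
	return ip_tunnel_newlink(dev, tb, &p, 0);
}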
1184
1185int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1186			 struct ip_tunnel_parm *p, __u32 fwmark)
1187{
1188	struct ip_tunnel *t;
1189	struct ip_tunnel *tunnel = netdev_priv(dev);
1190	struct net *net = tunnel->net;
1191	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1192
1193	if (dev == itn->fb_tunnel_dev)
1194		return -EINVAL;
1195
1196	t = ip_tunnel_find(itn, p, dev->type);
1197
1198	if (t) {
1199		if (t->dev != dev)
1200			return -EEXIST;
1201	} else {
1202		t = tunnel;
1203
1204		if (dev->type != ARPHRD_ETHER) {
1205			unsigned int nflags = 0;
1206
1207			if (ipv4_is_multicast(p->iph.daddr))
1208				nflags = IFF_BROADCAST;
1209			else if (p->iph.daddr)
1210				nflags = IFF_POINTOPOINT;
1211
1212			if ((dev->flags ^ nflags) &
1213			    (IFF_POINTOPOINT | IFF_BROADCAST))
1214				return -EINVAL;
1215		}
1216	}
1217
1218	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark);
1219	return 0;
1220}
1221EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1222
1223int ip_tunnel_init(struct net_device *dev)
1224{
1225	struct ip_tunnel *tunnel = netdev_priv(dev);
1226	struct iphdr *iph = &tunnel->parms.iph;
1227	int err;
1228
1229	dev->needs_free_netdev = true;
1230	dev->priv_destructor = ip_tunnel_dev_free;
1231	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1232	if (!dev->tstats)
1233		return -ENOMEM;
1234
1235	err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
1236	if (err) {
1237		free_percpu(dev->tstats);
1238		return err;
1239	}
1240
1241	err = gro_cells_init(&tunnel->gro_cells, dev);
1242	if (err) {
1243		dst_cache_destroy(&tunnel->dst_cache);
1244		free_percpu(dev->tstats);
1245		return err;
1246	}
1247
1248	tunnel->dev = dev;
1249	tunnel->net = dev_net(dev);
 1250	strscpy(tunnel->parms.name, dev->name, IFNAMSIZ);
1251	iph->version		= 4;
1252	iph->ihl		= 5;
1253
1254	if (tunnel->collect_md)
1255		netif_keep_dst(dev);
1256	return 0;
1257}
1258EXPORT_SYMBOL_GPL(ip_tunnel_init);
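
/* A sketch of a driver ->ndo_init wrapping ip_tunnel_init(), pattern from
 * ipip; foo_tunnel_init is hypothetical. The driver fixes up its own
 * encap header length and protocol, then lets the generic init allocate
 * the stats, dst cache and GRO cells.
 */
static int foo_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->hlen = 0;	/* plain IPIP adds no header beyond the outer IP */
	tunnel->parms.iph.protocol = IPPROTO_IPIP;
	return ip_tunnel_init(dev);
}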
1259
1260void ip_tunnel_uninit(struct net_device *dev)
1261{
1262	struct ip_tunnel *tunnel = netdev_priv(dev);
1263	struct net *net = tunnel->net;
1264	struct ip_tunnel_net *itn;
1265
1266	itn = net_generic(net, tunnel->ip_tnl_net_id);
1267	ip_tunnel_del(itn, netdev_priv(dev));
1268	if (itn->fb_tunnel_dev == dev)
1269		WRITE_ONCE(itn->fb_tunnel_dev, NULL);
1270
1271	dst_cache_reset(&tunnel->dst_cache);
1272}
1273EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1274
 1275/* Do the least required initialization; the rest is done in the tunnel_init call. */
1276void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
1277{
 1278	struct ip_tunnel *tunnel = netdev_priv(dev);

 1279	tunnel->ip_tnl_net_id = net_id;
1280}
1281EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1282
1283MODULE_LICENSE("GPL");