Linux Audio

Check our new training course

Loading...
v4.17
   1/*
   2 * Copyright (c) 2013 Nicira, Inc.
   3 *
   4 * This program is free software; you can redistribute it and/or
   5 * modify it under the terms of version 2 of the GNU General Public
   6 * License as published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope that it will be useful, but
   9 * WITHOUT ANY WARRANTY; without even the implied warranty of
  10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11 * General Public License for more details.
  12 *
  13 * You should have received a copy of the GNU General Public License
  14 * along with this program; if not, write to the Free Software
  15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  16 * 02110-1301, USA
  17 */
  18
  19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  20
  21#include <linux/capability.h>
  22#include <linux/module.h>
  23#include <linux/types.h>
  24#include <linux/kernel.h>
  25#include <linux/slab.h>
  26#include <linux/uaccess.h>
  27#include <linux/skbuff.h>
  28#include <linux/netdevice.h>
  29#include <linux/in.h>
  30#include <linux/tcp.h>
  31#include <linux/udp.h>
  32#include <linux/if_arp.h>
  33#include <linux/init.h>
  34#include <linux/in6.h>
  35#include <linux/inetdevice.h>
  36#include <linux/igmp.h>
  37#include <linux/netfilter_ipv4.h>
  38#include <linux/etherdevice.h>
  39#include <linux/if_ether.h>
  40#include <linux/if_vlan.h>
  41#include <linux/rculist.h>
  42#include <linux/err.h>
  43
  44#include <net/sock.h>
  45#include <net/ip.h>
  46#include <net/icmp.h>
  47#include <net/protocol.h>
  48#include <net/ip_tunnels.h>
  49#include <net/arp.h>
  50#include <net/checksum.h>
  51#include <net/dsfield.h>
  52#include <net/inet_ecn.h>
  53#include <net/xfrm.h>
  54#include <net/net_namespace.h>
  55#include <net/netns/generic.h>
  56#include <net/rtnetlink.h>
  57#include <net/udp.h>
  58#include <net/dst_metadata.h>
  59
  60#if IS_ENABLED(CONFIG_IPV6)
  61#include <net/ipv6.h>
  62#include <net/ip6_fib.h>
  63#include <net/ip6_route.h>
  64#endif
  65
  66static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
  67{
  68	return hash_32((__force u32)key ^ (__force u32)remote,
  69			 IP_TNL_HASH_BITS);
  70}
  71
  72static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
  73				__be16 flags, __be32 key)
  74{
  75	if (p->i_flags & TUNNEL_KEY) {
  76		if (flags & TUNNEL_KEY)
  77			return key == p->i_key;
  78		else
  79			/* key expected, none present */
  80			return false;
  81	} else
  82		return !(flags & TUNNEL_KEY);
  83}
  84
  85/* Fallback tunnel: no source, no destination, no key, no options
  86
  87   Tunnel hash table:
  88   We require exact key match i.e. if a key is present in packet
  89   it will match only tunnel with the same key; if it is not present,
  90   it will match only keyless tunnel.
  91
  92   All keysless packets, if not matched configured keyless tunnels
  93   will match fallback tunnel.
  94   Given src, dst and key, find appropriate for input tunnel.
  95*/
  96struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
  97				   int link, __be16 flags,
  98				   __be32 remote, __be32 local,
  99				   __be32 key)
 100{
 101	unsigned int hash;
 102	struct ip_tunnel *t, *cand = NULL;
 103	struct hlist_head *head;
 104
 105	hash = ip_tunnel_hash(key, remote);
 106	head = &itn->tunnels[hash];
 107
 108	hlist_for_each_entry_rcu(t, head, hash_node) {
 109		if (local != t->parms.iph.saddr ||
 110		    remote != t->parms.iph.daddr ||
 111		    !(t->dev->flags & IFF_UP))
 112			continue;
 113
 114		if (!ip_tunnel_key_match(&t->parms, flags, key))
 115			continue;
 116
 117		if (t->parms.link == link)
 118			return t;
 119		else
 120			cand = t;
 121	}
 122
 123	hlist_for_each_entry_rcu(t, head, hash_node) {
 124		if (remote != t->parms.iph.daddr ||
 125		    t->parms.iph.saddr != 0 ||
 126		    !(t->dev->flags & IFF_UP))
 127			continue;
 128
 129		if (!ip_tunnel_key_match(&t->parms, flags, key))
 130			continue;
 131
 132		if (t->parms.link == link)
 133			return t;
 134		else if (!cand)
 135			cand = t;
 136	}
 137
 138	hash = ip_tunnel_hash(key, 0);
 139	head = &itn->tunnels[hash];
 140
 141	hlist_for_each_entry_rcu(t, head, hash_node) {
 142		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
 143		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
 144			continue;
 145
 146		if (!(t->dev->flags & IFF_UP))
 147			continue;
 148
 149		if (!ip_tunnel_key_match(&t->parms, flags, key))
 150			continue;
 151
 152		if (t->parms.link == link)
 153			return t;
 154		else if (!cand)
 155			cand = t;
 156	}
 157
 158	if (flags & TUNNEL_NO_KEY)
 159		goto skip_key_lookup;
 160
 161	hlist_for_each_entry_rcu(t, head, hash_node) {
 162		if (t->parms.i_key != key ||
 163		    t->parms.iph.saddr != 0 ||
 164		    t->parms.iph.daddr != 0 ||
 165		    !(t->dev->flags & IFF_UP))
 166			continue;
 167
 168		if (t->parms.link == link)
 169			return t;
 170		else if (!cand)
 171			cand = t;
 172	}
 173
 174skip_key_lookup:
 175	if (cand)
 176		return cand;
 177
 178	t = rcu_dereference(itn->collect_md_tun);
 179	if (t && t->dev->flags & IFF_UP)
 180		return t;
 181
 182	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
 183		return netdev_priv(itn->fb_tunnel_dev);
 184
 185	return NULL;
 186}
 187EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
 188
 189static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
 190				    struct ip_tunnel_parm *parms)
 191{
 192	unsigned int h;
 193	__be32 remote;
 194	__be32 i_key = parms->i_key;
 195
 196	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
 197		remote = parms->iph.daddr;
 198	else
 199		remote = 0;
 200
 201	if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
 202		i_key = 0;
 203
 204	h = ip_tunnel_hash(i_key, remote);
 205	return &itn->tunnels[h];
 206}
 207
 208static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
 209{
 210	struct hlist_head *head = ip_bucket(itn, &t->parms);
 211
 212	if (t->collect_md)
 213		rcu_assign_pointer(itn->collect_md_tun, t);
 214	hlist_add_head_rcu(&t->hash_node, head);
 215}
 216
 217static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
 218{
 219	if (t->collect_md)
 220		rcu_assign_pointer(itn->collect_md_tun, NULL);
 221	hlist_del_init_rcu(&t->hash_node);
 222}
 223
 224static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
 225					struct ip_tunnel_parm *parms,
 226					int type)
 227{
 228	__be32 remote = parms->iph.daddr;
 229	__be32 local = parms->iph.saddr;
 230	__be32 key = parms->i_key;
 231	__be16 flags = parms->i_flags;
 232	int link = parms->link;
 233	struct ip_tunnel *t = NULL;
 234	struct hlist_head *head = ip_bucket(itn, parms);
 235
 236	hlist_for_each_entry_rcu(t, head, hash_node) {
 237		if (local == t->parms.iph.saddr &&
 238		    remote == t->parms.iph.daddr &&
 239		    link == t->parms.link &&
 240		    type == t->dev->type &&
 241		    ip_tunnel_key_match(&t->parms, flags, key))
 242			break;
 243	}
 244	return t;
 245}
 246
 247static struct net_device *__ip_tunnel_create(struct net *net,
 248					     const struct rtnl_link_ops *ops,
 249					     struct ip_tunnel_parm *parms)
 250{
 251	int err;
 252	struct ip_tunnel *tunnel;
 253	struct net_device *dev;
 254	char name[IFNAMSIZ];
 255
 256	err = -E2BIG;
 257	if (parms->name[0]) {
 258		if (!dev_valid_name(parms->name))
 259			goto failed;
 260		strlcpy(name, parms->name, IFNAMSIZ);
 261	} else {
 262		if (strlen(ops->kind) > (IFNAMSIZ - 3))
 
 263			goto failed;
 
 264		strlcpy(name, ops->kind, IFNAMSIZ);
 265		strncat(name, "%d", 2);
 266	}
 267
 268	ASSERT_RTNL();
 269	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
 270	if (!dev) {
 271		err = -ENOMEM;
 272		goto failed;
 273	}
 274	dev_net_set(dev, net);
 275
 276	dev->rtnl_link_ops = ops;
 277
 278	tunnel = netdev_priv(dev);
 279	tunnel->parms = *parms;
 280	tunnel->net = net;
 281
 282	err = register_netdevice(dev);
 283	if (err)
 284		goto failed_free;
 285
 286	return dev;
 287
 288failed_free:
 289	free_netdev(dev);
 290failed:
 291	return ERR_PTR(err);
 292}
 293
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 294static int ip_tunnel_bind_dev(struct net_device *dev)
 295{
 296	struct net_device *tdev = NULL;
 297	struct ip_tunnel *tunnel = netdev_priv(dev);
 298	const struct iphdr *iph;
 299	int hlen = LL_MAX_HEADER;
 300	int mtu = ETH_DATA_LEN;
 301	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
 302
 303	iph = &tunnel->parms.iph;
 304
 305	/* Guess output device to choose reasonable mtu and needed_headroom */
 306	if (iph->daddr) {
 307		struct flowi4 fl4;
 308		struct rtable *rt;
 309
 310		ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
 311				    iph->saddr, tunnel->parms.o_key,
 312				    RT_TOS(iph->tos), tunnel->parms.link,
 313				    tunnel->fwmark);
 314		rt = ip_route_output_key(tunnel->net, &fl4);
 315
 316		if (!IS_ERR(rt)) {
 317			tdev = rt->dst.dev;
 318			ip_rt_put(rt);
 319		}
 320		if (dev->type != ARPHRD_ETHER)
 321			dev->flags |= IFF_POINTOPOINT;
 322
 323		dst_cache_reset(&tunnel->dst_cache);
 324	}
 325
 326	if (!tdev && tunnel->parms.link)
 327		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
 328
 329	if (tdev) {
 330		hlen = tdev->hard_header_len + tdev->needed_headroom;
 331		mtu = min(tdev->mtu, IP_MAX_MTU);
 332	}
 333
 334	dev->needed_headroom = t_hlen + hlen;
 335	mtu -= (dev->hard_header_len + t_hlen);
 336
 337	if (mtu < IPV4_MIN_MTU)
 338		mtu = IPV4_MIN_MTU;
 339
 340	return mtu;
 341}
 342
 343static struct ip_tunnel *ip_tunnel_create(struct net *net,
 344					  struct ip_tunnel_net *itn,
 345					  struct ip_tunnel_parm *parms)
 346{
 347	struct ip_tunnel *nt;
 348	struct net_device *dev;
 349	int t_hlen;
 350	int mtu;
 351	int err;
 352
 353	dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
 
 354	if (IS_ERR(dev))
 355		return ERR_CAST(dev);
 356
 357	mtu = ip_tunnel_bind_dev(dev);
 358	err = dev_set_mtu(dev, mtu);
 359	if (err)
 360		goto err_dev_set_mtu;
 361
 362	nt = netdev_priv(dev);
 363	t_hlen = nt->hlen + sizeof(struct iphdr);
 364	dev->min_mtu = ETH_MIN_MTU;
 365	dev->max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen;
 366	ip_tunnel_add(itn, nt);
 367	return nt;
 368
 369err_dev_set_mtu:
 370	unregister_netdevice(dev);
 371	return ERR_PTR(err);
 372}
 373
 374int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
 375		  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
 376		  bool log_ecn_error)
 377{
 378	struct pcpu_sw_netstats *tstats;
 379	const struct iphdr *iph = ip_hdr(skb);
 380	int err;
 381
 382#ifdef CONFIG_NET_IPGRE_BROADCAST
 383	if (ipv4_is_multicast(iph->daddr)) {
 384		tunnel->dev->stats.multicast++;
 385		skb->pkt_type = PACKET_BROADCAST;
 386	}
 387#endif
 388
 389	if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
 390	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
 391		tunnel->dev->stats.rx_crc_errors++;
 392		tunnel->dev->stats.rx_errors++;
 393		goto drop;
 394	}
 395
 396	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
 397		if (!(tpi->flags&TUNNEL_SEQ) ||
 398		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
 399			tunnel->dev->stats.rx_fifo_errors++;
 400			tunnel->dev->stats.rx_errors++;
 401			goto drop;
 402		}
 403		tunnel->i_seqno = ntohl(tpi->seq) + 1;
 404	}
 405
 406	skb_reset_network_header(skb);
 407
 408	err = IP_ECN_decapsulate(iph, skb);
 409	if (unlikely(err)) {
 410		if (log_ecn_error)
 411			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
 412					&iph->saddr, iph->tos);
 413		if (err > 1) {
 414			++tunnel->dev->stats.rx_frame_errors;
 415			++tunnel->dev->stats.rx_errors;
 416			goto drop;
 417		}
 418	}
 419
 420	tstats = this_cpu_ptr(tunnel->dev->tstats);
 421	u64_stats_update_begin(&tstats->syncp);
 422	tstats->rx_packets++;
 423	tstats->rx_bytes += skb->len;
 424	u64_stats_update_end(&tstats->syncp);
 425
 426	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
 427
 428	if (tunnel->dev->type == ARPHRD_ETHER) {
 429		skb->protocol = eth_type_trans(skb, tunnel->dev);
 430		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
 431	} else {
 432		skb->dev = tunnel->dev;
 433	}
 434
 435	if (tun_dst)
 436		skb_dst_set(skb, (struct dst_entry *)tun_dst);
 437
 438	gro_cells_receive(&tunnel->gro_cells, skb);
 439	return 0;
 440
 441drop:
 442	if (tun_dst)
 443		dst_release((struct dst_entry *)tun_dst);
 444	kfree_skb(skb);
 445	return 0;
 446}
 447EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
 448
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 449int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
 450			    unsigned int num)
 451{
 452	if (num >= MAX_IPTUN_ENCAP_OPS)
 453		return -ERANGE;
 454
 455	return !cmpxchg((const struct ip_tunnel_encap_ops **)
 456			&iptun_encaps[num],
 457			NULL, ops) ? 0 : -1;
 458}
 459EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
 460
 461int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
 462			    unsigned int num)
 463{
 464	int ret;
 465
 466	if (num >= MAX_IPTUN_ENCAP_OPS)
 467		return -ERANGE;
 468
 469	ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
 470		       &iptun_encaps[num],
 471		       ops, NULL) == ops) ? 0 : -1;
 472
 473	synchronize_net();
 474
 475	return ret;
 476}
 477EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
 478
 479int ip_tunnel_encap_setup(struct ip_tunnel *t,
 480			  struct ip_tunnel_encap *ipencap)
 481{
 482	int hlen;
 483
 484	memset(&t->encap, 0, sizeof(t->encap));
 485
 486	hlen = ip_encap_hlen(ipencap);
 487	if (hlen < 0)
 488		return hlen;
 489
 490	t->encap.type = ipencap->type;
 491	t->encap.sport = ipencap->sport;
 492	t->encap.dport = ipencap->dport;
 493	t->encap.flags = ipencap->flags;
 494
 495	t->encap_hlen = hlen;
 496	t->hlen = t->encap_hlen + t->tun_hlen;
 497
 498	return 0;
 499}
 500EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
 501
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 502static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
 503			    struct rtable *rt, __be16 df,
 504			    const struct iphdr *inner_iph)
 505{
 506	struct ip_tunnel *tunnel = netdev_priv(dev);
 507	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
 508	int mtu;
 509
 510	if (df)
 511		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
 512					- sizeof(struct iphdr) - tunnel->hlen;
 513	else
 514		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
 515
 516	skb_dst_update_pmtu(skb, mtu);
 
 517
 518	if (skb->protocol == htons(ETH_P_IP)) {
 519		if (!skb_is_gso(skb) &&
 520		    (inner_iph->frag_off & htons(IP_DF)) &&
 521		    mtu < pkt_size) {
 522			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
 523			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
 524			return -E2BIG;
 525		}
 526	}
 527#if IS_ENABLED(CONFIG_IPV6)
 528	else if (skb->protocol == htons(ETH_P_IPV6)) {
 529		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
 530
 531		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
 532			   mtu >= IPV6_MIN_MTU) {
 533			if ((tunnel->parms.iph.daddr &&
 534			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
 535			    rt6->rt6i_dst.plen == 128) {
 536				rt6->rt6i_flags |= RTF_MODIFIED;
 537				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
 538			}
 539		}
 540
 541		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
 542					mtu < pkt_size) {
 543			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 544			return -E2BIG;
 545		}
 546	}
 547#endif
 548	return 0;
 549}
 550
 551void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
 552{
 553	struct ip_tunnel *tunnel = netdev_priv(dev);
 554	u32 headroom = sizeof(struct iphdr);
 555	struct ip_tunnel_info *tun_info;
 556	const struct ip_tunnel_key *key;
 557	const struct iphdr *inner_iph;
 558	struct rtable *rt;
 559	struct flowi4 fl4;
 560	__be16 df = 0;
 561	u8 tos, ttl;
 562
 563	tun_info = skb_tunnel_info(skb);
 564	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
 565		     ip_tunnel_info_af(tun_info) != AF_INET))
 566		goto tx_error;
 567	key = &tun_info->key;
 568	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 569	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
 570	tos = key->tos;
 571	if (tos == 1) {
 572		if (skb->protocol == htons(ETH_P_IP))
 573			tos = inner_iph->tos;
 574		else if (skb->protocol == htons(ETH_P_IPV6))
 575			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
 576	}
 577	ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
 578			    RT_TOS(tos), tunnel->parms.link, tunnel->fwmark);
 579	if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
 580		goto tx_error;
 581	rt = ip_route_output_key(tunnel->net, &fl4);
 582	if (IS_ERR(rt)) {
 583		dev->stats.tx_carrier_errors++;
 584		goto tx_error;
 585	}
 586	if (rt->dst.dev == dev) {
 587		ip_rt_put(rt);
 588		dev->stats.collisions++;
 589		goto tx_error;
 590	}
 591	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
 592	ttl = key->ttl;
 593	if (ttl == 0) {
 594		if (skb->protocol == htons(ETH_P_IP))
 595			ttl = inner_iph->ttl;
 596		else if (skb->protocol == htons(ETH_P_IPV6))
 597			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
 598		else
 599			ttl = ip4_dst_hoplimit(&rt->dst);
 600	}
 601	if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
 602		df = htons(IP_DF);
 603	else if (skb->protocol == htons(ETH_P_IP))
 604		df = inner_iph->frag_off & htons(IP_DF);
 605	headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
 606	if (headroom > dev->needed_headroom)
 607		dev->needed_headroom = headroom;
 608
 609	if (skb_cow_head(skb, dev->needed_headroom)) {
 610		ip_rt_put(rt);
 611		goto tx_dropped;
 612	}
 613	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
 614		      df, !net_eq(tunnel->net, dev_net(dev)));
 615	return;
 616tx_error:
 617	dev->stats.tx_errors++;
 618	goto kfree;
 619tx_dropped:
 620	dev->stats.tx_dropped++;
 621kfree:
 622	kfree_skb(skb);
 623}
 624EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);
 625
 626void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 627		    const struct iphdr *tnl_params, u8 protocol)
 628{
 629	struct ip_tunnel *tunnel = netdev_priv(dev);
 630	const struct iphdr *inner_iph;
 631	struct flowi4 fl4;
 632	u8     tos, ttl;
 633	__be16 df;
 634	struct rtable *rt;		/* Route to the other host */
 635	unsigned int max_headroom;	/* The extra header space needed */
 636	__be32 dst;
 637	bool connected;
 638
 639	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
 640	connected = (tunnel->parms.iph.daddr != 0);
 641
 642	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 643
 644	dst = tnl_params->daddr;
 645	if (dst == 0) {
 646		/* NBMA tunnel */
 647
 648		if (!skb_dst(skb)) {
 649			dev->stats.tx_fifo_errors++;
 650			goto tx_error;
 651		}
 652
 653		if (skb->protocol == htons(ETH_P_IP)) {
 654			rt = skb_rtable(skb);
 655			dst = rt_nexthop(rt, inner_iph->daddr);
 656		}
 657#if IS_ENABLED(CONFIG_IPV6)
 658		else if (skb->protocol == htons(ETH_P_IPV6)) {
 659			const struct in6_addr *addr6;
 660			struct neighbour *neigh;
 661			bool do_tx_error_icmp;
 662			int addr_type;
 663
 664			neigh = dst_neigh_lookup(skb_dst(skb),
 665						 &ipv6_hdr(skb)->daddr);
 666			if (!neigh)
 667				goto tx_error;
 668
 669			addr6 = (const struct in6_addr *)&neigh->primary_key;
 670			addr_type = ipv6_addr_type(addr6);
 671
 672			if (addr_type == IPV6_ADDR_ANY) {
 673				addr6 = &ipv6_hdr(skb)->daddr;
 674				addr_type = ipv6_addr_type(addr6);
 675			}
 676
 677			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
 678				do_tx_error_icmp = true;
 679			else {
 680				do_tx_error_icmp = false;
 681				dst = addr6->s6_addr32[3];
 682			}
 683			neigh_release(neigh);
 684			if (do_tx_error_icmp)
 685				goto tx_error_icmp;
 686		}
 687#endif
 688		else
 689			goto tx_error;
 690
 691		connected = false;
 692	}
 693
 694	tos = tnl_params->tos;
 695	if (tos & 0x1) {
 696		tos &= ~0x1;
 697		if (skb->protocol == htons(ETH_P_IP)) {
 698			tos = inner_iph->tos;
 699			connected = false;
 700		} else if (skb->protocol == htons(ETH_P_IPV6)) {
 701			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
 702			connected = false;
 703		}
 704	}
 705
 706	ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
 707			    tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
 708			    tunnel->fwmark);
 709
 710	if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
 711		goto tx_error;
 712
 713	rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr) :
 714			 NULL;
 715
 716	if (!rt) {
 717		rt = ip_route_output_key(tunnel->net, &fl4);
 718
 719		if (IS_ERR(rt)) {
 720			dev->stats.tx_carrier_errors++;
 721			goto tx_error;
 722		}
 723		if (connected)
 724			dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
 725					  fl4.saddr);
 726	}
 727
 728	if (rt->dst.dev == dev) {
 729		ip_rt_put(rt);
 730		dev->stats.collisions++;
 731		goto tx_error;
 732	}
 733
 734	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) {
 735		ip_rt_put(rt);
 736		goto tx_error;
 737	}
 738
 739	if (tunnel->err_count > 0) {
 740		if (time_before(jiffies,
 741				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
 742			tunnel->err_count--;
 743
 744			dst_link_failure(skb);
 745		} else
 746			tunnel->err_count = 0;
 747	}
 748
 749	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
 750	ttl = tnl_params->ttl;
 751	if (ttl == 0) {
 752		if (skb->protocol == htons(ETH_P_IP))
 753			ttl = inner_iph->ttl;
 754#if IS_ENABLED(CONFIG_IPV6)
 755		else if (skb->protocol == htons(ETH_P_IPV6))
 756			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
 757#endif
 758		else
 759			ttl = ip4_dst_hoplimit(&rt->dst);
 760	}
 761
 762	df = tnl_params->frag_off;
 763	if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
 764		df |= (inner_iph->frag_off&htons(IP_DF));
 765
 766	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
 767			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
 768	if (max_headroom > dev->needed_headroom)
 769		dev->needed_headroom = max_headroom;
 770
 771	if (skb_cow_head(skb, dev->needed_headroom)) {
 772		ip_rt_put(rt);
 773		dev->stats.tx_dropped++;
 774		kfree_skb(skb);
 775		return;
 776	}
 777
 778	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
 779		      df, !net_eq(tunnel->net, dev_net(dev)));
 780	return;
 781
 782#if IS_ENABLED(CONFIG_IPV6)
 783tx_error_icmp:
 784	dst_link_failure(skb);
 785#endif
 786tx_error:
 787	dev->stats.tx_errors++;
 788	kfree_skb(skb);
 789}
 790EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
 791
 792static void ip_tunnel_update(struct ip_tunnel_net *itn,
 793			     struct ip_tunnel *t,
 794			     struct net_device *dev,
 795			     struct ip_tunnel_parm *p,
 796			     bool set_mtu,
 797			     __u32 fwmark)
 798{
 799	ip_tunnel_del(itn, t);
 800	t->parms.iph.saddr = p->iph.saddr;
 801	t->parms.iph.daddr = p->iph.daddr;
 802	t->parms.i_key = p->i_key;
 803	t->parms.o_key = p->o_key;
 804	if (dev->type != ARPHRD_ETHER) {
 805		memcpy(dev->dev_addr, &p->iph.saddr, 4);
 806		memcpy(dev->broadcast, &p->iph.daddr, 4);
 807	}
 808	ip_tunnel_add(itn, t);
 809
 810	t->parms.iph.ttl = p->iph.ttl;
 811	t->parms.iph.tos = p->iph.tos;
 812	t->parms.iph.frag_off = p->iph.frag_off;
 813
 814	if (t->parms.link != p->link || t->fwmark != fwmark) {
 815		int mtu;
 816
 817		t->parms.link = p->link;
 818		t->fwmark = fwmark;
 819		mtu = ip_tunnel_bind_dev(dev);
 820		if (set_mtu)
 821			dev->mtu = mtu;
 822	}
 823	dst_cache_reset(&t->dst_cache);
 824	netdev_state_change(dev);
 825}
 826
 827int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
 828{
 829	int err = 0;
 830	struct ip_tunnel *t = netdev_priv(dev);
 831	struct net *net = t->net;
 832	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
 833
 
 834	switch (cmd) {
 835	case SIOCGETTUNNEL:
 836		if (dev == itn->fb_tunnel_dev) {
 837			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
 838			if (!t)
 839				t = netdev_priv(dev);
 840		}
 841		memcpy(p, &t->parms, sizeof(*p));
 842		break;
 843
 844	case SIOCADDTUNNEL:
 845	case SIOCCHGTUNNEL:
 846		err = -EPERM;
 847		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 848			goto done;
 849		if (p->iph.ttl)
 850			p->iph.frag_off |= htons(IP_DF);
 851		if (!(p->i_flags & VTI_ISVTI)) {
 852			if (!(p->i_flags & TUNNEL_KEY))
 853				p->i_key = 0;
 854			if (!(p->o_flags & TUNNEL_KEY))
 855				p->o_key = 0;
 856		}
 857
 858		t = ip_tunnel_find(itn, p, itn->type);
 859
 860		if (cmd == SIOCADDTUNNEL) {
 861			if (!t) {
 862				t = ip_tunnel_create(net, itn, p);
 863				err = PTR_ERR_OR_ZERO(t);
 864				break;
 865			}
 866
 867			err = -EEXIST;
 868			break;
 869		}
 870		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
 871			if (t) {
 872				if (t->dev != dev) {
 873					err = -EEXIST;
 874					break;
 875				}
 876			} else {
 877				unsigned int nflags = 0;
 878
 879				if (ipv4_is_multicast(p->iph.daddr))
 880					nflags = IFF_BROADCAST;
 881				else if (p->iph.daddr)
 882					nflags = IFF_POINTOPOINT;
 883
 884				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
 885					err = -EINVAL;
 886					break;
 887				}
 888
 889				t = netdev_priv(dev);
 890			}
 891		}
 892
 893		if (t) {
 894			err = 0;
 895			ip_tunnel_update(itn, t, dev, p, true, 0);
 896		} else {
 897			err = -ENOENT;
 898		}
 899		break;
 900
 901	case SIOCDELTUNNEL:
 902		err = -EPERM;
 903		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 904			goto done;
 905
 906		if (dev == itn->fb_tunnel_dev) {
 907			err = -ENOENT;
 908			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
 909			if (!t)
 910				goto done;
 911			err = -EPERM;
 912			if (t == netdev_priv(itn->fb_tunnel_dev))
 913				goto done;
 914			dev = t->dev;
 915		}
 916		unregister_netdevice(dev);
 917		err = 0;
 918		break;
 919
 920	default:
 921		err = -EINVAL;
 922	}
 923
 924done:
 925	return err;
 926}
 927EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
 928
 929int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
 930{
 931	struct ip_tunnel *tunnel = netdev_priv(dev);
 932	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
 933	int max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen;
 934
 935	if (new_mtu < ETH_MIN_MTU)
 936		return -EINVAL;
 937
 938	if (new_mtu > max_mtu) {
 939		if (strict)
 940			return -EINVAL;
 941
 942		new_mtu = max_mtu;
 943	}
 944
 945	dev->mtu = new_mtu;
 946	return 0;
 947}
 948EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
 949
 950int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
 951{
 952	return __ip_tunnel_change_mtu(dev, new_mtu, true);
 953}
 954EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
 955
 956static void ip_tunnel_dev_free(struct net_device *dev)
 957{
 958	struct ip_tunnel *tunnel = netdev_priv(dev);
 959
 960	gro_cells_destroy(&tunnel->gro_cells);
 961	dst_cache_destroy(&tunnel->dst_cache);
 962	free_percpu(dev->tstats);
 
 963}
 964
 965void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
 966{
 967	struct ip_tunnel *tunnel = netdev_priv(dev);
 968	struct ip_tunnel_net *itn;
 969
 970	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
 971
 972	if (itn->fb_tunnel_dev != dev) {
 973		ip_tunnel_del(itn, netdev_priv(dev));
 974		unregister_netdevice_queue(dev, head);
 975	}
 976}
 977EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
 978
 979struct net *ip_tunnel_get_link_net(const struct net_device *dev)
 980{
 981	struct ip_tunnel *tunnel = netdev_priv(dev);
 982
 983	return tunnel->net;
 984}
 985EXPORT_SYMBOL(ip_tunnel_get_link_net);
 986
 987int ip_tunnel_get_iflink(const struct net_device *dev)
 988{
 989	struct ip_tunnel *tunnel = netdev_priv(dev);
 990
 991	return tunnel->parms.link;
 992}
 993EXPORT_SYMBOL(ip_tunnel_get_iflink);
 994
 995int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
 996				  struct rtnl_link_ops *ops, char *devname)
 997{
 998	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
 999	struct ip_tunnel_parm parms;
1000	unsigned int i;
1001
1002	itn->rtnl_link_ops = ops;
1003	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
1004		INIT_HLIST_HEAD(&itn->tunnels[i]);
1005
1006	if (!ops || !net_has_fallback_tunnels(net)) {
1007		struct ip_tunnel_net *it_init_net;
1008
1009		it_init_net = net_generic(&init_net, ip_tnl_net_id);
1010		itn->type = it_init_net->type;
1011		itn->fb_tunnel_dev = NULL;
1012		return 0;
1013	}
1014
1015	memset(&parms, 0, sizeof(parms));
1016	if (devname)
1017		strlcpy(parms.name, devname, IFNAMSIZ);
1018
1019	rtnl_lock();
1020	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
1021	/* FB netdevice is special: we have one, and only one per netns.
1022	 * Allowing to move it to another netns is clearly unsafe.
1023	 */
1024	if (!IS_ERR(itn->fb_tunnel_dev)) {
1025		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
1026		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
1027		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
1028		itn->type = itn->fb_tunnel_dev->type;
1029	}
1030	rtnl_unlock();
1031
1032	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
1033}
1034EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
1035
1036static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
1037			      struct list_head *head,
1038			      struct rtnl_link_ops *ops)
1039{
 
1040	struct net_device *dev, *aux;
1041	int h;
1042
1043	for_each_netdev_safe(net, dev, aux)
1044		if (dev->rtnl_link_ops == ops)
1045			unregister_netdevice_queue(dev, head);
1046
1047	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
1048		struct ip_tunnel *t;
1049		struct hlist_node *n;
1050		struct hlist_head *thead = &itn->tunnels[h];
1051
1052		hlist_for_each_entry_safe(t, n, thead, hash_node)
1053			/* If dev is in the same netns, it has already
1054			 * been added to the list by the previous loop.
1055			 */
1056			if (!net_eq(dev_net(t->dev), net))
1057				unregister_netdevice_queue(t->dev, head);
1058	}
1059}
1060
1061void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
1062			   struct rtnl_link_ops *ops)
1063{
1064	struct ip_tunnel_net *itn;
1065	struct net *net;
1066	LIST_HEAD(list);
1067
1068	rtnl_lock();
1069	list_for_each_entry(net, net_list, exit_list) {
1070		itn = net_generic(net, id);
1071		ip_tunnel_destroy(net, itn, &list, ops);
1072	}
1073	unregister_netdevice_many(&list);
1074	rtnl_unlock();
1075}
1076EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);
1077
1078int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
1079		      struct ip_tunnel_parm *p, __u32 fwmark)
1080{
1081	struct ip_tunnel *nt;
1082	struct net *net = dev_net(dev);
1083	struct ip_tunnel_net *itn;
1084	int mtu;
1085	int err;
1086
1087	nt = netdev_priv(dev);
1088	itn = net_generic(net, nt->ip_tnl_net_id);
1089
1090	if (nt->collect_md) {
1091		if (rtnl_dereference(itn->collect_md_tun))
1092			return -EEXIST;
1093	} else {
1094		if (ip_tunnel_find(itn, p, dev->type))
1095			return -EEXIST;
1096	}
1097
1098	nt->net = net;
1099	nt->parms = *p;
1100	nt->fwmark = fwmark;
1101	err = register_netdevice(dev);
1102	if (err)
1103		goto err_register_netdevice;
1104
1105	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1106		eth_hw_addr_random(dev);
1107
1108	mtu = ip_tunnel_bind_dev(dev);
1109	if (tb[IFLA_MTU]) {
1110		unsigned int max = IP_MAX_MTU - dev->hard_header_len - nt->hlen;
1111
1112		mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU,
1113			    (unsigned int)(max - sizeof(struct iphdr)));
1114	}
1115
1116	err = dev_set_mtu(dev, mtu);
1117	if (err)
1118		goto err_dev_set_mtu;
1119
1120	ip_tunnel_add(itn, nt);
1121	return 0;
1122
1123err_dev_set_mtu:
1124	unregister_netdevice(dev);
1125err_register_netdevice:
1126	return err;
1127}
1128EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1129
1130int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1131			 struct ip_tunnel_parm *p, __u32 fwmark)
1132{
1133	struct ip_tunnel *t;
1134	struct ip_tunnel *tunnel = netdev_priv(dev);
1135	struct net *net = tunnel->net;
1136	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1137
1138	if (dev == itn->fb_tunnel_dev)
1139		return -EINVAL;
1140
1141	t = ip_tunnel_find(itn, p, dev->type);
1142
1143	if (t) {
1144		if (t->dev != dev)
1145			return -EEXIST;
1146	} else {
1147		t = tunnel;
1148
1149		if (dev->type != ARPHRD_ETHER) {
1150			unsigned int nflags = 0;
1151
1152			if (ipv4_is_multicast(p->iph.daddr))
1153				nflags = IFF_BROADCAST;
1154			else if (p->iph.daddr)
1155				nflags = IFF_POINTOPOINT;
1156
1157			if ((dev->flags ^ nflags) &
1158			    (IFF_POINTOPOINT | IFF_BROADCAST))
1159				return -EINVAL;
1160		}
1161	}
1162
1163	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark);
1164	return 0;
1165}
1166EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1167
1168int ip_tunnel_init(struct net_device *dev)
1169{
1170	struct ip_tunnel *tunnel = netdev_priv(dev);
1171	struct iphdr *iph = &tunnel->parms.iph;
1172	int err;
1173
1174	dev->needs_free_netdev = true;
1175	dev->priv_destructor = ip_tunnel_dev_free;
1176	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1177	if (!dev->tstats)
1178		return -ENOMEM;
1179
1180	err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
1181	if (err) {
1182		free_percpu(dev->tstats);
1183		return err;
1184	}
1185
1186	err = gro_cells_init(&tunnel->gro_cells, dev);
1187	if (err) {
1188		dst_cache_destroy(&tunnel->dst_cache);
1189		free_percpu(dev->tstats);
1190		return err;
1191	}
1192
1193	tunnel->dev = dev;
1194	tunnel->net = dev_net(dev);
1195	strcpy(tunnel->parms.name, dev->name);
1196	iph->version		= 4;
1197	iph->ihl		= 5;
1198
1199	if (tunnel->collect_md) {
1200		dev->features |= NETIF_F_NETNS_LOCAL;
1201		netif_keep_dst(dev);
1202	}
1203	return 0;
1204}
1205EXPORT_SYMBOL_GPL(ip_tunnel_init);
1206
1207void ip_tunnel_uninit(struct net_device *dev)
1208{
1209	struct ip_tunnel *tunnel = netdev_priv(dev);
1210	struct net *net = tunnel->net;
1211	struct ip_tunnel_net *itn;
1212
1213	itn = net_generic(net, tunnel->ip_tnl_net_id);
1214	/* fb_tunnel_dev will be unregisted in net-exit call. */
1215	if (itn->fb_tunnel_dev != dev)
1216		ip_tunnel_del(itn, netdev_priv(dev));
1217
1218	dst_cache_reset(&tunnel->dst_cache);
1219}
1220EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1221
1222/* Do least required initialization, rest of init is done in tunnel_init call */
1223void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
1224{
1225	struct ip_tunnel *tunnel = netdev_priv(dev);
1226	tunnel->ip_tnl_net_id = net_id;
1227}
1228EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1229
1230MODULE_LICENSE("GPL");
v4.6
   1/*
   2 * Copyright (c) 2013 Nicira, Inc.
   3 *
   4 * This program is free software; you can redistribute it and/or
   5 * modify it under the terms of version 2 of the GNU General Public
   6 * License as published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope that it will be useful, but
   9 * WITHOUT ANY WARRANTY; without even the implied warranty of
  10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11 * General Public License for more details.
  12 *
  13 * You should have received a copy of the GNU General Public License
  14 * along with this program; if not, write to the Free Software
  15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  16 * 02110-1301, USA
  17 */
  18
  19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  20
  21#include <linux/capability.h>
  22#include <linux/module.h>
  23#include <linux/types.h>
  24#include <linux/kernel.h>
  25#include <linux/slab.h>
  26#include <linux/uaccess.h>
  27#include <linux/skbuff.h>
  28#include <linux/netdevice.h>
  29#include <linux/in.h>
  30#include <linux/tcp.h>
  31#include <linux/udp.h>
  32#include <linux/if_arp.h>
  33#include <linux/init.h>
  34#include <linux/in6.h>
  35#include <linux/inetdevice.h>
  36#include <linux/igmp.h>
  37#include <linux/netfilter_ipv4.h>
  38#include <linux/etherdevice.h>
  39#include <linux/if_ether.h>
  40#include <linux/if_vlan.h>
  41#include <linux/rculist.h>
  42#include <linux/err.h>
  43
  44#include <net/sock.h>
  45#include <net/ip.h>
  46#include <net/icmp.h>
  47#include <net/protocol.h>
  48#include <net/ip_tunnels.h>
  49#include <net/arp.h>
  50#include <net/checksum.h>
  51#include <net/dsfield.h>
  52#include <net/inet_ecn.h>
  53#include <net/xfrm.h>
  54#include <net/net_namespace.h>
  55#include <net/netns/generic.h>
  56#include <net/rtnetlink.h>
  57#include <net/udp.h>
 
  58
  59#if IS_ENABLED(CONFIG_IPV6)
  60#include <net/ipv6.h>
  61#include <net/ip6_fib.h>
  62#include <net/ip6_route.h>
  63#endif
  64
  65static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
  66{
  67	return hash_32((__force u32)key ^ (__force u32)remote,
  68			 IP_TNL_HASH_BITS);
  69}
  70
  71static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
  72				__be16 flags, __be32 key)
  73{
  74	if (p->i_flags & TUNNEL_KEY) {
  75		if (flags & TUNNEL_KEY)
  76			return key == p->i_key;
  77		else
  78			/* key expected, none present */
  79			return false;
  80	} else
  81		return !(flags & TUNNEL_KEY);
  82}
  83
  84/* Fallback tunnel: no source, no destination, no key, no options.
  85
  86   Tunnel hash table:
  87   We require an exact key match, i.e. if a key is present in the packet
  88   it will match only a tunnel with the same key; if it is not present,
  89   it will match only a keyless tunnel.
  90
  91   All keyless packets that do not match a configured keyless tunnel
  92   will match the fallback tunnel.
  93   Given src, dst and key, find the appropriate tunnel for input.
  94*/
  95struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
  96				   int link, __be16 flags,
  97				   __be32 remote, __be32 local,
  98				   __be32 key)
  99{
 100	unsigned int hash;
 101	struct ip_tunnel *t, *cand = NULL;
 102	struct hlist_head *head;
 103
 104	hash = ip_tunnel_hash(key, remote);
 105	head = &itn->tunnels[hash];
 106
	/* Pass 1: both addresses match and the device is up. A tunnel on the
	 * same link is returned at once; any other match becomes the
	 * candidate (a later match in this pass replaces an earlier one).
	 */
 107	hlist_for_each_entry_rcu(t, head, hash_node) {
 108		if (local != t->parms.iph.saddr ||
 109		    remote != t->parms.iph.daddr ||
 110		    !(t->dev->flags & IFF_UP))
 111			continue;
 112
 113		if (!ip_tunnel_key_match(&t->parms, flags, key))
 114			continue;
 115
 116		if (t->parms.link == link)
 117			return t;
 118		else
 119			cand = t;
 120	}
 121
	/* Pass 2: remote matches and the tunnel has no source address set. */
 122	hlist_for_each_entry_rcu(t, head, hash_node) {
 123		if (remote != t->parms.iph.daddr ||
 124		    t->parms.iph.saddr != 0 ||
 125		    !(t->dev->flags & IFF_UP))
 126			continue;
 127
 128		if (!ip_tunnel_key_match(&t->parms, flags, key))
 129			continue;
 130
 131		if (t->parms.link == link)
 132			return t;
 133		else if (!cand)
 134			cand = t;
 135	}
 136
	/* The remaining passes use the bucket hashed with remote == 0. */
 137	hash = ip_tunnel_hash(key, 0);
 138	head = &itn->tunnels[hash];
 139
	/* Pass 3: local matches a tunnel without destination, or local is a
	 * multicast address equal to the tunnel's destination.
	 */
 140	hlist_for_each_entry_rcu(t, head, hash_node) {
 141		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
 142		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
 143			continue;
 144
 145		if (!(t->dev->flags & IFF_UP))
 146			continue;
 147
 148		if (!ip_tunnel_key_match(&t->parms, flags, key))
 149			continue;
 150
 151		if (t->parms.link == link)
 152			return t;
 153		else if (!cand)
 154			cand = t;
 155	}
 156
	/* Callers passing TUNNEL_NO_KEY skip the key-only pass below. */
 157	if (flags & TUNNEL_NO_KEY)
 158		goto skip_key_lookup;
 159
	/* Pass 4: tunnels with neither address set, matched by i_key alone. */
 160	hlist_for_each_entry_rcu(t, head, hash_node) {
 161		if (t->parms.i_key != key ||
 162		    t->parms.iph.saddr != 0 ||
 163		    t->parms.iph.daddr != 0 ||
 164		    !(t->dev->flags & IFF_UP))
 165			continue;
 166
 167		if (t->parms.link == link)
 168			return t;
 169		else if (!cand)
 170			cand = t;
 171	}
 172
 173skip_key_lookup:
	/* No same-link match: fall back to the candidate, then the collect_md
	 * tunnel, then the fallback device if it is up.
	 */
 174	if (cand)
 175		return cand;
 176
 177	t = rcu_dereference(itn->collect_md_tun);
 178	if (t)
 179		return t;
 180
 181	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
 182		return netdev_priv(itn->fb_tunnel_dev);
 183
 184	return NULL;
 185}
 186EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
 187
 188static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
 189				    struct ip_tunnel_parm *parms)
 190{
 191	unsigned int h;
 192	__be32 remote;
 193	__be32 i_key = parms->i_key;
 194
 195	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
 196		remote = parms->iph.daddr;
 197	else
 198		remote = 0;
 199
 200	if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
 201		i_key = 0;
 202
 203	h = ip_tunnel_hash(i_key, remote);
 204	return &itn->tunnels[h];
 205}
 206
 207static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
 208{
 209	struct hlist_head *head = ip_bucket(itn, &t->parms);
 210
 211	if (t->collect_md)
 212		rcu_assign_pointer(itn->collect_md_tun, t);
 213	hlist_add_head_rcu(&t->hash_node, head);
 214}
 215
 216static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
 217{
 218	if (t->collect_md)
 219		rcu_assign_pointer(itn->collect_md_tun, NULL);
 220	hlist_del_init_rcu(&t->hash_node);
 221}
 222
 223static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
 224					struct ip_tunnel_parm *parms,
 225					int type)
 226{
 227	__be32 remote = parms->iph.daddr;
 228	__be32 local = parms->iph.saddr;
 229	__be32 key = parms->i_key;
 230	__be16 flags = parms->i_flags;
 231	int link = parms->link;
 232	struct ip_tunnel *t = NULL;
 233	struct hlist_head *head = ip_bucket(itn, parms);
 234
 235	hlist_for_each_entry_rcu(t, head, hash_node) {
 236		if (local == t->parms.iph.saddr &&
 237		    remote == t->parms.iph.daddr &&
 238		    link == t->parms.link &&
 239		    type == t->dev->type &&
 240		    ip_tunnel_key_match(&t->parms, flags, key))
 241			break;
 242	}
 243	return t;
 244}
 245
 246static struct net_device *__ip_tunnel_create(struct net *net,
 247					     const struct rtnl_link_ops *ops,
 248					     struct ip_tunnel_parm *parms)
 249{
 250	int err;
 251	struct ip_tunnel *tunnel;
 252	struct net_device *dev;
 253	char name[IFNAMSIZ];
 254
 255	if (parms->name[0])
 
 
 
 256		strlcpy(name, parms->name, IFNAMSIZ);
 257	else {
 258		if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
 259			err = -E2BIG;
 260			goto failed;
 261		}
 262		strlcpy(name, ops->kind, IFNAMSIZ);
 263		strncat(name, "%d", 2);
 264	}
 265
 266	ASSERT_RTNL();
 267	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
 268	if (!dev) {
 269		err = -ENOMEM;
 270		goto failed;
 271	}
 272	dev_net_set(dev, net);
 273
 274	dev->rtnl_link_ops = ops;
 275
 276	tunnel = netdev_priv(dev);
 277	tunnel->parms = *parms;
 278	tunnel->net = net;
 279
 280	err = register_netdevice(dev);
 281	if (err)
 282		goto failed_free;
 283
 284	return dev;
 285
 286failed_free:
 287	free_netdev(dev);
 288failed:
 289	return ERR_PTR(err);
 290}
 291
 292static inline void init_tunnel_flow(struct flowi4 *fl4,
 293				    int proto,
 294				    __be32 daddr, __be32 saddr,
 295				    __be32 key, __u8 tos, int oif)
 296{
 297	memset(fl4, 0, sizeof(*fl4));
 298	fl4->flowi4_oif = oif;
 299	fl4->daddr = daddr;
 300	fl4->saddr = saddr;
 301	fl4->flowi4_tos = tos;
 302	fl4->flowi4_proto = proto;
 303	fl4->fl4_gre_key = key;
 304}
 305
 306static int ip_tunnel_bind_dev(struct net_device *dev)
 307{
 308	struct net_device *tdev = NULL;
 309	struct ip_tunnel *tunnel = netdev_priv(dev);
 310	const struct iphdr *iph;
 311	int hlen = LL_MAX_HEADER;
 312	int mtu = ETH_DATA_LEN;
 313	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
 314
 315	iph = &tunnel->parms.iph;
 316
 317	/* Guess output device to choose reasonable mtu and needed_headroom */
 318	if (iph->daddr) {
 319		struct flowi4 fl4;
 320		struct rtable *rt;
 321
 322		init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
 323				 iph->saddr, tunnel->parms.o_key,
 324				 RT_TOS(iph->tos), tunnel->parms.link);
 
 325		rt = ip_route_output_key(tunnel->net, &fl4);
 326
 327		if (!IS_ERR(rt)) {
 328			tdev = rt->dst.dev;
 329			ip_rt_put(rt);
 330		}
 331		if (dev->type != ARPHRD_ETHER)
 332			dev->flags |= IFF_POINTOPOINT;
 333
 334		dst_cache_reset(&tunnel->dst_cache);
 335	}
 336
 337	if (!tdev && tunnel->parms.link)
 338		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
 339
 340	if (tdev) {
 341		hlen = tdev->hard_header_len + tdev->needed_headroom;
 342		mtu = tdev->mtu;
 343	}
 344
 345	dev->needed_headroom = t_hlen + hlen;
 346	mtu -= (dev->hard_header_len + t_hlen);
 347
 348	if (mtu < 68)
 349		mtu = 68;
 350
 351	return mtu;
 352}
 353
 354static struct ip_tunnel *ip_tunnel_create(struct net *net,
 355					  struct ip_tunnel_net *itn,
 356					  struct ip_tunnel_parm *parms)
 357{
 358	struct ip_tunnel *nt;
 359	struct net_device *dev;
 
 
 
 360
 361	BUG_ON(!itn->fb_tunnel_dev);
 362	dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
 363	if (IS_ERR(dev))
 364		return ERR_CAST(dev);
 365
 366	dev->mtu = ip_tunnel_bind_dev(dev);
 
 
 
 367
 368	nt = netdev_priv(dev);
 
 
 
 369	ip_tunnel_add(itn, nt);
 370	return nt;
 
 
 
 
 371}
 372
 373int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
 374		  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
 375		  bool log_ecn_error)
 376{
 377	struct pcpu_sw_netstats *tstats;
 378	const struct iphdr *iph = ip_hdr(skb);
 379	int err;
 380
 381#ifdef CONFIG_NET_IPGRE_BROADCAST
 382	if (ipv4_is_multicast(iph->daddr)) {
 383		tunnel->dev->stats.multicast++;
 384		skb->pkt_type = PACKET_BROADCAST;
 385	}
 386#endif
 387
 388	if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
 389	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
 390		tunnel->dev->stats.rx_crc_errors++;
 391		tunnel->dev->stats.rx_errors++;
 392		goto drop;
 393	}
 394
 395	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
 396		if (!(tpi->flags&TUNNEL_SEQ) ||
 397		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
 398			tunnel->dev->stats.rx_fifo_errors++;
 399			tunnel->dev->stats.rx_errors++;
 400			goto drop;
 401		}
 402		tunnel->i_seqno = ntohl(tpi->seq) + 1;
 403	}
 404
 405	skb_reset_network_header(skb);
 406
 407	err = IP_ECN_decapsulate(iph, skb);
 408	if (unlikely(err)) {
 409		if (log_ecn_error)
 410			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
 411					&iph->saddr, iph->tos);
 412		if (err > 1) {
 413			++tunnel->dev->stats.rx_frame_errors;
 414			++tunnel->dev->stats.rx_errors;
 415			goto drop;
 416		}
 417	}
 418
 419	tstats = this_cpu_ptr(tunnel->dev->tstats);
 420	u64_stats_update_begin(&tstats->syncp);
 421	tstats->rx_packets++;
 422	tstats->rx_bytes += skb->len;
 423	u64_stats_update_end(&tstats->syncp);
 424
 425	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
 426
 427	if (tunnel->dev->type == ARPHRD_ETHER) {
 428		skb->protocol = eth_type_trans(skb, tunnel->dev);
 429		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
 430	} else {
 431		skb->dev = tunnel->dev;
 432	}
 433
 434	if (tun_dst)
 435		skb_dst_set(skb, (struct dst_entry *)tun_dst);
 436
 437	gro_cells_receive(&tunnel->gro_cells, skb);
 438	return 0;
 439
 440drop:
 
 
 441	kfree_skb(skb);
 442	return 0;
 443}
 444EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
 445
 446static int ip_encap_hlen(struct ip_tunnel_encap *e)
 447{
 448	const struct ip_tunnel_encap_ops *ops;
 449	int hlen = -EINVAL;
 450
 451	if (e->type == TUNNEL_ENCAP_NONE)
 452		return 0;
 453
 454	if (e->type >= MAX_IPTUN_ENCAP_OPS)
 455		return -EINVAL;
 456
 457	rcu_read_lock();
 458	ops = rcu_dereference(iptun_encaps[e->type]);
 459	if (likely(ops && ops->encap_hlen))
 460		hlen = ops->encap_hlen(e);
 461	rcu_read_unlock();
 462
 463	return hlen;
 464}
 465
 /* Table of registered encapsulation ops, indexed by encap type; slots are
  * read under RCU and updated with cmpxchg() by the add/del helpers below.
  */
 466const struct ip_tunnel_encap_ops __rcu *
 467		iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly;
 468
 469int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
 470			    unsigned int num)
 471{
 472	if (num >= MAX_IPTUN_ENCAP_OPS)
 473		return -ERANGE;
 474
 475	return !cmpxchg((const struct ip_tunnel_encap_ops **)
 476			&iptun_encaps[num],
 477			NULL, ops) ? 0 : -1;
 478}
 479EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
 480
 481int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
 482			    unsigned int num)
 483{
 484	int ret;
 485
 486	if (num >= MAX_IPTUN_ENCAP_OPS)
 487		return -ERANGE;
 488
 489	ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
 490		       &iptun_encaps[num],
 491		       ops, NULL) == ops) ? 0 : -1;
 492
 493	synchronize_net();
 494
 495	return ret;
 496}
 497EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
 498
 499int ip_tunnel_encap_setup(struct ip_tunnel *t,
 500			  struct ip_tunnel_encap *ipencap)
 501{
 502	int hlen;
 503
 504	memset(&t->encap, 0, sizeof(t->encap));
 505
 506	hlen = ip_encap_hlen(ipencap);
 507	if (hlen < 0)
 508		return hlen;
 509
 510	t->encap.type = ipencap->type;
 511	t->encap.sport = ipencap->sport;
 512	t->encap.dport = ipencap->dport;
 513	t->encap.flags = ipencap->flags;
 514
 515	t->encap_hlen = hlen;
 516	t->hlen = t->encap_hlen + t->tun_hlen;
 517
 518	return 0;
 519}
 520EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
 521
 522int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
 523		    u8 *protocol, struct flowi4 *fl4)
 524{
 525	const struct ip_tunnel_encap_ops *ops;
 526	int ret = -EINVAL;
 527
 528	if (t->encap.type == TUNNEL_ENCAP_NONE)
 529		return 0;
 530
 531	if (t->encap.type >= MAX_IPTUN_ENCAP_OPS)
 532		return -EINVAL;
 533
 534	rcu_read_lock();
 535	ops = rcu_dereference(iptun_encaps[t->encap.type]);
 536	if (likely(ops && ops->build_header))
 537		ret = ops->build_header(skb, &t->encap, protocol, fl4);
 538	rcu_read_unlock();
 539
 540	return ret;
 541}
 542EXPORT_SYMBOL(ip_tunnel_encap);
 543
 /*
  * Update path-MTU state for a packet about to be tunnelled over @rt and
  * tell the sender when the packet is too big.
  *
  * Returns 0 when the packet may be sent, or -E2BIG after an ICMP
  * "fragmentation needed" (IPv4) or ICMPv6 "packet too big" message has
  * been generated for it.
  */
 544static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
 545			    struct rtable *rt, __be16 df,
 546			    const struct iphdr *inner_iph)
 547{
 548	struct ip_tunnel *tunnel = netdev_priv(dev);
	/* Size of the packet without the tunnel and link-layer headers. */
 549	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
 550	int mtu;
 551
	/* With DF set, the usable MTU is the route MTU minus all headers the
	 * tunnel adds; otherwise use the skb's own dst MTU or the device MTU.
	 */
 552	if (df)
 553		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
 554					- sizeof(struct iphdr) - tunnel->hlen;
 555	else
 556		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
 557
 558	if (skb_dst(skb))
 559		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
 560
 561	if (skb->protocol == htons(ETH_P_IP)) {
		/* Non-GSO IPv4 packet with DF set that does not fit. */
 562		if (!skb_is_gso(skb) &&
 563		    (inner_iph->frag_off & htons(IP_DF)) &&
 564		    mtu < pkt_size) {
 565			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
 566			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
 567			return -E2BIG;
 568		}
 569	}
 570#if IS_ENABLED(CONFIG_IPV6)
 571	else if (skb->protocol == htons(ETH_P_IPV6)) {
 572		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
 573
		/* Record the smaller MTU on the IPv6 route, but only for a
		 * unicast-destination tunnel or a /128 route.
		 */
 574		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
 575			   mtu >= IPV6_MIN_MTU) {
 576			if ((tunnel->parms.iph.daddr &&
 577			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
 578			    rt6->rt6i_dst.plen == 128) {
 579				rt6->rt6i_flags |= RTF_MODIFIED;
 580				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
 581			}
 582		}
 583
 584		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
 585					mtu < pkt_size) {
 586			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 587			return -E2BIG;
 588		}
 589	}
 590#endif
 591	return 0;
 592}
 593
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 /*
  * Transmit @skb through tunnel device @dev.
  *
  * @tnl_params supplies the outer header template (addresses, tos, ttl,
  * frag_off) and @protocol the outer IP protocol. The function resolves the
  * outer destination and route, runs the PMTU check, derives tos/ttl/df,
  * makes sure there is enough headroom and hands the packet to
  * iptunnel_xmit().
  *
  * The skb is always consumed. On failure it is freed and a counter in
  * dev->stats is incremented (tx_errors for most paths, tx_dropped when
  * skb_cow_head() fails).
  */
 594void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 595		    const struct iphdr *tnl_params, u8 protocol)
 596{
 597	struct ip_tunnel *tunnel = netdev_priv(dev);
 598	const struct iphdr *inner_iph;
 599	struct flowi4 fl4;
 600	u8     tos, ttl;
 601	__be16 df;
 602	struct rtable *rt;		/* Route to the other host */
 603	unsigned int max_headroom;	/* The extra header space needed */
 604	__be32 dst;
 605	bool connected;
 606
 607	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	/* "connected" tunnels have a fixed destination and may use the
	 * per-tunnel route cache further down.
	 */
 608	connected = (tunnel->parms.iph.daddr != 0);
 609
 610	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 611
 612	dst = tnl_params->daddr;
 613	if (dst == 0) {
 614		/* NBMA tunnel */
 615
 616		if (!skb_dst(skb)) {
 617			dev->stats.tx_fifo_errors++;
 618			goto tx_error;
 619		}
 620
		/* IPv4 payload: the outer destination is the next hop of the
		 * route already attached to the skb.
		 */
 621		if (skb->protocol == htons(ETH_P_IP)) {
 622			rt = skb_rtable(skb);
 623			dst = rt_nexthop(rt, inner_iph->daddr);
 624		}
 625#if IS_ENABLED(CONFIG_IPV6)
		/* IPv6 payload: take the IPv4 address embedded in an
		 * IPv4-compatible neighbour (or destination) address.
		 */
 626		else if (skb->protocol == htons(ETH_P_IPV6)) {
 627			const struct in6_addr *addr6;
 628			struct neighbour *neigh;
 629			bool do_tx_error_icmp;
 630			int addr_type;
 631
 632			neigh = dst_neigh_lookup(skb_dst(skb),
 633						 &ipv6_hdr(skb)->daddr);
 634			if (!neigh)
 635				goto tx_error;
 636
 637			addr6 = (const struct in6_addr *)&neigh->primary_key;
 638			addr_type = ipv6_addr_type(addr6);
 639
 640			if (addr_type == IPV6_ADDR_ANY) {
 641				addr6 = &ipv6_hdr(skb)->daddr;
 642				addr_type = ipv6_addr_type(addr6);
 643			}
 644
 645			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
 646				do_tx_error_icmp = true;
 647			else {
 648				do_tx_error_icmp = false;
 649				dst = addr6->s6_addr32[3];
 650			}
			/* Drop the neighbour reference before leaving. */
 651			neigh_release(neigh);
 652			if (do_tx_error_icmp)
 653				goto tx_error_icmp;
 654		}
 655#endif
 656		else
 657			goto tx_error;
 658
 659		connected = false;
 660	}
 661
	/* A set low bit in the configured tos means "inherit": the tos is
	 * taken from the inner IPv4/IPv6 header, which also disables the use
	 * of the cached route.
	 */
 662	tos = tnl_params->tos;
 663	if (tos & 0x1) {
 664		tos &= ~0x1;
 665		if (skb->protocol == htons(ETH_P_IP)) {
 666			tos = inner_iph->tos;
 667			connected = false;
 668		} else if (skb->protocol == htons(ETH_P_IPV6)) {
 669			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
 670			connected = false;
 671		}
 672	}
 673
 674	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
 675			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);

 676
 677	if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
 678		goto tx_error;
 679
 680	rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr) :
 681			 NULL;
 682
 683	if (!rt) {
 684		rt = ip_route_output_key(tunnel->net, &fl4);
 685
 686		if (IS_ERR(rt)) {
 687			dev->stats.tx_carrier_errors++;
 688			goto tx_error;
 689		}
 690		if (connected)
 691			dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
 692					  fl4.saddr);
 693	}
 694
	/* The route leads back to this very device: count a collision and
	 * drop the packet.
	 */
 695	if (rt->dst.dev == dev) {
 696		ip_rt_put(rt);
 697		dev->stats.collisions++;
 698		goto tx_error;
 699	}
 700
 701	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) {
 702		ip_rt_put(rt);
 703		goto tx_error;
 704	}
 705
	/* While errors are recorded on the tunnel and err_time is recent,
	 * report a link failure for this skb; once IPTUNNEL_ERR_TIMEO has
	 * passed the error count is cleared.
	 */
 706	if (tunnel->err_count > 0) {
 707		if (time_before(jiffies,
 708				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
 709			tunnel->err_count--;
 710
 711			dst_link_failure(skb);
 712		} else
 713			tunnel->err_count = 0;
 714	}
 715
 716	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	/* A ttl of 0 in the template means: copy it from the inner header,
	 * or take the route's hop limit for non-IP payloads.
	 */
 717	ttl = tnl_params->ttl;
 718	if (ttl == 0) {
 719		if (skb->protocol == htons(ETH_P_IP))
 720			ttl = inner_iph->ttl;
 721#if IS_ENABLED(CONFIG_IPV6)
 722		else if (skb->protocol == htons(ETH_P_IPV6))
 723			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
 724#endif
 725		else
 726			ttl = ip4_dst_hoplimit(&rt->dst);
 727	}
 728
	/* For IPv4 payloads the inner DF bit is added to the configured one. */
 729	df = tnl_params->frag_off;
 730	if (skb->protocol == htons(ETH_P_IP))
 731		df |= (inner_iph->frag_off&htons(IP_DF));
 732
	/* Raise dev->needed_headroom if this route needs more than recorded. */
 733	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
 734			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
 735	if (max_headroom > dev->needed_headroom)
 736		dev->needed_headroom = max_headroom;
 737
 738	if (skb_cow_head(skb, dev->needed_headroom)) {
 739		ip_rt_put(rt);
 740		dev->stats.tx_dropped++;
 741		kfree_skb(skb);
 742		return;
 743	}
 744
 745	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
 746		      df, !net_eq(tunnel->net, dev_net(dev)));
 747	return;
 748
 749#if IS_ENABLED(CONFIG_IPV6)
 750tx_error_icmp:
 751	dst_link_failure(skb);
 752#endif
 753tx_error:
 754	dev->stats.tx_errors++;
 755	kfree_skb(skb);
 756}
 757EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
 758
 759static void ip_tunnel_update(struct ip_tunnel_net *itn,
 760			     struct ip_tunnel *t,
 761			     struct net_device *dev,
 762			     struct ip_tunnel_parm *p,
 763			     bool set_mtu)
 
 764{
 765	ip_tunnel_del(itn, t);
 766	t->parms.iph.saddr = p->iph.saddr;
 767	t->parms.iph.daddr = p->iph.daddr;
 768	t->parms.i_key = p->i_key;
 769	t->parms.o_key = p->o_key;
 770	if (dev->type != ARPHRD_ETHER) {
 771		memcpy(dev->dev_addr, &p->iph.saddr, 4);
 772		memcpy(dev->broadcast, &p->iph.daddr, 4);
 773	}
 774	ip_tunnel_add(itn, t);
 775
 776	t->parms.iph.ttl = p->iph.ttl;
 777	t->parms.iph.tos = p->iph.tos;
 778	t->parms.iph.frag_off = p->iph.frag_off;
 779
 780	if (t->parms.link != p->link) {
 781		int mtu;
 782
 783		t->parms.link = p->link;
 
 784		mtu = ip_tunnel_bind_dev(dev);
 785		if (set_mtu)
 786			dev->mtu = mtu;
 787	}
 788	dst_cache_reset(&t->dst_cache);
 789	netdev_state_change(dev);
 790}
 791
 /*
  * Handle the tunnel ioctls SIOCGETTUNNEL, SIOCADDTUNNEL, SIOCCHGTUNNEL and
  * SIOCDELTUNNEL for @dev. @p carries the request parameters and, for
  * SIOCGETTUNNEL, receives the parameters of the selected tunnel.
  *
  * Returns 0 on success or a negative errno:
  *   -EPERM   caller lacks CAP_NET_ADMIN in the netns' user namespace, or
  *            tried to delete the fallback tunnel itself,
  *   -EEXIST  the parameters already describe another tunnel,
  *   -ENOENT  no tunnel matches (change or delete via the fallback device),
  *   -EINVAL  unknown @cmd, or a change that would alter the
  *            point-to-point/broadcast nature of the device,
  *   or the error from creating a new tunnel.
  */
 792int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
 793{
 794	int err = 0;
 795	struct ip_tunnel *t = netdev_priv(dev);
 796	struct net *net = t->net;
 797	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
 798
 799	BUG_ON(!itn->fb_tunnel_dev);
 800	switch (cmd) {
 801	case SIOCGETTUNNEL:
		/* On the fallback device, @p selects which tunnel to report;
		 * if none matches, the fallback tunnel itself is reported.
		 */
 802		if (dev == itn->fb_tunnel_dev) {
 803			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
 804			if (!t)
 805				t = netdev_priv(dev);
 806		}
 807		memcpy(p, &t->parms, sizeof(*p));
 808		break;
 809
 810	case SIOCADDTUNNEL:
 811	case SIOCCHGTUNNEL:
 812		err = -EPERM;
 813		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 814			goto done;
		/* A fixed ttl forces the DF bit on the outer header. */
 815		if (p->iph.ttl)
 816			p->iph.frag_off |= htons(IP_DF);
		/* Except for VTI, keys are zeroed unless TUNNEL_KEY is set. */
 817		if (!(p->i_flags & VTI_ISVTI)) {
 818			if (!(p->i_flags & TUNNEL_KEY))
 819				p->i_key = 0;
 820			if (!(p->o_flags & TUNNEL_KEY))
 821				p->o_key = 0;
 822		}
 823
 824		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
 825
 826		if (cmd == SIOCADDTUNNEL) {
 827			if (!t) {
 828				t = ip_tunnel_create(net, itn, p);
 829				err = PTR_ERR_OR_ZERO(t);
 830				break;
 831			}
 832
 833			err = -EEXIST;
 834			break;
 835		}
		/* SIOCCHGTUNNEL issued on a regular (non-fallback) device. */
 836		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
 837			if (t) {
 838				if (t->dev != dev) {
 839					err = -EEXIST;
 840					break;
 841				}
 842			} else {
 843				unsigned int nflags = 0;
 844
 845				if (ipv4_is_multicast(p->iph.daddr))
 846					nflags = IFF_BROADCAST;
 847				else if (p->iph.daddr)
 848					nflags = IFF_POINTOPOINT;
 849
				/* The new destination must not change the
				 * POINTOPOINT/BROADCAST flags of the device.
				 */
 850				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
 851					err = -EINVAL;
 852					break;
 853				}
 854
 855				t = netdev_priv(dev);
 856			}
 857		}
 858
 859		if (t) {
 860			err = 0;
 861			ip_tunnel_update(itn, t, dev, p, true);
 862		} else {
 863			err = -ENOENT;
 864		}
 865		break;
 866
 867	case SIOCDELTUNNEL:
 868		err = -EPERM;
 869		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 870			goto done;
 871
		/* Via the fallback device, @p names the tunnel to delete; the
		 * fallback tunnel itself cannot be deleted this way.
		 */
 872		if (dev == itn->fb_tunnel_dev) {
 873			err = -ENOENT;
 874			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
 875			if (!t)
 876				goto done;
 877			err = -EPERM;
 878			if (t == netdev_priv(itn->fb_tunnel_dev))
 879				goto done;
 880			dev = t->dev;
 881		}
 882		unregister_netdevice(dev);
 883		err = 0;
 884		break;
 885
 886	default:
 887		err = -EINVAL;
 888	}
 889
 890done:
 891	return err;
 892}
 893EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
 894
 895int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
 896{
 897	struct ip_tunnel *tunnel = netdev_priv(dev);
 898	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
 899	int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
 900
 901	if (new_mtu < 68)
 902		return -EINVAL;
 903
 904	if (new_mtu > max_mtu) {
 905		if (strict)
 906			return -EINVAL;
 907
 908		new_mtu = max_mtu;
 909	}
 910
 911	dev->mtu = new_mtu;
 912	return 0;
 913}
 914EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
 915
 916int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
 917{
 918	return __ip_tunnel_change_mtu(dev, new_mtu, true);
 919}
 920EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
 921
 922static void ip_tunnel_dev_free(struct net_device *dev)
 923{
 924	struct ip_tunnel *tunnel = netdev_priv(dev);
 925
 926	gro_cells_destroy(&tunnel->gro_cells);
 927	dst_cache_destroy(&tunnel->dst_cache);
 928	free_percpu(dev->tstats);
 929	free_netdev(dev);
 930}
 931
 932void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
 933{
 934	struct ip_tunnel *tunnel = netdev_priv(dev);
 935	struct ip_tunnel_net *itn;
 936
 937	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
 938
 939	if (itn->fb_tunnel_dev != dev) {
 940		ip_tunnel_del(itn, netdev_priv(dev));
 941		unregister_netdevice_queue(dev, head);
 942	}
 943}
 944EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
 945
 946struct net *ip_tunnel_get_link_net(const struct net_device *dev)
 947{
 948	struct ip_tunnel *tunnel = netdev_priv(dev);
 949
 950	return tunnel->net;
 951}
 952EXPORT_SYMBOL(ip_tunnel_get_link_net);
 953
 954int ip_tunnel_get_iflink(const struct net_device *dev)
 955{
 956	struct ip_tunnel *tunnel = netdev_priv(dev);
 957
 958	return tunnel->parms.link;
 959}
 960EXPORT_SYMBOL(ip_tunnel_get_iflink);
 961
 962int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
 963				  struct rtnl_link_ops *ops, char *devname)
 964{
 965	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
 966	struct ip_tunnel_parm parms;
 967	unsigned int i;
 968
 
 969	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
 970		INIT_HLIST_HEAD(&itn->tunnels[i]);
 971
 972	if (!ops) {
 
 
 
 
 973		itn->fb_tunnel_dev = NULL;
 974		return 0;
 975	}
 976
 977	memset(&parms, 0, sizeof(parms));
 978	if (devname)
 979		strlcpy(parms.name, devname, IFNAMSIZ);
 980
 981	rtnl_lock();
 982	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
 983	/* FB netdevice is special: we have one, and only one per netns.
 984	 * Allowing to move it to another netns is clearly unsafe.
 985	 */
 986	if (!IS_ERR(itn->fb_tunnel_dev)) {
 987		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
 988		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
 989		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
 
 990	}
 991	rtnl_unlock();
 992
 993	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
 994}
 995EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
 996
 997static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
 
 998			      struct rtnl_link_ops *ops)
 999{
1000	struct net *net = dev_net(itn->fb_tunnel_dev);
1001	struct net_device *dev, *aux;
1002	int h;
1003
1004	for_each_netdev_safe(net, dev, aux)
1005		if (dev->rtnl_link_ops == ops)
1006			unregister_netdevice_queue(dev, head);
1007
1008	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
1009		struct ip_tunnel *t;
1010		struct hlist_node *n;
1011		struct hlist_head *thead = &itn->tunnels[h];
1012
1013		hlist_for_each_entry_safe(t, n, thead, hash_node)
1014			/* If dev is in the same netns, it has already
1015			 * been added to the list by the previous loop.
1016			 */
1017			if (!net_eq(dev_net(t->dev), net))
1018				unregister_netdevice_queue(t->dev, head);
1019	}
1020}
1021
1022void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
 
1023{
 
 
1024	LIST_HEAD(list);
1025
1026	rtnl_lock();
1027	ip_tunnel_destroy(itn, &list, ops);
 
 
 
1028	unregister_netdevice_many(&list);
1029	rtnl_unlock();
1030}
1031EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
1032
1033int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
1034		      struct ip_tunnel_parm *p)
1035{
1036	struct ip_tunnel *nt;
1037	struct net *net = dev_net(dev);
1038	struct ip_tunnel_net *itn;
1039	int mtu;
1040	int err;
1041
1042	nt = netdev_priv(dev);
1043	itn = net_generic(net, nt->ip_tnl_net_id);
1044
1045	if (nt->collect_md) {
1046		if (rtnl_dereference(itn->collect_md_tun))
1047			return -EEXIST;
1048	} else {
1049		if (ip_tunnel_find(itn, p, dev->type))
1050			return -EEXIST;
1051	}
1052
1053	nt->net = net;
1054	nt->parms = *p;
 
1055	err = register_netdevice(dev);
1056	if (err)
1057		goto out;
1058
1059	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1060		eth_hw_addr_random(dev);
1061
1062	mtu = ip_tunnel_bind_dev(dev);
1063	if (!tb[IFLA_MTU])
1064		dev->mtu = mtu;
 
 
 
 
 
 
 
 
1065
1066	ip_tunnel_add(itn, nt);
1067out:
 
 
 
 
1068	return err;
1069}
1070EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1071
1072int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1073			 struct ip_tunnel_parm *p)
1074{
1075	struct ip_tunnel *t;
1076	struct ip_tunnel *tunnel = netdev_priv(dev);
1077	struct net *net = tunnel->net;
1078	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1079
1080	if (dev == itn->fb_tunnel_dev)
1081		return -EINVAL;
1082
1083	t = ip_tunnel_find(itn, p, dev->type);
1084
1085	if (t) {
1086		if (t->dev != dev)
1087			return -EEXIST;
1088	} else {
1089		t = tunnel;
1090
1091		if (dev->type != ARPHRD_ETHER) {
1092			unsigned int nflags = 0;
1093
1094			if (ipv4_is_multicast(p->iph.daddr))
1095				nflags = IFF_BROADCAST;
1096			else if (p->iph.daddr)
1097				nflags = IFF_POINTOPOINT;
1098
1099			if ((dev->flags ^ nflags) &
1100			    (IFF_POINTOPOINT | IFF_BROADCAST))
1101				return -EINVAL;
1102		}
1103	}
1104
1105	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
1106	return 0;
1107}
1108EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1109
1110int ip_tunnel_init(struct net_device *dev)
1111{
1112	struct ip_tunnel *tunnel = netdev_priv(dev);
1113	struct iphdr *iph = &tunnel->parms.iph;
1114	int err;
1115
1116	dev->destructor	= ip_tunnel_dev_free;
 
1117	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1118	if (!dev->tstats)
1119		return -ENOMEM;
1120
1121	err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
1122	if (err) {
1123		free_percpu(dev->tstats);
1124		return err;
1125	}
1126
1127	err = gro_cells_init(&tunnel->gro_cells, dev);
1128	if (err) {
1129		dst_cache_destroy(&tunnel->dst_cache);
1130		free_percpu(dev->tstats);
1131		return err;
1132	}
1133
1134	tunnel->dev = dev;
1135	tunnel->net = dev_net(dev);
1136	strcpy(tunnel->parms.name, dev->name);
1137	iph->version		= 4;
1138	iph->ihl		= 5;
1139
1140	if (tunnel->collect_md) {
1141		dev->features |= NETIF_F_NETNS_LOCAL;
1142		netif_keep_dst(dev);
1143	}
1144	return 0;
1145}
1146EXPORT_SYMBOL_GPL(ip_tunnel_init);
1147
1148void ip_tunnel_uninit(struct net_device *dev)
1149{
1150	struct ip_tunnel *tunnel = netdev_priv(dev);
1151	struct net *net = tunnel->net;
1152	struct ip_tunnel_net *itn;
1153
1154	itn = net_generic(net, tunnel->ip_tnl_net_id);
1155	/* fb_tunnel_dev will be unregisted in net-exit call. */
1156	if (itn->fb_tunnel_dev != dev)
1157		ip_tunnel_del(itn, netdev_priv(dev));
1158
1159	dst_cache_reset(&tunnel->dst_cache);
1160}
1161EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1162
1163/* Do least required initialization, rest of init is done in tunnel_init call */
1164void ip_tunnel_setup(struct net_device *dev, int net_id)
1165{
1166	struct ip_tunnel *tunnel = netdev_priv(dev);
1167	tunnel->ip_tnl_net_id = net_id;
1168}
1169EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1170
1171MODULE_LICENSE("GPL");