Linux Audio

Check our new training course

Loading...
Note: File does not exist in v3.1.
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (c) 2013 Nicira, Inc.
   4 */
   5
   6#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
   7
   8#include <linux/capability.h>
   9#include <linux/module.h>
  10#include <linux/types.h>
  11#include <linux/kernel.h>
  12#include <linux/slab.h>
  13#include <linux/uaccess.h>
  14#include <linux/skbuff.h>
  15#include <linux/netdevice.h>
  16#include <linux/in.h>
  17#include <linux/tcp.h>
  18#include <linux/udp.h>
  19#include <linux/if_arp.h>
  20#include <linux/init.h>
  21#include <linux/in6.h>
  22#include <linux/inetdevice.h>
  23#include <linux/igmp.h>
  24#include <linux/netfilter_ipv4.h>
  25#include <linux/etherdevice.h>
  26#include <linux/if_ether.h>
  27#include <linux/if_vlan.h>
  28#include <linux/rculist.h>
  29#include <linux/err.h>
  30
  31#include <net/sock.h>
  32#include <net/ip.h>
  33#include <net/icmp.h>
  34#include <net/protocol.h>
  35#include <net/ip_tunnels.h>
  36#include <net/arp.h>
  37#include <net/checksum.h>
  38#include <net/dsfield.h>
  39#include <net/inet_ecn.h>
  40#include <net/xfrm.h>
  41#include <net/net_namespace.h>
  42#include <net/netns/generic.h>
  43#include <net/rtnetlink.h>
  44#include <net/udp.h>
  45#include <net/dst_metadata.h>
  46
  47#if IS_ENABLED(CONFIG_IPV6)
  48#include <net/ipv6.h>
  49#include <net/ip6_fib.h>
  50#include <net/ip6_route.h>
  51#endif
  52
  53static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
  54{
  55	return hash_32((__force u32)key ^ (__force u32)remote,
  56			 IP_TNL_HASH_BITS);
  57}
  58
  59static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
  60				__be16 flags, __be32 key)
  61{
  62	if (p->i_flags & TUNNEL_KEY) {
  63		if (flags & TUNNEL_KEY)
  64			return key == p->i_key;
  65		else
  66			/* key expected, none present */
  67			return false;
  68	} else
  69		return !(flags & TUNNEL_KEY);
  70}
  71
  72/* Fallback tunnel: no source, no destination, no key, no options
  73
  74   Tunnel hash table:
  75   We require exact key match i.e. if a key is present in packet
  76   it will match only tunnel with the same key; if it is not present,
  77   it will match only keyless tunnel.
  78
  79   All keyless packets, if not matched configured keyless tunnels
  80   will match fallback tunnel.
  81   Given src, dst and key, find appropriate for input tunnel.

   Lookup order (only devices that are IFF_UP are considered):
     1. tunnels whose local and remote addresses both equal the packet's;
     2. tunnels with a matching remote address and no local address;
     3. in the (key, 0) bucket: tunnels with a matching local address and
        no remote address, or whose remote address equals a multicast
        "local";
     4. in the (key, 0) bucket: tunnels with neither address set.
   In every pass a tunnel whose parms.link equals "link" is returned at
   once; other matches are only remembered as a candidate.  If no pass
   returns, the candidate is used, then the collect_md tunnel, then the
   fallback device, otherwise NULL.
  82*/
  83struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
  84				   int link, __be16 flags,
  85				   __be32 remote, __be32 local,
  86				   __be32 key)
  87{
  88	struct ip_tunnel *t, *cand = NULL;
  89	struct hlist_head *head;
  90	struct net_device *ndev;
  91	unsigned int hash;
  92
  93	hash = ip_tunnel_hash(key, remote);
  94	head = &itn->tunnels[hash];
  95
	/* Pass 1: both endpoints match exactly. */
  96	hlist_for_each_entry_rcu(t, head, hash_node) {
  97		if (local != t->parms.iph.saddr ||
  98		    remote != t->parms.iph.daddr ||
  99		    !(t->dev->flags & IFF_UP))
 100			continue;
 101
 102		if (!ip_tunnel_key_match(&t->parms, flags, key))
 103			continue;
 104
 105		if (t->parms.link == link)
 106			return t;
 107		else
			/* Unlike later passes, this one overwrites cand. */
 108			cand = t;
 109	}
 110
	/* Pass 2: remote matches, tunnel has no local address. */
 111	hlist_for_each_entry_rcu(t, head, hash_node) {
 112		if (remote != t->parms.iph.daddr ||
 113		    t->parms.iph.saddr != 0 ||
 114		    !(t->dev->flags & IFF_UP))
 115			continue;
 116
 117		if (!ip_tunnel_key_match(&t->parms, flags, key))
 118			continue;
 119
 120		if (t->parms.link == link)
 121			return t;
 122		else if (!cand)
 123			cand = t;
 124	}
 125
	/* Remaining passes use the bucket hashed with remote == 0. */
 126	hash = ip_tunnel_hash(key, 0);
 127	head = &itn->tunnels[hash];
 128
	/* Pass 3: local-only tunnels, or tunnels whose remote is our
	 * multicast "local" address.
	 */
 129	hlist_for_each_entry_rcu(t, head, hash_node) {
 130		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
 131		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
 132			continue;
 133
 134		if (!(t->dev->flags & IFF_UP))
 135			continue;
 136
 137		if (!ip_tunnel_key_match(&t->parms, flags, key))
 138			continue;
 139
 140		if (t->parms.link == link)
 141			return t;
 142		else if (!cand)
 143			cand = t;
 144	}
 145
	/* Pass 4: tunnels with no addresses at all.  The key is compared
	 * directly here and skipped when the caller passes TUNNEL_NO_KEY.
	 */
 146	hlist_for_each_entry_rcu(t, head, hash_node) {
 147		if ((!(flags & TUNNEL_NO_KEY) && t->parms.i_key != key) ||
 148		    t->parms.iph.saddr != 0 ||
 149		    t->parms.iph.daddr != 0 ||
 150		    !(t->dev->flags & IFF_UP))
 151			continue;
 152
 153		if (t->parms.link == link)
 154			return t;
 155		else if (!cand)
 156			cand = t;
 157	}
 158
	/* No tunnel on the requested link: settle for a remembered match. */
 159	if (cand)
 160		return cand;
 161
 162	t = rcu_dereference(itn->collect_md_tun);
 163	if (t && t->dev->flags & IFF_UP)
 164		return t;
 165
	/* Last resort: the per-netns fallback device, if present and up. */
 166	ndev = READ_ONCE(itn->fb_tunnel_dev);
 167	if (ndev && ndev->flags & IFF_UP)
 168		return netdev_priv(ndev);
 169
 170	return NULL;
 171}
 172EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
 173
 174static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
 175				    struct ip_tunnel_parm *parms)
 176{
 177	unsigned int h;
 178	__be32 remote;
 179	__be32 i_key = parms->i_key;
 180
 181	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
 182		remote = parms->iph.daddr;
 183	else
 184		remote = 0;
 185
 186	if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
 187		i_key = 0;
 188
 189	h = ip_tunnel_hash(i_key, remote);
 190	return &itn->tunnels[h];
 191}
 192
 193static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
 194{
 195	struct hlist_head *head = ip_bucket(itn, &t->parms);
 196
 197	if (t->collect_md)
 198		rcu_assign_pointer(itn->collect_md_tun, t);
 199	hlist_add_head_rcu(&t->hash_node, head);
 200}
 201
 202static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
 203{
 204	if (t->collect_md)
 205		rcu_assign_pointer(itn->collect_md_tun, NULL);
 206	hlist_del_init_rcu(&t->hash_node);
 207}
 208
 209static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
 210					struct ip_tunnel_parm *parms,
 211					int type)
 212{
 213	__be32 remote = parms->iph.daddr;
 214	__be32 local = parms->iph.saddr;
 215	__be32 key = parms->i_key;
 216	__be16 flags = parms->i_flags;
 217	int link = parms->link;
 218	struct ip_tunnel *t = NULL;
 219	struct hlist_head *head = ip_bucket(itn, parms);
 220
 221	hlist_for_each_entry_rcu(t, head, hash_node) {
 222		if (local == t->parms.iph.saddr &&
 223		    remote == t->parms.iph.daddr &&
 224		    link == t->parms.link &&
 225		    type == t->dev->type &&
 226		    ip_tunnel_key_match(&t->parms, flags, key))
 227			break;
 228	}
 229	return t;
 230}
 231
 232static struct net_device *__ip_tunnel_create(struct net *net,
 233					     const struct rtnl_link_ops *ops,
 234					     struct ip_tunnel_parm *parms)
 235{
 236	int err;
 237	struct ip_tunnel *tunnel;
 238	struct net_device *dev;
 239	char name[IFNAMSIZ];
 240
 241	err = -E2BIG;
 242	if (parms->name[0]) {
 243		if (!dev_valid_name(parms->name))
 244			goto failed;
 245		strlcpy(name, parms->name, IFNAMSIZ);
 246	} else {
 247		if (strlen(ops->kind) > (IFNAMSIZ - 3))
 248			goto failed;
 249		strcpy(name, ops->kind);
 250		strcat(name, "%d");
 251	}
 252
 253	ASSERT_RTNL();
 254	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
 255	if (!dev) {
 256		err = -ENOMEM;
 257		goto failed;
 258	}
 259	dev_net_set(dev, net);
 260
 261	dev->rtnl_link_ops = ops;
 262
 263	tunnel = netdev_priv(dev);
 264	tunnel->parms = *parms;
 265	tunnel->net = net;
 266
 267	err = register_netdevice(dev);
 268	if (err)
 269		goto failed_free;
 270
 271	return dev;
 272
 273failed_free:
 274	free_netdev(dev);
 275failed:
 276	return ERR_PTR(err);
 277}
 278
 279static int ip_tunnel_bind_dev(struct net_device *dev)
 280{
 281	struct net_device *tdev = NULL;
 282	struct ip_tunnel *tunnel = netdev_priv(dev);
 283	const struct iphdr *iph;
 284	int hlen = LL_MAX_HEADER;
 285	int mtu = ETH_DATA_LEN;
 286	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
 287
 288	iph = &tunnel->parms.iph;
 289
 290	/* Guess output device to choose reasonable mtu and needed_headroom */
 291	if (iph->daddr) {
 292		struct flowi4 fl4;
 293		struct rtable *rt;
 294
 295		ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
 296				    iph->saddr, tunnel->parms.o_key,
 297				    RT_TOS(iph->tos), tunnel->parms.link,
 298				    tunnel->fwmark, 0);
 299		rt = ip_route_output_key(tunnel->net, &fl4);
 300
 301		if (!IS_ERR(rt)) {
 302			tdev = rt->dst.dev;
 303			ip_rt_put(rt);
 304		}
 305		if (dev->type != ARPHRD_ETHER)
 306			dev->flags |= IFF_POINTOPOINT;
 307
 308		dst_cache_reset(&tunnel->dst_cache);
 309	}
 310
 311	if (!tdev && tunnel->parms.link)
 312		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
 313
 314	if (tdev) {
 315		hlen = tdev->hard_header_len + tdev->needed_headroom;
 316		mtu = min(tdev->mtu, IP_MAX_MTU);
 317	}
 318
 319	dev->needed_headroom = t_hlen + hlen;
 320	mtu -= (dev->hard_header_len + t_hlen);
 321
 322	if (mtu < IPV4_MIN_MTU)
 323		mtu = IPV4_MIN_MTU;
 324
 325	return mtu;
 326}
 327
 328static struct ip_tunnel *ip_tunnel_create(struct net *net,
 329					  struct ip_tunnel_net *itn,
 330					  struct ip_tunnel_parm *parms)
 331{
 332	struct ip_tunnel *nt;
 333	struct net_device *dev;
 334	int t_hlen;
 335	int mtu;
 336	int err;
 337
 338	dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
 339	if (IS_ERR(dev))
 340		return ERR_CAST(dev);
 341
 342	mtu = ip_tunnel_bind_dev(dev);
 343	err = dev_set_mtu(dev, mtu);
 344	if (err)
 345		goto err_dev_set_mtu;
 346
 347	nt = netdev_priv(dev);
 348	t_hlen = nt->hlen + sizeof(struct iphdr);
 349	dev->min_mtu = ETH_MIN_MTU;
 350	dev->max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen;
 351	ip_tunnel_add(itn, nt);
 352	return nt;
 353
 354err_dev_set_mtu:
 355	unregister_netdevice(dev);
 356	return ERR_PTR(err);
 357}
 358
 /*
  * ip_tunnel_rcv - validate a decapsulated packet and pass it up the stack
  * @tunnel: tunnel the packet was matched to
  * @skb: received packet
  * @tpi: parsed tunnel header information (flags, sequence number)
  * @tun_dst: optional metadata dst to attach to the skb; may be NULL
  * @log_ecn_error: whether ECN decapsulation problems are logged
  *
  * The packet is dropped (and counted in the device error statistics)
  * when its checksum flag disagrees with the tunnel configuration, when
  * sequencing is enabled and the sequence number is missing or behind the
  * expected one, or when IP_ECN_decapsulate() returns a value above 1.
  * Otherwise it is handed to the tunnel's GRO cells.
  *
  * The skb is consumed on every path, and @tun_dst is either attached to
  * the skb or released.  Always returns 0.
  */
 359int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
 360		  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
 361		  bool log_ecn_error)
 362{
 363	struct pcpu_sw_netstats *tstats;
 364	const struct iphdr *iph = ip_hdr(skb);
 365	int err;
 366
 367#ifdef CONFIG_NET_IPGRE_BROADCAST
 368	if (ipv4_is_multicast(iph->daddr)) {
 369		tunnel->dev->stats.multicast++;
 370		skb->pkt_type = PACKET_BROADCAST;
 371	}
 372#endif
 373
	/* TUNNEL_CSUM must be set on both the packet and the tunnel, or on
	 * neither.
	 */
 374	if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
 375	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
 376		tunnel->dev->stats.rx_crc_errors++;
 377		tunnel->dev->stats.rx_errors++;
 378		goto drop;
 379	}
 380
 381	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		/* The signed difference makes the comparison survive
		 * sequence number wrap-around.
		 */
 382		if (!(tpi->flags&TUNNEL_SEQ) ||
 383		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
 384			tunnel->dev->stats.rx_fifo_errors++;
 385			tunnel->dev->stats.rx_errors++;
 386			goto drop;
 387		}
 388		tunnel->i_seqno = ntohl(tpi->seq) + 1;
 389	}
 390
 391	skb_reset_network_header(skb);
 392
	/* iph still refers to the header captured at function entry. */
 393	err = IP_ECN_decapsulate(iph, skb);
 394	if (unlikely(err)) {
 395		if (log_ecn_error)
 396			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
 397					&iph->saddr, iph->tos);
		/* Only results above 1 are fatal; a result of 1 is logged
		 * at most.
		 */
 398		if (err > 1) {
 399			++tunnel->dev->stats.rx_frame_errors;
 400			++tunnel->dev->stats.rx_errors;
 401			goto drop;
 402		}
 403	}
 404
 405	tstats = this_cpu_ptr(tunnel->dev->tstats);
 406	u64_stats_update_begin(&tstats->syncp);
 407	tstats->rx_packets++;
 408	tstats->rx_bytes += skb->len;
 409	u64_stats_update_end(&tstats->syncp);
 410
	/* Second argument is true when the tunnel's netns differs from the
	 * netns of its device.
	 */
 411	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
 412
 413	if (tunnel->dev->type == ARPHRD_ETHER) {
 414		skb->protocol = eth_type_trans(skb, tunnel->dev);
 415		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
 416	} else {
 417		skb->dev = tunnel->dev;
 418	}
 419
 420	if (tun_dst)
 421		skb_dst_set(skb, (struct dst_entry *)tun_dst);
 422
 423	gro_cells_receive(&tunnel->gro_cells, skb);
 424	return 0;
 425
 426drop:
	/* tun_dst was never attached to the skb, so release it here. */
 427	if (tun_dst)
 428		dst_release((struct dst_entry *)tun_dst);
 429	kfree_skb(skb);
 430	return 0;
 431}
 432EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
 433
 434int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
 435			    unsigned int num)
 436{
 437	if (num >= MAX_IPTUN_ENCAP_OPS)
 438		return -ERANGE;
 439
 440	return !cmpxchg((const struct ip_tunnel_encap_ops **)
 441			&iptun_encaps[num],
 442			NULL, ops) ? 0 : -1;
 443}
 444EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
 445
 446int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
 447			    unsigned int num)
 448{
 449	int ret;
 450
 451	if (num >= MAX_IPTUN_ENCAP_OPS)
 452		return -ERANGE;
 453
 454	ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
 455		       &iptun_encaps[num],
 456		       ops, NULL) == ops) ? 0 : -1;
 457
 458	synchronize_net();
 459
 460	return ret;
 461}
 462EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
 463
 464int ip_tunnel_encap_setup(struct ip_tunnel *t,
 465			  struct ip_tunnel_encap *ipencap)
 466{
 467	int hlen;
 468
 469	memset(&t->encap, 0, sizeof(t->encap));
 470
 471	hlen = ip_encap_hlen(ipencap);
 472	if (hlen < 0)
 473		return hlen;
 474
 475	t->encap.type = ipencap->type;
 476	t->encap.sport = ipencap->sport;
 477	t->encap.dport = ipencap->dport;
 478	t->encap.flags = ipencap->flags;
 479
 480	t->encap_hlen = hlen;
 481	t->hlen = t->encap_hlen + t->tun_hlen;
 482
 483	return 0;
 484}
 485EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
 486
 /*
  * tnl_update_pmtu - propagate the tunnel path MTU to the inner flow
  * @dev: tunnel device
  * @skb: packet about to be encapsulated
  * @rt: route chosen for the outer packet
  * @df: outer DF setting; non-zero selects the route MTU as the basis
  * @inner_iph: inner network header (consulted for inner IPv4 DF)
  * @tunnel_hlen: tunnel header length, used only when @md is true
  * @dst: outer destination, used only when @md is true
  * @md: true for the metadata based transmit path
  *
  * Updates the PMTU on the skb's dst when it has a valid one.  Returns
  * -E2BIG after sending an ICMP "fragmentation needed" (inner IPv4) or
  * ICMPv6 "packet too big" (inner IPv6) error when the packet does not
  * fit; returns 0 otherwise.
  */
 487static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
 488			    struct rtable *rt, __be16 df,
 489			    const struct iphdr *inner_iph,
 490			    int tunnel_hlen, __be32 dst, bool md)
 491{
 492	struct ip_tunnel *tunnel = netdev_priv(dev);
 493	int pkt_size;
 494	int mtu;
 495
	/* Without metadata the header length comes from the tunnel itself. */
 496	tunnel_hlen = md ? tunnel_hlen : tunnel->hlen;
 497	pkt_size = skb->len - tunnel_hlen - dev->hard_header_len;
 498
 499	if (df)
		/* Route MTU minus everything the tunnel adds in front. */
 500		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
 501					- sizeof(struct iphdr) - tunnel_hlen;
 502	else
 503		mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
 504
 505	if (skb_valid_dst(skb))
 506		skb_dst_update_pmtu_no_confirm(skb, mtu);
 507
 508	if (skb->protocol == htons(ETH_P_IP)) {
		/* GSO packets are exempt; others are rejected only when the
		 * inner header has DF set.
		 */
 509		if (!skb_is_gso(skb) &&
 510		    (inner_iph->frag_off & htons(IP_DF)) &&
 511		    mtu < pkt_size) {
 512			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
 513			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
 514			return -E2BIG;
 515		}
 516	}
 517#if IS_ENABLED(CONFIG_IPV6)
 518	else if (skb->protocol == htons(ETH_P_IPV6)) {
 519		struct rt6_info *rt6;
 520		__be32 daddr;
 521
 522		rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) :
 523					   NULL;
 524		daddr = md ? dst : tunnel->parms.iph.daddr;
 525
		/* Lower the MTU metric on the inner IPv6 route only for a
		 * unicast tunnel destination or a host (/128) route.
		 */
 526		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
 527			   mtu >= IPV6_MIN_MTU) {
 528			if ((daddr && !ipv4_is_multicast(daddr)) ||
 529			    rt6->rt6i_dst.plen == 128) {
 530				rt6->rt6i_flags |= RTF_MODIFIED;
 531				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
 532			}
 533		}
 534
 535		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
 536					mtu < pkt_size) {
 537			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 538			return -E2BIG;
 539		}
 540	}
 541#endif
 542	return 0;
 543}
 544
 545void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 546		       u8 proto, int tunnel_hlen)
 547{
 548	struct ip_tunnel *tunnel = netdev_priv(dev);
 549	u32 headroom = sizeof(struct iphdr);
 550	struct ip_tunnel_info *tun_info;
 551	const struct ip_tunnel_key *key;
 552	const struct iphdr *inner_iph;
 553	struct rtable *rt = NULL;
 554	struct flowi4 fl4;
 555	__be16 df = 0;
 556	u8 tos, ttl;
 557	bool use_cache;
 558
 559	tun_info = skb_tunnel_info(skb);
 560	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
 561		     ip_tunnel_info_af(tun_info) != AF_INET))
 562		goto tx_error;
 563	key = &tun_info->key;
 564	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 565	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
 566	tos = key->tos;
 567	if (tos == 1) {
 568		if (skb->protocol == htons(ETH_P_IP))
 569			tos = inner_iph->tos;
 570		else if (skb->protocol == htons(ETH_P_IPV6))
 571			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
 572	}
 573	ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
 574			    tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
 575			    0, skb->mark, skb_get_hash(skb));
 576	if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
 577		goto tx_error;
 578
 579	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
 580	if (use_cache)
 581		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl4.saddr);
 582	if (!rt) {
 583		rt = ip_route_output_key(tunnel->net, &fl4);
 584		if (IS_ERR(rt)) {
 585			dev->stats.tx_carrier_errors++;
 586			goto tx_error;
 587		}
 588		if (use_cache)
 589			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
 590					  fl4.saddr);
 591	}
 592	if (rt->dst.dev == dev) {
 593		ip_rt_put(rt);
 594		dev->stats.collisions++;
 595		goto tx_error;
 596	}
 597
 598	if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
 599		df = htons(IP_DF);
 600	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen,
 601			    key->u.ipv4.dst, true)) {
 602		ip_rt_put(rt);
 603		goto tx_error;
 604	}
 605
 606	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
 607	ttl = key->ttl;
 608	if (ttl == 0) {
 609		if (skb->protocol == htons(ETH_P_IP))
 610			ttl = inner_iph->ttl;
 611		else if (skb->protocol == htons(ETH_P_IPV6))
 612			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
 613		else
 614			ttl = ip4_dst_hoplimit(&rt->dst);
 615	}
 616
 617	if (!df && skb->protocol == htons(ETH_P_IP))
 618		df = inner_iph->frag_off & htons(IP_DF);
 619
 620	headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
 621	if (headroom > dev->needed_headroom)
 622		dev->needed_headroom = headroom;
 623
 624	if (skb_cow_head(skb, dev->needed_headroom)) {
 625		ip_rt_put(rt);
 626		goto tx_dropped;
 627	}
 628	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
 629		      df, !net_eq(tunnel->net, dev_net(dev)));
 630	return;
 631tx_error:
 632	dev->stats.tx_errors++;
 633	goto kfree;
 634tx_dropped:
 635	dev->stats.tx_dropped++;
 636kfree:
 637	kfree_skb(skb);
 638}
 639EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);
 640
 641void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 642		    const struct iphdr *tnl_params, u8 protocol)
 643{
 644	struct ip_tunnel *tunnel = netdev_priv(dev);
 645	struct ip_tunnel_info *tun_info = NULL;
 646	const struct iphdr *inner_iph;
 647	unsigned int max_headroom;	/* The extra header space needed */
 648	struct rtable *rt = NULL;		/* Route to the other host */
 649	bool use_cache = false;
 650	struct flowi4 fl4;
 651	bool md = false;
 652	bool connected;
 653	u8 tos, ttl;
 654	__be32 dst;
 655	__be16 df;
 656
 657	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
 658	connected = (tunnel->parms.iph.daddr != 0);
 659
 660	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 661
 662	dst = tnl_params->daddr;
 663	if (dst == 0) {
 664		/* NBMA tunnel */
 665
 666		if (!skb_dst(skb)) {
 667			dev->stats.tx_fifo_errors++;
 668			goto tx_error;
 669		}
 670
 671		tun_info = skb_tunnel_info(skb);
 672		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) &&
 673		    ip_tunnel_info_af(tun_info) == AF_INET &&
 674		    tun_info->key.u.ipv4.dst) {
 675			dst = tun_info->key.u.ipv4.dst;
 676			md = true;
 677			connected = true;
 678		}
 679		else if (skb->protocol == htons(ETH_P_IP)) {
 680			rt = skb_rtable(skb);
 681			dst = rt_nexthop(rt, inner_iph->daddr);
 682		}
 683#if IS_ENABLED(CONFIG_IPV6)
 684		else if (skb->protocol == htons(ETH_P_IPV6)) {
 685			const struct in6_addr *addr6;
 686			struct neighbour *neigh;
 687			bool do_tx_error_icmp;
 688			int addr_type;
 689
 690			neigh = dst_neigh_lookup(skb_dst(skb),
 691						 &ipv6_hdr(skb)->daddr);
 692			if (!neigh)
 693				goto tx_error;
 694
 695			addr6 = (const struct in6_addr *)&neigh->primary_key;
 696			addr_type = ipv6_addr_type(addr6);
 697
 698			if (addr_type == IPV6_ADDR_ANY) {
 699				addr6 = &ipv6_hdr(skb)->daddr;
 700				addr_type = ipv6_addr_type(addr6);
 701			}
 702
 703			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
 704				do_tx_error_icmp = true;
 705			else {
 706				do_tx_error_icmp = false;
 707				dst = addr6->s6_addr32[3];
 708			}
 709			neigh_release(neigh);
 710			if (do_tx_error_icmp)
 711				goto tx_error_icmp;
 712		}
 713#endif
 714		else
 715			goto tx_error;
 716
 717		if (!md)
 718			connected = false;
 719	}
 720
 721	tos = tnl_params->tos;
 722	if (tos & 0x1) {
 723		tos &= ~0x1;
 724		if (skb->protocol == htons(ETH_P_IP)) {
 725			tos = inner_iph->tos;
 726			connected = false;
 727		} else if (skb->protocol == htons(ETH_P_IPV6)) {
 728			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
 729			connected = false;
 730		}
 731	}
 732
 733	ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
 734			    tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
 735			    tunnel->fwmark, skb_get_hash(skb));
 736
 737	if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
 738		goto tx_error;
 739
 740	if (connected && md) {
 741		use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
 742		if (use_cache)
 743			rt = dst_cache_get_ip4(&tun_info->dst_cache,
 744					       &fl4.saddr);
 745	} else {
 746		rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache,
 747						&fl4.saddr) : NULL;
 748	}
 749
 750	if (!rt) {
 751		rt = ip_route_output_key(tunnel->net, &fl4);
 752
 753		if (IS_ERR(rt)) {
 754			dev->stats.tx_carrier_errors++;
 755			goto tx_error;
 756		}
 757		if (use_cache)
 758			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
 759					  fl4.saddr);
 760		else if (!md && connected)
 761			dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
 762					  fl4.saddr);
 763	}
 764
 765	if (rt->dst.dev == dev) {
 766		ip_rt_put(rt);
 767		dev->stats.collisions++;
 768		goto tx_error;
 769	}
 770
 771	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph,
 772			    0, 0, false)) {
 773		ip_rt_put(rt);
 774		goto tx_error;
 775	}
 776
 777	if (tunnel->err_count > 0) {
 778		if (time_before(jiffies,
 779				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
 780			tunnel->err_count--;
 781
 782			dst_link_failure(skb);
 783		} else
 784			tunnel->err_count = 0;
 785	}
 786
 787	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
 788	ttl = tnl_params->ttl;
 789	if (ttl == 0) {
 790		if (skb->protocol == htons(ETH_P_IP))
 791			ttl = inner_iph->ttl;
 792#if IS_ENABLED(CONFIG_IPV6)
 793		else if (skb->protocol == htons(ETH_P_IPV6))
 794			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
 795#endif
 796		else
 797			ttl = ip4_dst_hoplimit(&rt->dst);
 798	}
 799
 800	df = tnl_params->frag_off;
 801	if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
 802		df |= (inner_iph->frag_off&htons(IP_DF));
 803
 804	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
 805			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
 806	if (max_headroom > dev->needed_headroom)
 807		dev->needed_headroom = max_headroom;
 808
 809	if (skb_cow_head(skb, dev->needed_headroom)) {
 810		ip_rt_put(rt);
 811		dev->stats.tx_dropped++;
 812		kfree_skb(skb);
 813		return;
 814	}
 815
 816	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
 817		      df, !net_eq(tunnel->net, dev_net(dev)));
 818	return;
 819
 820#if IS_ENABLED(CONFIG_IPV6)
 821tx_error_icmp:
 822	dst_link_failure(skb);
 823#endif
 824tx_error:
 825	dev->stats.tx_errors++;
 826	kfree_skb(skb);
 827}
 828EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
 829
 830static void ip_tunnel_update(struct ip_tunnel_net *itn,
 831			     struct ip_tunnel *t,
 832			     struct net_device *dev,
 833			     struct ip_tunnel_parm *p,
 834			     bool set_mtu,
 835			     __u32 fwmark)
 836{
 837	ip_tunnel_del(itn, t);
 838	t->parms.iph.saddr = p->iph.saddr;
 839	t->parms.iph.daddr = p->iph.daddr;
 840	t->parms.i_key = p->i_key;
 841	t->parms.o_key = p->o_key;
 842	if (dev->type != ARPHRD_ETHER) {
 843		memcpy(dev->dev_addr, &p->iph.saddr, 4);
 844		memcpy(dev->broadcast, &p->iph.daddr, 4);
 845	}
 846	ip_tunnel_add(itn, t);
 847
 848	t->parms.iph.ttl = p->iph.ttl;
 849	t->parms.iph.tos = p->iph.tos;
 850	t->parms.iph.frag_off = p->iph.frag_off;
 851
 852	if (t->parms.link != p->link || t->fwmark != fwmark) {
 853		int mtu;
 854
 855		t->parms.link = p->link;
 856		t->fwmark = fwmark;
 857		mtu = ip_tunnel_bind_dev(dev);
 858		if (set_mtu)
 859			dev->mtu = mtu;
 860	}
 861	dst_cache_reset(&t->dst_cache);
 862	netdev_state_change(dev);
 863}
 864
 /*
  * ip_tunnel_ctl - handle the SIOC{GET,ADD,CHG,DEL}TUNNEL commands
  * @dev: device the command was issued on
  * @p: tunnel parameters; input for all commands, rewritten with the
  *     tunnel's parameters by SIOCGETTUNNEL
  * @cmd: one of SIOCGETTUNNEL, SIOCADDTUNNEL, SIOCCHGTUNNEL, SIOCDELTUNNEL
  *
  * Every command except SIOCGETTUNNEL requires CAP_NET_ADMIN in the user
  * namespace of the tunnel's netns.
  *
  * Returns 0 on success or a negative errno: -EPERM, -EEXIST, -ENOENT,
  * -EINVAL, or the error from creating a tunnel.
  */
 865int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
 866{
 867	int err = 0;
 868	struct ip_tunnel *t = netdev_priv(dev);
 869	struct net *net = t->net;
 870	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
 871
 872	switch (cmd) {
 873	case SIOCGETTUNNEL:
		/* On the fallback device @p selects which tunnel to report;
		 * with no match the fallback tunnel itself is reported.
		 */
 874		if (dev == itn->fb_tunnel_dev) {
 875			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
 876			if (!t)
 877				t = netdev_priv(dev);
 878		}
 879		memcpy(p, &t->parms, sizeof(*p));
 880		break;
 881
 882	case SIOCADDTUNNEL:
 883	case SIOCCHGTUNNEL:
 884		err = -EPERM;
 885		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 886			goto done;
		/* A fixed TTL forces DF on the outer header. */
 887		if (p->iph.ttl)
 888			p->iph.frag_off |= htons(IP_DF);
		/* For non-VTI tunnels a key is ignored unless TUNNEL_KEY is
		 * set for that direction.
		 */
 889		if (!(p->i_flags & VTI_ISVTI)) {
 890			if (!(p->i_flags & TUNNEL_KEY))
 891				p->i_key = 0;
 892			if (!(p->o_flags & TUNNEL_KEY))
 893				p->o_key = 0;
 894		}
 895
 896		t = ip_tunnel_find(itn, p, itn->type);
 897
 898		if (cmd == SIOCADDTUNNEL) {
 899			if (!t) {
 900				t = ip_tunnel_create(net, itn, p);
 901				err = PTR_ERR_OR_ZERO(t);
 902				break;
 903			}
 904
 905			err = -EEXIST;
 906			break;
 907		}
 908		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
 909			if (t) {
				/* The new parameters already belong to a
				 * different device.
				 */
 910				if (t->dev != dev) {
 911					err = -EEXIST;
 912					break;
 913				}
 914			} else {
 915				unsigned int nflags = 0;
 916
 917				if (ipv4_is_multicast(p->iph.daddr))
 918					nflags = IFF_BROADCAST;
 919				else if (p->iph.daddr)
 920					nflags = IFF_POINTOPOINT;
 921
				/* The new destination must not change the
				 * device's point-to-point/broadcast nature.
				 */
 922				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
 923					err = -EINVAL;
 924					break;
 925				}
 926
 927				t = netdev_priv(dev);
 928			}
 929		}
 930
 931		if (t) {
 932			err = 0;
			/* ioctl updates always reset the MTU and use fwmark 0. */
 933			ip_tunnel_update(itn, t, dev, p, true, 0);
 934		} else {
 935			err = -ENOENT;
 936		}
 937		break;
 938
 939	case SIOCDELTUNNEL:
 940		err = -EPERM;
 941		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 942			goto done;
 943
		/* On the fallback device @p names the tunnel to delete; the
		 * fallback tunnel itself cannot be deleted this way.
		 */
 944		if (dev == itn->fb_tunnel_dev) {
 945			err = -ENOENT;
 946			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
 947			if (!t)
 948				goto done;
 949			err = -EPERM;
 950			if (t == netdev_priv(itn->fb_tunnel_dev))
 951				goto done;
 952			dev = t->dev;
 953		}
 954		unregister_netdevice(dev);
 955		err = 0;
 956		break;
 957
 958	default:
 959		err = -EINVAL;
 960	}
 961
 962done:
 963	return err;
 964}
 965EXPORT_SYMBOL_GPL(ip_tunnel_ctl);
 966
 967int ip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 968{
 969	struct ip_tunnel_parm p;
 970	int err;
 971
 972	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 973		return -EFAULT;
 974	err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd);
 975	if (!err && copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 976		return -EFAULT;
 977	return err;
 978}
 979EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
 980
 981int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
 982{
 983	struct ip_tunnel *tunnel = netdev_priv(dev);
 984	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
 985	int max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen;
 986
 987	if (new_mtu < ETH_MIN_MTU)
 988		return -EINVAL;
 989
 990	if (new_mtu > max_mtu) {
 991		if (strict)
 992			return -EINVAL;
 993
 994		new_mtu = max_mtu;
 995	}
 996
 997	dev->mtu = new_mtu;
 998	return 0;
 999}
1000EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
1001
1002int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1003{
1004	return __ip_tunnel_change_mtu(dev, new_mtu, true);
1005}
1006EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
1007
1008static void ip_tunnel_dev_free(struct net_device *dev)
1009{
1010	struct ip_tunnel *tunnel = netdev_priv(dev);
1011
1012	gro_cells_destroy(&tunnel->gro_cells);
1013	dst_cache_destroy(&tunnel->dst_cache);
1014	free_percpu(dev->tstats);
1015}
1016
1017void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
1018{
1019	struct ip_tunnel *tunnel = netdev_priv(dev);
1020	struct ip_tunnel_net *itn;
1021
1022	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
1023
1024	if (itn->fb_tunnel_dev != dev) {
1025		ip_tunnel_del(itn, netdev_priv(dev));
1026		unregister_netdevice_queue(dev, head);
1027	}
1028}
1029EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
1030
1031struct net *ip_tunnel_get_link_net(const struct net_device *dev)
1032{
1033	struct ip_tunnel *tunnel = netdev_priv(dev);
1034
1035	return tunnel->net;
1036}
1037EXPORT_SYMBOL(ip_tunnel_get_link_net);
1038
1039int ip_tunnel_get_iflink(const struct net_device *dev)
1040{
1041	struct ip_tunnel *tunnel = netdev_priv(dev);
1042
1043	return tunnel->parms.link;
1044}
1045EXPORT_SYMBOL(ip_tunnel_get_iflink);
1046
1047int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
1048				  struct rtnl_link_ops *ops, char *devname)
1049{
1050	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
1051	struct ip_tunnel_parm parms;
1052	unsigned int i;
1053
1054	itn->rtnl_link_ops = ops;
1055	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
1056		INIT_HLIST_HEAD(&itn->tunnels[i]);
1057
1058	if (!ops || !net_has_fallback_tunnels(net)) {
1059		struct ip_tunnel_net *it_init_net;
1060
1061		it_init_net = net_generic(&init_net, ip_tnl_net_id);
1062		itn->type = it_init_net->type;
1063		itn->fb_tunnel_dev = NULL;
1064		return 0;
1065	}
1066
1067	memset(&parms, 0, sizeof(parms));
1068	if (devname)
1069		strlcpy(parms.name, devname, IFNAMSIZ);
1070
1071	rtnl_lock();
1072	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
1073	/* FB netdevice is special: we have one, and only one per netns.
1074	 * Allowing to move it to another netns is clearly unsafe.
1075	 */
1076	if (!IS_ERR(itn->fb_tunnel_dev)) {
1077		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
1078		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
1079		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
1080		itn->type = itn->fb_tunnel_dev->type;
1081	}
1082	rtnl_unlock();
1083
1084	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
1085}
1086EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
1087
1088static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
1089			      struct list_head *head,
1090			      struct rtnl_link_ops *ops)
1091{
1092	struct net_device *dev, *aux;
1093	int h;
1094
1095	for_each_netdev_safe(net, dev, aux)
1096		if (dev->rtnl_link_ops == ops)
1097			unregister_netdevice_queue(dev, head);
1098
1099	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
1100		struct ip_tunnel *t;
1101		struct hlist_node *n;
1102		struct hlist_head *thead = &itn->tunnels[h];
1103
1104		hlist_for_each_entry_safe(t, n, thead, hash_node)
1105			/* If dev is in the same netns, it has already
1106			 * been added to the list by the previous loop.
1107			 */
1108			if (!net_eq(dev_net(t->dev), net))
1109				unregister_netdevice_queue(t->dev, head);
1110	}
1111}
1112
1113void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
1114			   struct rtnl_link_ops *ops)
1115{
1116	struct ip_tunnel_net *itn;
1117	struct net *net;
1118	LIST_HEAD(list);
1119
1120	rtnl_lock();
1121	list_for_each_entry(net, net_list, exit_list) {
1122		itn = net_generic(net, id);
1123		ip_tunnel_destroy(net, itn, &list, ops);
1124	}
1125	unregister_netdevice_many(&list);
1126	rtnl_unlock();
1127}
1128EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);
1129
1130int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
1131		      struct ip_tunnel_parm *p, __u32 fwmark)
1132{
1133	struct ip_tunnel *nt;
1134	struct net *net = dev_net(dev);
1135	struct ip_tunnel_net *itn;
1136	int mtu;
1137	int err;
1138
1139	nt = netdev_priv(dev);
1140	itn = net_generic(net, nt->ip_tnl_net_id);
1141
1142	if (nt->collect_md) {
1143		if (rtnl_dereference(itn->collect_md_tun))
1144			return -EEXIST;
1145	} else {
1146		if (ip_tunnel_find(itn, p, dev->type))
1147			return -EEXIST;
1148	}
1149
1150	nt->net = net;
1151	nt->parms = *p;
1152	nt->fwmark = fwmark;
1153	err = register_netdevice(dev);
1154	if (err)
1155		goto err_register_netdevice;
1156
1157	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1158		eth_hw_addr_random(dev);
1159
1160	mtu = ip_tunnel_bind_dev(dev);
1161	if (tb[IFLA_MTU]) {
1162		unsigned int max = IP_MAX_MTU - dev->hard_header_len - nt->hlen;
1163
1164		mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU,
1165			    (unsigned int)(max - sizeof(struct iphdr)));
1166	}
1167
1168	err = dev_set_mtu(dev, mtu);
1169	if (err)
1170		goto err_dev_set_mtu;
1171
1172	ip_tunnel_add(itn, nt);
1173	return 0;
1174
1175err_dev_set_mtu:
1176	unregister_netdevice(dev);
1177err_register_netdevice:
1178	return err;
1179}
1180EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1181
1182int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1183			 struct ip_tunnel_parm *p, __u32 fwmark)
1184{
1185	struct ip_tunnel *t;
1186	struct ip_tunnel *tunnel = netdev_priv(dev);
1187	struct net *net = tunnel->net;
1188	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1189
1190	if (dev == itn->fb_tunnel_dev)
1191		return -EINVAL;
1192
1193	t = ip_tunnel_find(itn, p, dev->type);
1194
1195	if (t) {
1196		if (t->dev != dev)
1197			return -EEXIST;
1198	} else {
1199		t = tunnel;
1200
1201		if (dev->type != ARPHRD_ETHER) {
1202			unsigned int nflags = 0;
1203
1204			if (ipv4_is_multicast(p->iph.daddr))
1205				nflags = IFF_BROADCAST;
1206			else if (p->iph.daddr)
1207				nflags = IFF_POINTOPOINT;
1208
1209			if ((dev->flags ^ nflags) &
1210			    (IFF_POINTOPOINT | IFF_BROADCAST))
1211				return -EINVAL;
1212		}
1213	}
1214
1215	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark);
1216	return 0;
1217}
1218EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1219
1220int ip_tunnel_init(struct net_device *dev)
1221{
1222	struct ip_tunnel *tunnel = netdev_priv(dev);
1223	struct iphdr *iph = &tunnel->parms.iph;
1224	int err;
1225
1226	dev->needs_free_netdev = true;
1227	dev->priv_destructor = ip_tunnel_dev_free;
1228	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1229	if (!dev->tstats)
1230		return -ENOMEM;
1231
1232	err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
1233	if (err) {
1234		free_percpu(dev->tstats);
1235		return err;
1236	}
1237
1238	err = gro_cells_init(&tunnel->gro_cells, dev);
1239	if (err) {
1240		dst_cache_destroy(&tunnel->dst_cache);
1241		free_percpu(dev->tstats);
1242		return err;
1243	}
1244
1245	tunnel->dev = dev;
1246	tunnel->net = dev_net(dev);
1247	strcpy(tunnel->parms.name, dev->name);
1248	iph->version		= 4;
1249	iph->ihl		= 5;
1250
1251	if (tunnel->collect_md)
1252		netif_keep_dst(dev);
1253	return 0;
1254}
1255EXPORT_SYMBOL_GPL(ip_tunnel_init);
1256
1257void ip_tunnel_uninit(struct net_device *dev)
1258{
1259	struct ip_tunnel *tunnel = netdev_priv(dev);
1260	struct net *net = tunnel->net;
1261	struct ip_tunnel_net *itn;
1262
1263	itn = net_generic(net, tunnel->ip_tnl_net_id);
1264	ip_tunnel_del(itn, netdev_priv(dev));
1265	if (itn->fb_tunnel_dev == dev)
1266		WRITE_ONCE(itn->fb_tunnel_dev, NULL);
1267
1268	dst_cache_reset(&tunnel->dst_cache);
1269}
1270EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1271
1272/* Do least required initialization, rest of init is done in tunnel_init call */
1273void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
1274{
1275	struct ip_tunnel *tunnel = netdev_priv(dev);
1276	tunnel->ip_tnl_net_id = net_id;
1277}
1278EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1279
1280MODULE_LICENSE("GPL");