v5.4
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *	Linux NET3:	GRE over IP protocol decoder.
   4 *
   5 *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
   6 */
   7
   8#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
   9
  10#include <linux/capability.h>
  11#include <linux/module.h>
  12#include <linux/types.h>
  13#include <linux/kernel.h>
  14#include <linux/slab.h>
  15#include <linux/uaccess.h>
  16#include <linux/skbuff.h>
  17#include <linux/netdevice.h>
  18#include <linux/in.h>
  19#include <linux/tcp.h>
  20#include <linux/udp.h>
  21#include <linux/if_arp.h>
  22#include <linux/if_vlan.h>
  23#include <linux/init.h>
  24#include <linux/in6.h>
  25#include <linux/inetdevice.h>
  26#include <linux/igmp.h>
  27#include <linux/netfilter_ipv4.h>
  28#include <linux/etherdevice.h>
  29#include <linux/if_ether.h>
  30
  31#include <net/sock.h>
  32#include <net/ip.h>
  33#include <net/icmp.h>
  34#include <net/protocol.h>
  35#include <net/ip_tunnels.h>
  36#include <net/arp.h>
  37#include <net/checksum.h>
  38#include <net/dsfield.h>
  39#include <net/inet_ecn.h>
  40#include <net/xfrm.h>
  41#include <net/net_namespace.h>
  42#include <net/netns/generic.h>
  43#include <net/rtnetlink.h>
  44#include <net/gre.h>
  45#include <net/dst_metadata.h>
  46#include <net/erspan.h>
  47
  48/*
  49   Problems & solutions
  50   --------------------
  51
  52   1. The most important issue is detecting local dead loops.
  53   They would cause complete host lockup in transmit, which
  54   would be "resolved" by stack overflow or, if queueing is enabled,
  55   with infinite looping in net_bh.
  56
  57   We cannot track such dead loops during route installation;
  58   it is an infeasible task. The most general solution would be
  59   to keep an skb->encapsulation counter (a sort of local ttl)
  60   and silently drop the packet when it expires. It is a good
  61   solution, but it supposes maintaining a new variable in ALL
  62   skbs, even if no tunneling is used.
  63
  64   Current solution: xmit_recursion breaks dead loops. This is a percpu
  65   counter, since when we enter the first ndo_xmit(), cpu migration is
  66   forbidden. We force an exit if this counter reaches RECURSION_LIMIT.
  67
  68   2. Networking dead loops would not kill routers, but would really
  69   kill the network. The IP hop limit plays the role of "t->recursion" here,
  70   if we copy it from the packet being encapsulated to the upper header.
  71   It is a very good solution, but it introduces two problems:
  72
  73   - Routing protocols using packets with ttl=1 (OSPF, RIP2)
  74     do not work over tunnels.
  75   - traceroute does not work. I planned to relay ICMP from the tunnel,
  76     so that this problem would be solved and traceroute output
  77     would be even more informative. This idea appeared to be wrong:
  78     only Linux complies with rfc1812 now (yes, guys, Linux is the only
  79     true router now :-)); all routers (at least in my neighbourhood)
  80     return only 8 bytes of payload. It is the end.
  81
  82   Hence, if we want OSPF to work or traceroute to say something
  83   reasonable, we should search for another solution.
  84
  85   One of them is to parse the packet, trying to detect inner encapsulation
  86   made by our node. It is difficult or even impossible, especially
  87   taking fragmentation into account. To be short, ttl is not a solution at all.
  88
  89   Current solution: The solution was UNEXPECTEDLY SIMPLE.
  90   We force the DF flag on tunnels with a preconfigured hop limit;
  91   that is ALL. :-) Well, it does not remove the problem completely,
  92   but the exponential growth of network traffic is changed to linear
  93   (branches that exceed the pmtu are pruned) and the tunnel mtu
  94   rapidly degrades to a value <68, where the looping stops.
  95   Yes, it is not good if there exists a router in the loop
  96   which does not force DF, even when encapsulated packets have DF set.
  97   But it is not our problem! Nobody could accuse us; we did
  98   all that we could. Even if it was your gated that injected the
  99   fatal route into the network, even if it was you who configured the
 100   fatal static route: you are innocent. :-)
 101
 102   Alexey Kuznetsov.
 103 */
 104
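/* A minimal, illustrative sketch of the percpu recursion guard described
 * above, under simplified assumptions: the constant and function names
 * below are invented for illustration; the real counter lives in the
 * core device-transmit path, not in this file.
 */
#define SKETCH_XMIT_RECURSION_LIMIT	8

static DEFINE_PER_CPU(unsigned int, sketch_xmit_recursion);

static netdev_tx_t sketch_xmit(struct sk_buff *skb, struct net_device *dev)
{
	netdev_tx_t ret;

	/* Preemption is disabled on the transmit path, so the percpu
	 * counter cannot be corrupted by CPU migration.
	 */
	if (__this_cpu_read(sketch_xmit_recursion) >=
	    SKETCH_XMIT_RECURSION_LIMIT) {
		kfree_skb(skb);		/* dead loop detected: drop silently */
		return NETDEV_TX_OK;
	}

	__this_cpu_inc(sketch_xmit_recursion);
	ret = dev->netdev_ops->ndo_start_xmit(skb, dev);
	__this_cpu_dec(sketch_xmit_recursion);

	return ret;
}
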
 105static bool log_ecn_error = true;
 106module_param(log_ecn_error, bool, 0644);
 107MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
 108
 109static struct rtnl_link_ops ipgre_link_ops __read_mostly;
 110static int ipgre_tunnel_init(struct net_device *dev);
 111static void erspan_build_header(struct sk_buff *skb,
 112				u32 id, u32 index,
 113				bool truncate, bool is_ipv4);
 114
 115static unsigned int ipgre_net_id __read_mostly;
 116static unsigned int gre_tap_net_id __read_mostly;
 117static unsigned int erspan_net_id __read_mostly;
 118
 119static int ipgre_err(struct sk_buff *skb, u32 info,
 120		     const struct tnl_ptk_info *tpi)
 121{
 122
 123	/* All the routers (except for Linux) return only
 124	   8 bytes of packet payload. It means that precise relaying of
 125	   ICMP in the real Internet is absolutely infeasible.
 126
 127	   Moreover, Cisco "wise men" put the GRE key into the third word
 128	   of the GRE header. That makes it impossible to maintain even
 129	   soft state for keyed GRE tunnels with checksums enabled. Tell
 130	   them "thank you".
 131
 132	   Well, I wonder: rfc1812 was written by a Cisco employee,
 133	   so why the hell do these idiots break standards established
 134	   by themselves???
 135	   */
 136	struct net *net = dev_net(skb->dev);
 137	struct ip_tunnel_net *itn;
 138	const struct iphdr *iph;
 139	const int type = icmp_hdr(skb)->type;
 140	const int code = icmp_hdr(skb)->code;
 141	unsigned int data_len = 0;
 142	struct ip_tunnel *t;
 143
 144	if (tpi->proto == htons(ETH_P_TEB))
 145		itn = net_generic(net, gre_tap_net_id);
 146	else if (tpi->proto == htons(ETH_P_ERSPAN) ||
 147		 tpi->proto == htons(ETH_P_ERSPAN2))
 148		itn = net_generic(net, erspan_net_id);
 149	else
 150		itn = net_generic(net, ipgre_net_id);
 151
 152	iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
 153	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
 154			     iph->daddr, iph->saddr, tpi->key);
 155
 156	if (!t)
 157		return -ENOENT;
 158
 159	switch (type) {
 160	default:
 161	case ICMP_PARAMETERPROB:
 162		return 0;
 163
 164	case ICMP_DEST_UNREACH:
 165		switch (code) {
 166		case ICMP_SR_FAILED:
 167		case ICMP_PORT_UNREACH:
 168			/* Impossible event. */
 169			return 0;
 170		default:
 171			/* All others are translated to HOST_UNREACH.
 172			   rfc2003 contains "deep thoughts" about NET_UNREACH,
 173			   I believe they are just ether pollution. --ANK
 174			 */
 175			break;
 176		}
 177		break;
 178
 179	case ICMP_TIME_EXCEEDED:
 180		if (code != ICMP_EXC_TTL)
 181			return 0;
 182		data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1: 32-bit words */
 183		break;
 184
 185	case ICMP_REDIRECT:
 186		break;
 187	}
 188
 189#if IS_ENABLED(CONFIG_IPV6)
 190	if (tpi->proto == htons(ETH_P_IPV6) &&
 191	    !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
 192					type, data_len))
 193		return 0;
 194#endif
 195
 196	if (t->parms.iph.daddr == 0 ||
 197	    ipv4_is_multicast(t->parms.iph.daddr))
 198		return 0;
 199
 200	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
 201		return 0;
 202
 203	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
 204		t->err_count++;
 205	else
 206		t->err_count = 1;
 207	t->err_time = jiffies;
 208
 209	return 0;
 210}
 211
 212static void gre_err(struct sk_buff *skb, u32 info)
 213{
 214	/* All the routers (except for Linux) return only
 215	 * 8 bytes of packet payload. It means that precise relaying of
 216	 * ICMP in the real Internet is absolutely infeasible.
 217	 *
 218	 * Moreover, Cisco "wise men" put the GRE key into the third word
 219	 * of the GRE header. That makes it impossible to maintain even
 220	 * soft state for keyed GRE tunnels with checksums enabled.
 221	 * Tell them "thank you".
 222	 *
 223	 * Well, I wonder: rfc1812 was written by a Cisco employee,
 224	 * so why the hell do these idiots break standards established
 225	 * by themselves???
 226	 */
 227
 228	const struct iphdr *iph = (struct iphdr *)skb->data;
 229	const int type = icmp_hdr(skb)->type;
 230	const int code = icmp_hdr(skb)->code;
 231	struct tnl_ptk_info tpi;
 232
 233	if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP),
 234			     iph->ihl * 4) < 0)
 235		return;
 236
 237	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
 238		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
 239				 skb->dev->ifindex, IPPROTO_GRE);
 240		return;
 241	}
 242	if (type == ICMP_REDIRECT) {
 243		ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex,
 244			      IPPROTO_GRE);
 245		return;
 246	}
 247
 248	ipgre_err(skb, info, &tpi);
 249}
 250
 251static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
 252		      int gre_hdr_len)
 253{
 254	struct net *net = dev_net(skb->dev);
 255	struct metadata_dst *tun_dst = NULL;
 256	struct erspan_base_hdr *ershdr;
 257	struct ip_tunnel_net *itn;
 258	struct ip_tunnel *tunnel;
 259	const struct iphdr *iph;
 260	struct erspan_md2 *md2;
 261	int ver;
 262	int len;
 263
 264	itn = net_generic(net, erspan_net_id);
 265
 266	iph = ip_hdr(skb);
 267	ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
 268	ver = ershdr->ver;
 269
 270	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
 271				  tpi->flags | TUNNEL_KEY,
 272				  iph->saddr, iph->daddr, tpi->key);
 273
 274	if (tunnel) {
 275		len = gre_hdr_len + erspan_hdr_len(ver);
 276		if (unlikely(!pskb_may_pull(skb, len)))
 277			return PACKET_REJECT;
 278
 279		if (__iptunnel_pull_header(skb,
 280					   len,
 281					   htons(ETH_P_TEB),
 282					   false, false) < 0)
 283			goto drop;
 284
 285		if (tunnel->collect_md) {
 286			struct erspan_metadata *pkt_md, *md;
 287			struct ip_tunnel_info *info;
 288			unsigned char *gh;
 289			__be64 tun_id;
 290			__be16 flags;
 291
 292			tpi->flags |= TUNNEL_KEY;
 293			flags = tpi->flags;
 294			tun_id = key32_to_tunnel_id(tpi->key);
 295
 296			tun_dst = ip_tun_rx_dst(skb, flags,
 297						tun_id, sizeof(*md));
 298			if (!tun_dst)
 299				return PACKET_REJECT;
 300
 301			/* skb can be uncloned in __iptunnel_pull_header, so
 302			 * old pkt_md is no longer valid and we need to reset
 303			 * it
 304			 */
 305			gh = skb_network_header(skb) +
 306			     skb_network_header_len(skb);
 307			pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
 308							    sizeof(*ershdr));
 309			md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
 310			md->version = ver;
 311			md2 = &md->u.md2;
 312			memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
 313						       ERSPAN_V2_MDSIZE);
 314
 315			info = &tun_dst->u.tun_info;
 316			info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
 317			info->options_len = sizeof(*md);
 318		}
 319
 320		skb_reset_mac_header(skb);
 321		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
 322		return PACKET_RCVD;
 323	}
 324	return PACKET_REJECT;
 325
 326drop:
 327	kfree_skb(skb);
 328	return PACKET_RCVD;
 329}
 330
 331static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
 332		       struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
 333{
 334	struct metadata_dst *tun_dst = NULL;
 335	const struct iphdr *iph;
 336	struct ip_tunnel *tunnel;
 337
 338	iph = ip_hdr(skb);
 339	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
 340				  iph->saddr, iph->daddr, tpi->key);
 341
 342	if (tunnel) {
 343		if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
 344					   raw_proto, false) < 0)
 345			goto drop;
 346
 347		if (tunnel->dev->type != ARPHRD_NONE)
 348			skb_pop_mac_header(skb);
 349		else
 350			skb_reset_mac_header(skb);
 351		if (tunnel->collect_md) {
 352			__be16 flags;
 353			__be64 tun_id;
 354
 355			flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
 356			tun_id = key32_to_tunnel_id(tpi->key);
 357			tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
 358			if (!tun_dst)
 359				return PACKET_REJECT;
 360		}
 361
 362		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
 363		return PACKET_RCVD;
 364	}
 365	return PACKET_NEXT;
 366
 367drop:
 368	kfree_skb(skb);
 369	return PACKET_RCVD;
 370}
 371
 372static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
 373		     int hdr_len)
 374{
 375	struct net *net = dev_net(skb->dev);
 376	struct ip_tunnel_net *itn;
 377	int res;
 378
 379	if (tpi->proto == htons(ETH_P_TEB))
 380		itn = net_generic(net, gre_tap_net_id);
 381	else
 382		itn = net_generic(net, ipgre_net_id);
 383
 384	res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
 385	if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
 386		/* ipgre tunnels in collect metadata mode should also
 387		 * receive ETH_P_TEB traffic.
 388		 */
 389		itn = net_generic(net, ipgre_net_id);
 390		res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
 391	}
 392	return res;
 393}
 394
 395static int gre_rcv(struct sk_buff *skb)
 396{
 397	struct tnl_ptk_info tpi;
 398	bool csum_err = false;
 399	int hdr_len;
 400
 401#ifdef CONFIG_NET_IPGRE_BROADCAST
 402	if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
 403		/* Looped back packet, drop it! */
 404		if (rt_is_output_route(skb_rtable(skb)))
 405			goto drop;
 406	}
 407#endif
 408
 409	hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
 410	if (hdr_len < 0)
 411		goto drop;
 412
 413	if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
 414		     tpi.proto == htons(ETH_P_ERSPAN2))) {
 415		if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
 416			return 0;
 417		goto out;
 418	}
 419
 420	if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
 421		return 0;
 422
 423out:
 424	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
 425drop:
 426	kfree_skb(skb);
 427	return 0;
 428}
 429
 430static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
 431		       const struct iphdr *tnl_params,
 432		       __be16 proto)
 433{
 434	struct ip_tunnel *tunnel = netdev_priv(dev);
 435
 436	if (tunnel->parms.o_flags & TUNNEL_SEQ)
 437		tunnel->o_seqno++;
 438
 439	/* Push GRE header. */
 440	gre_build_header(skb, tunnel->tun_hlen,
 441			 tunnel->parms.o_flags, proto, tunnel->parms.o_key,
 442			 htonl(tunnel->o_seqno));
 443
 444	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
 445}
 446
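/* Tags the skb with the GSO type to use if it is segmented later: GRE
 * with the checksum bit set needs SKB_GSO_GRE_CSUM so each resulting
 * segment gets its GRE checksum recomputed.
 */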
 447static int gre_handle_offloads(struct sk_buff *skb, bool csum)
 448{
 449	return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
 450}
 451
 452static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
 453			__be16 proto)
 454{
 455	struct ip_tunnel *tunnel = netdev_priv(dev);
 456	struct ip_tunnel_info *tun_info;
 457	const struct ip_tunnel_key *key;
 458	int tunnel_hlen;
 459	__be16 flags;
 460
 461	tun_info = skb_tunnel_info(skb);
 462	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
 463		     ip_tunnel_info_af(tun_info) != AF_INET))
 464		goto err_free_skb;
 465
 466	key = &tun_info->key;
 467	tunnel_hlen = gre_calc_hlen(key->tun_flags);
 468
 469	if (skb_cow_head(skb, dev->needed_headroom))
 470		goto err_free_skb;
 471
 472	/* Push Tunnel header. */
 473	if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
 474		goto err_free_skb;
 475
 476	flags = tun_info->key.tun_flags &
 477		(TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
 478	gre_build_header(skb, tunnel_hlen, flags, proto,
 479			 tunnel_id_to_key32(tun_info->key.tun_id),
 480			 (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
 481
 482	ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
 483
 484	return;
 485
 486err_free_skb:
 487	kfree_skb(skb);
 488	dev->stats.tx_dropped++;
 489}
 490
 491static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
 492{
 493	struct ip_tunnel *tunnel = netdev_priv(dev);
 494	struct ip_tunnel_info *tun_info;
 495	const struct ip_tunnel_key *key;
 496	struct erspan_metadata *md;
 497	bool truncate = false;
 498	__be16 proto;
 499	int tunnel_hlen;
 500	int version;
 501	int nhoff;
 502	int thoff;
 503
 504	tun_info = skb_tunnel_info(skb);
 505	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
 506		     ip_tunnel_info_af(tun_info) != AF_INET))
 507		goto err_free_skb;
 508
 509	key = &tun_info->key;
 510	if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
 511		goto err_free_skb;
 512	if (tun_info->options_len < sizeof(*md))
 513		goto err_free_skb;
 514	md = ip_tunnel_info_opts(tun_info);
 515
 516	/* ERSPAN has a fixed 8-byte GRE header (4-byte base + 4-byte seq) */
 517	version = md->version;
 518	tunnel_hlen = 8 + erspan_hdr_len(version); /* 16 bytes for v1, 20 for v2 */
 519
 520	if (skb_cow_head(skb, dev->needed_headroom))
 521		goto err_free_skb;
 522
 523	if (gre_handle_offloads(skb, false))
 524		goto err_free_skb;
 525
 526	if (skb->len > dev->mtu + dev->hard_header_len) {
 527		pskb_trim(skb, dev->mtu + dev->hard_header_len);
 528		truncate = true;
 529	}
 530
 531	nhoff = skb_network_header(skb) - skb_mac_header(skb);
 532	if (skb->protocol == htons(ETH_P_IP) &&
 533	    (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
 534		truncate = true;
 535
 536	thoff = skb_transport_header(skb) - skb_mac_header(skb);
 537	if (skb->protocol == htons(ETH_P_IPV6) &&
 538	    (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff))
 539		truncate = true;
 540
 541	if (version == 1) {
 542		erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
 543				    ntohl(md->u.index), truncate, true);
 544		proto = htons(ETH_P_ERSPAN);
 545	} else if (version == 2) {
 546		erspan_build_header_v2(skb,
 547				       ntohl(tunnel_id_to_key32(key->tun_id)),
 548				       md->u.md2.dir,
 549				       get_hwid(&md->u.md2),
 550				       truncate, true);
 551		proto = htons(ETH_P_ERSPAN2);
 552	} else {
 553		goto err_free_skb;
 554	}
 555
 556	gre_build_header(skb, 8, TUNNEL_SEQ,
 557			 proto, 0, htonl(tunnel->o_seqno++));
 558
 559	ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
 560
 561	return;
 562
 563err_free_skb:
 564	kfree_skb(skb);
 565	dev->stats.tx_dropped++;
 566}
 567
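/* ndo_fill_metadata_dst handler: performs the route lookup a transmit
 * would do and writes the resolved source address back into the tunnel
 * key, so a collect_md user (e.g. openvswitch) can learn it up front.
 */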
 568static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
 569{
 570	struct ip_tunnel_info *info = skb_tunnel_info(skb);
 571	const struct ip_tunnel_key *key;
 572	struct rtable *rt;
 573	struct flowi4 fl4;
 574
 575	if (ip_tunnel_info_af(info) != AF_INET)
 576		return -EINVAL;
 577
 578	key = &info->key;
 579	ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src,
 580			    tunnel_id_to_key32(key->tun_id), key->tos, 0,
 581			    skb->mark, skb_get_hash(skb));
 582	rt = ip_route_output_key(dev_net(dev), &fl4);
 583	if (IS_ERR(rt))
 584		return PTR_ERR(rt);
 585
 586	ip_rt_put(rt);
 587	info->key.u.ipv4.src = fl4.saddr;
 588	return 0;
 589}
 590
 591static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
 592			      struct net_device *dev)
 593{
 594	struct ip_tunnel *tunnel = netdev_priv(dev);
 595	const struct iphdr *tnl_params;
 596
 597	if (!pskb_inet_may_pull(skb))
 598		goto free_skb;
 599
 600	if (tunnel->collect_md) {
 601		gre_fb_xmit(skb, dev, skb->protocol);
 602		return NETDEV_TX_OK;
 603	}
 604
 605	if (dev->header_ops) {
 606		/* Need space for new headers */
 607		if (skb_cow_head(skb, dev->needed_headroom -
 608				      (tunnel->hlen + sizeof(struct iphdr))))
 609			goto free_skb;
 610
 611		tnl_params = (const struct iphdr *)skb->data;
 612
 613		/* Pull skb since ip_tunnel_xmit() needs skb->data pointing
 614		 * to gre header.
 615		 */
 616		skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
 617		skb_reset_mac_header(skb);
 618	} else {
 619		if (skb_cow_head(skb, dev->needed_headroom))
 620			goto free_skb;
 621
 622		tnl_params = &tunnel->parms.iph;
 623	}
 624
 625	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
 626		goto free_skb;
 627
 628	__gre_xmit(skb, dev, tnl_params, skb->protocol);
 629	return NETDEV_TX_OK;
 630
 631free_skb:
 632	kfree_skb(skb);
 633	dev->stats.tx_dropped++;
 634	return NETDEV_TX_OK;
 635}
 636
 637static netdev_tx_t erspan_xmit(struct sk_buff *skb,
 638			       struct net_device *dev)
 639{
 640	struct ip_tunnel *tunnel = netdev_priv(dev);
 641	bool truncate = false;
 642	__be16 proto;
 643
 644	if (!pskb_inet_may_pull(skb))
 645		goto free_skb;
 646
 647	if (tunnel->collect_md) {
 648		erspan_fb_xmit(skb, dev);
 649		return NETDEV_TX_OK;
 650	}
 651
 652	if (gre_handle_offloads(skb, false))
 653		goto free_skb;
 654
 655	if (skb_cow_head(skb, dev->needed_headroom))
 656		goto free_skb;
 657
 658	if (skb->len > dev->mtu + dev->hard_header_len) {
 659		pskb_trim(skb, dev->mtu + dev->hard_header_len);
 660		truncate = true;
 661	}
 662
 663	/* Push ERSPAN header */
 664	if (tunnel->erspan_ver == 1) {
 665		erspan_build_header(skb, ntohl(tunnel->parms.o_key),
 666				    tunnel->index,
 667				    truncate, true);
 668		proto = htons(ETH_P_ERSPAN);
 669	} else if (tunnel->erspan_ver == 2) {
 670		erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
 671				       tunnel->dir, tunnel->hwid,
 672				       truncate, true);
 673		proto = htons(ETH_P_ERSPAN2);
 674	} else {
 675		goto free_skb;
 676	}
 677
 678	tunnel->parms.o_flags &= ~TUNNEL_KEY;
 679	__gre_xmit(skb, dev, &tunnel->parms.iph, proto);
 680	return NETDEV_TX_OK;
 681
 682free_skb:
 683	kfree_skb(skb);
 684	dev->stats.tx_dropped++;
 685	return NETDEV_TX_OK;
 686}
 687
 688static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
 689				struct net_device *dev)
 690{
 691	struct ip_tunnel *tunnel = netdev_priv(dev);
 692
 693	if (!pskb_inet_may_pull(skb))
 694		goto free_skb;
 695
 696	if (tunnel->collect_md) {
 697		gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
 698		return NETDEV_TX_OK;
 699	}
 700
 701	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
 702		goto free_skb;
 703
 704	if (skb_cow_head(skb, dev->needed_headroom))
 705		goto free_skb;
 706
 707	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
 708	return NETDEV_TX_OK;
 709
 710free_skb:
 711	kfree_skb(skb);
 712	dev->stats.tx_dropped++;
 713	return NETDEV_TX_OK;
 714}
 715
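/* Worked example for the arithmetic below: switching o_flags from 0 to
 * TUNNEL_KEY | TUNNEL_SEQ grows tun_hlen from 4 to 12 bytes (a 4-byte
 * base plus 4 bytes per option), so needed_headroom grows by 8 and the
 * MTU shrinks by 8.
 */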
 716static void ipgre_link_update(struct net_device *dev, bool set_mtu)
 717{
 718	struct ip_tunnel *tunnel = netdev_priv(dev);
 719	int len;
 720
 721	len = tunnel->tun_hlen;
 722	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
 723	len = tunnel->tun_hlen - len;
 724	tunnel->hlen = tunnel->hlen + len;
 725
 726	dev->needed_headroom = dev->needed_headroom + len;
 727	if (set_mtu)
 728		dev->mtu = max_t(int, dev->mtu - len, 68);
 729
 730	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
 731		if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
 732		    tunnel->encap.type == TUNNEL_ENCAP_NONE) {
 733			dev->features |= NETIF_F_GSO_SOFTWARE;
 734			dev->hw_features |= NETIF_F_GSO_SOFTWARE;
 735		} else {
 736			dev->features &= ~NETIF_F_GSO_SOFTWARE;
 737			dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
 738		}
 739		dev->features |= NETIF_F_LLTX;
 740	} else {
 741		dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
 742		dev->features &= ~(NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE);
 743	}
 744}
 745
 746static int ipgre_tunnel_ioctl(struct net_device *dev,
 747			      struct ifreq *ifr, int cmd)
 748{
 749	struct ip_tunnel_parm p;
 750	int err;
 751
 752	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 753		return -EFAULT;
 754
 755	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
 756		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
 757		    p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) ||
 758		    ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING)))
 759			return -EINVAL;
 760	}
 761
 762	p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
 763	p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
 764
 765	err = ip_tunnel_ioctl(dev, &p, cmd);
 766	if (err)
 767		return err;
 768
 769	if (cmd == SIOCCHGTUNNEL) {
 770		struct ip_tunnel *t = netdev_priv(dev);
 771
 772		t->parms.i_flags = p.i_flags;
 773		t->parms.o_flags = p.o_flags;
 774
 775		if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
 776			ipgre_link_update(dev, true);
 777	}
 778
 779	p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags);
 780	p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags);
 781
 782	if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 783		return -EFAULT;
 784
 785	return 0;
 786}
 787
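/* A minimal userspace sketch (not part of the kernel tree) of the ioctl
 * ABI handled above, using the classic "ip tunnel" interface from
 * <linux/if_tunnel.h>. The helper name is invented for illustration,
 * error handling is trimmed, and CAP_NET_ADMIN is required.
 */
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <linux/if.h>
#include <linux/ip.h>
#include <linux/sockios.h>
#include <linux/if_tunnel.h>

static int add_gre_tunnel(const char *name, const char *local,
			  const char *remote)
{
	struct ip_tunnel_parm p = { 0 };
	struct ifreq ifr = { 0 };
	int fd, err;

	strncpy(p.name, name, IFNAMSIZ - 1);
	p.iph.version = 4;		/* the handler above insists on these */
	p.iph.ihl = 5;
	p.iph.protocol = IPPROTO_GRE;
	p.iph.frag_off = htons(IP_DF);	/* path MTU discovery on */
	p.iph.ttl = 64;
	p.iph.saddr = inet_addr(local);
	p.iph.daddr = inet_addr(remote);

	/* SIOCADDTUNNEL is issued against the fallback device "gre0",
	 * which exists once this module is loaded.
	 */
	strncpy(ifr.ifr_name, "gre0", IFNAMSIZ - 1);
	ifr.ifr_ifru.ifru_data = &p;

	fd = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd < 0)
		return -1;
	err = ioctl(fd, SIOCADDTUNNEL, &ifr);
	close(fd);
	return err;
}
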
 788/* Nice toy. Unfortunately, useless in real life :-)
 789   It allows one to construct a virtual multiprotocol broadcast "LAN"
 790   over the Internet, provided multicast routing is tuned.
 791
 792
 793   I have no idea whether this bicycle was invented before me,
 794   which is why I set ARPHRD_IPGRE to a random value.
 795   I have the impression that Cisco could make something similar,
 796   but this feature is apparently missing in IOS<=11.2(8).
 797
 798   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
 799   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
 800
 801   ping -t 255 224.66.66.66
 802
 803   If nobody answers, mbone does not work.
 804
 805   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
 806   ip addr add 10.66.66.<somewhat>/24 dev Universe
 807   ifconfig Universe up
 808   ifconfig Universe add fe80::<Your_real_addr>/10
 809   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
 810   ftp 10.66.66.66
 811   ...
 812   ftp fec0:6666:6666::193.233.7.65
 813   ...
 814 */
 815static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
 816			unsigned short type,
 817			const void *daddr, const void *saddr, unsigned int len)
 818{
 819	struct ip_tunnel *t = netdev_priv(dev);
 820	struct iphdr *iph;
 821	struct gre_base_hdr *greh;
 822
 823	iph = skb_push(skb, t->hlen + sizeof(*iph));
 824	greh = (struct gre_base_hdr *)(iph+1);
 825	greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
 826	greh->protocol = htons(type);
 827
 828	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
 829
 830	/* Set the source hardware address. */
 831	if (saddr)
 832		memcpy(&iph->saddr, saddr, 4);
 833	if (daddr)
 834		memcpy(&iph->daddr, daddr, 4);
 835	if (iph->daddr)
 836		return t->hlen + sizeof(*iph);
 837
 838	return -(t->hlen + sizeof(*iph));
 839}
 840
 841static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
 842{
 843	const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
 844	memcpy(haddr, &iph->saddr, 4);
 845	return 4;
 846}
 847
 848static const struct header_ops ipgre_header_ops = {
 849	.create	= ipgre_header,
 850	.parse	= ipgre_header_parse,
 851};
 852
 853#ifdef CONFIG_NET_IPGRE_BROADCAST
 854static int ipgre_open(struct net_device *dev)
 855{
 856	struct ip_tunnel *t = netdev_priv(dev);
 857
 858	if (ipv4_is_multicast(t->parms.iph.daddr)) {
 859		struct flowi4 fl4;
 860		struct rtable *rt;
 861
 862		rt = ip_route_output_gre(t->net, &fl4,
 863					 t->parms.iph.daddr,
 864					 t->parms.iph.saddr,
 865					 t->parms.o_key,
 866					 RT_TOS(t->parms.iph.tos),
 867					 t->parms.link);
 868		if (IS_ERR(rt))
 869			return -EADDRNOTAVAIL;
 870		dev = rt->dst.dev;
 871		ip_rt_put(rt);
 872		if (!__in_dev_get_rtnl(dev))
 873			return -EADDRNOTAVAIL;
 874		t->mlink = dev->ifindex;
 875		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
 876	}
 877	return 0;
 878}
 879
 880static int ipgre_close(struct net_device *dev)
 881{
 882	struct ip_tunnel *t = netdev_priv(dev);
 883
 884	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
 885		struct in_device *in_dev;
 886		in_dev = inetdev_by_index(t->net, t->mlink);
 887		if (in_dev)
 888			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
 889	}
 890	return 0;
 891}
 892#endif
 893
 894static const struct net_device_ops ipgre_netdev_ops = {
 895	.ndo_init		= ipgre_tunnel_init,
 896	.ndo_uninit		= ip_tunnel_uninit,
 897#ifdef CONFIG_NET_IPGRE_BROADCAST
 898	.ndo_open		= ipgre_open,
 899	.ndo_stop		= ipgre_close,
 900#endif
 901	.ndo_start_xmit		= ipgre_xmit,
 902	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
 903	.ndo_change_mtu		= ip_tunnel_change_mtu,
 904	.ndo_get_stats64	= ip_tunnel_get_stats64,
 905	.ndo_get_iflink		= ip_tunnel_get_iflink,
 906};
 907
 908#define GRE_FEATURES (NETIF_F_SG |		\
 909		      NETIF_F_FRAGLIST |	\
 910		      NETIF_F_HIGHDMA |		\
 911		      NETIF_F_HW_CSUM)
 912
 913static void ipgre_tunnel_setup(struct net_device *dev)
 914{
 915	dev->netdev_ops		= &ipgre_netdev_ops;
 916	dev->type		= ARPHRD_IPGRE;
 917	ip_tunnel_setup(dev, ipgre_net_id);
 918}
 919
 920static void __gre_tunnel_init(struct net_device *dev)
 921{
 922	struct ip_tunnel *tunnel;
 923
 924	tunnel = netdev_priv(dev);
 925	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
 926	tunnel->parms.iph.protocol = IPPROTO_GRE;
 927
 928	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
 929
 930	dev->features		|= GRE_FEATURES;
 931	dev->hw_features	|= GRE_FEATURES;
 932
 933	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
 934		/* TCP offload with GRE SEQ is not supported, nor
 935		 * can we support 2 levels of outer headers requiring
 936		 * an update.
 937		 */
 938		if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
 939		    (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
 940			dev->features    |= NETIF_F_GSO_SOFTWARE;
 941			dev->hw_features |= NETIF_F_GSO_SOFTWARE;
 942		}
 943
 944		/* Can use a lockless transmit, unless we generate
 945		 * output sequences
 946		 */
 947		dev->features |= NETIF_F_LLTX;
 948	}
 949}
 950
 951static int ipgre_tunnel_init(struct net_device *dev)
 952{
 953	struct ip_tunnel *tunnel = netdev_priv(dev);
 954	struct iphdr *iph = &tunnel->parms.iph;
 955
 956	__gre_tunnel_init(dev);
 957
 958	memcpy(dev->dev_addr, &iph->saddr, 4);
 959	memcpy(dev->broadcast, &iph->daddr, 4);
 960
 961	dev->flags		= IFF_NOARP;
 962	netif_keep_dst(dev);
 963	dev->addr_len		= 4;
 964
 965	if (iph->daddr && !tunnel->collect_md) {
 966#ifdef CONFIG_NET_IPGRE_BROADCAST
 967		if (ipv4_is_multicast(iph->daddr)) {
 968			if (!iph->saddr)
 969				return -EINVAL;
 970			dev->flags = IFF_BROADCAST;
 971			dev->header_ops = &ipgre_header_ops;
 972		}
 973#endif
 974	} else if (!tunnel->collect_md) {
 975		dev->header_ops = &ipgre_header_ops;
 976	}
 977
 978	return ip_tunnel_init(dev);
 979}
 980
 981static const struct gre_protocol ipgre_protocol = {
 982	.handler     = gre_rcv,
 983	.err_handler = gre_err,
 984};
 985
 986static int __net_init ipgre_init_net(struct net *net)
 987{
 988	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
 989}
 990
 991static void __net_exit ipgre_exit_batch_net(struct list_head *list_net)
 992{
 993	ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops);
 994}
 995
 996static struct pernet_operations ipgre_net_ops = {
 997	.init = ipgre_init_net,
 998	.exit_batch = ipgre_exit_batch_net,
 999	.id   = &ipgre_net_id,
1000	.size = sizeof(struct ip_tunnel_net),
1001};
1002
1003static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
1004				 struct netlink_ext_ack *extack)
1005{
1006	__be16 flags;
1007
1008	if (!data)
1009		return 0;
1010
1011	flags = 0;
1012	if (data[IFLA_GRE_IFLAGS])
1013		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1014	if (data[IFLA_GRE_OFLAGS])
1015		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1016	if (flags & (GRE_VERSION|GRE_ROUTING))
1017		return -EINVAL;
1018
1019	if (data[IFLA_GRE_COLLECT_METADATA] &&
1020	    data[IFLA_GRE_ENCAP_TYPE] &&
1021	    nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
1022		return -EINVAL;
1023
1024	return 0;
1025}
1026
1027static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
1028			      struct netlink_ext_ack *extack)
1029{
1030	__be32 daddr;
1031
1032	if (tb[IFLA_ADDRESS]) {
1033		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1034			return -EINVAL;
1035		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1036			return -EADDRNOTAVAIL;
1037	}
1038
1039	if (!data)
1040		goto out;
1041
1042	if (data[IFLA_GRE_REMOTE]) {
1043		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1044		if (!daddr)
1045			return -EINVAL;
1046	}
1047
1048out:
1049	return ipgre_tunnel_validate(tb, data, extack);
1050}
1051
1052static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
1053			   struct netlink_ext_ack *extack)
1054{
1055	__be16 flags = 0;
1056	int ret;
1057
1058	if (!data)
1059		return 0;
1060
1061	ret = ipgre_tap_validate(tb, data, extack);
1062	if (ret)
1063		return ret;
1064
1065	/* ERSPAN should carry only the GRE sequence and key flags */
1066	if (data[IFLA_GRE_OFLAGS])
1067		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1068	if (data[IFLA_GRE_IFLAGS])
1069		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1070	if (!data[IFLA_GRE_COLLECT_METADATA] &&
1071	    flags != (GRE_SEQ | GRE_KEY))
1072		return -EINVAL;
1073
1074	/* The ERSPAN session ID is only 10 bits wide. Since we reuse
1075	 * the 32-bit key field as the ID, check its range.
1076	 */
1077	if (data[IFLA_GRE_IKEY] &&
1078	    (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
1079		return -EINVAL;
1080
1081	if (data[IFLA_GRE_OKEY] &&
1082	    (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
1083		return -EINVAL;
1084
1085	return 0;
1086}
1087
1088static int ipgre_netlink_parms(struct net_device *dev,
1089				struct nlattr *data[],
1090				struct nlattr *tb[],
1091				struct ip_tunnel_parm *parms,
1092				__u32 *fwmark)
1093{
1094	struct ip_tunnel *t = netdev_priv(dev);
1095
1096	memset(parms, 0, sizeof(*parms));
1097
1098	parms->iph.protocol = IPPROTO_GRE;
1099
1100	if (!data)
1101		return 0;
1102
1103	if (data[IFLA_GRE_LINK])
1104		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1105
1106	if (data[IFLA_GRE_IFLAGS])
1107		parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
1108
1109	if (data[IFLA_GRE_OFLAGS])
1110		parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
1111
1112	if (data[IFLA_GRE_IKEY])
1113		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1114
1115	if (data[IFLA_GRE_OKEY])
1116		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1117
1118	if (data[IFLA_GRE_LOCAL])
1119		parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);
1120
1121	if (data[IFLA_GRE_REMOTE])
1122		parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);
1123
1124	if (data[IFLA_GRE_TTL])
1125		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1126
1127	if (data[IFLA_GRE_TOS])
1128		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1129
1130	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
1131		if (t->ignore_df)
1132			return -EINVAL;
1133		parms->iph.frag_off = htons(IP_DF);
1134	}
1135
1136	if (data[IFLA_GRE_COLLECT_METADATA]) {
1137		t->collect_md = true;
1138		if (dev->type == ARPHRD_IPGRE)
1139			dev->type = ARPHRD_NONE;
1140	}
1141
1142	if (data[IFLA_GRE_IGNORE_DF]) {
1143		if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
1144		  && (parms->iph.frag_off & htons(IP_DF)))
1145			return -EINVAL;
1146		t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
1147	}
1148
1149	if (data[IFLA_GRE_FWMARK])
1150		*fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
1151
1152	if (data[IFLA_GRE_ERSPAN_VER]) {
1153		t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
1154
1155		if (t->erspan_ver != 1 && t->erspan_ver != 2)
1156			return -EINVAL;
1157	}
1158
1159	if (t->erspan_ver == 1) {
1160		if (data[IFLA_GRE_ERSPAN_INDEX]) {
1161			t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
1162			if (t->index & ~INDEX_MASK)
1163				return -EINVAL;
1164		}
1165	} else if (t->erspan_ver == 2) {
1166		if (data[IFLA_GRE_ERSPAN_DIR]) {
1167			t->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
1168			if (t->dir & ~(DIR_MASK >> DIR_OFFSET))
1169				return -EINVAL;
1170		}
1171		if (data[IFLA_GRE_ERSPAN_HWID]) {
1172			t->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
1173			if (t->hwid & ~(HWID_MASK >> HWID_OFFSET))
1174				return -EINVAL;
1175		}
1176	}
1177
1178	return 0;
1179}
1180
1181/* This function returns true when ENCAP attributes are present in the nl msg */
1182static bool ipgre_netlink_encap_parms(struct nlattr *data[],
1183				      struct ip_tunnel_encap *ipencap)
1184{
1185	bool ret = false;
1186
1187	memset(ipencap, 0, sizeof(*ipencap));
1188
1189	if (!data)
1190		return ret;
1191
1192	if (data[IFLA_GRE_ENCAP_TYPE]) {
1193		ret = true;
1194		ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
1195	}
1196
1197	if (data[IFLA_GRE_ENCAP_FLAGS]) {
1198		ret = true;
1199		ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
1200	}
1201
1202	if (data[IFLA_GRE_ENCAP_SPORT]) {
1203		ret = true;
1204		ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
1205	}
1206
1207	if (data[IFLA_GRE_ENCAP_DPORT]) {
1208		ret = true;
1209		ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
1210	}
1211
1212	return ret;
1213}
1214
1215static int gre_tap_init(struct net_device *dev)
1216{
1217	__gre_tunnel_init(dev);
1218	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1219	netif_keep_dst(dev);
1220
1221	return ip_tunnel_init(dev);
1222}
1223
1224static const struct net_device_ops gre_tap_netdev_ops = {
1225	.ndo_init		= gre_tap_init,
1226	.ndo_uninit		= ip_tunnel_uninit,
1227	.ndo_start_xmit		= gre_tap_xmit,
1228	.ndo_set_mac_address 	= eth_mac_addr,
1229	.ndo_validate_addr	= eth_validate_addr,
1230	.ndo_change_mtu		= ip_tunnel_change_mtu,
1231	.ndo_get_stats64	= ip_tunnel_get_stats64,
1232	.ndo_get_iflink		= ip_tunnel_get_iflink,
1233	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
1234};
1235
1236static int erspan_tunnel_init(struct net_device *dev)
1237{
1238	struct ip_tunnel *tunnel = netdev_priv(dev);
1239
1240	tunnel->tun_hlen = 8;	/* 4-byte GRE base + 4-byte sequence number */
1241	tunnel->parms.iph.protocol = IPPROTO_GRE;
1242	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
1243		       erspan_hdr_len(tunnel->erspan_ver);
1244
1245	dev->features		|= GRE_FEATURES;
1246	dev->hw_features	|= GRE_FEATURES;
1247	dev->priv_flags		|= IFF_LIVE_ADDR_CHANGE;
1248	netif_keep_dst(dev);
1249
1250	return ip_tunnel_init(dev);
1251}
1252
1253static const struct net_device_ops erspan_netdev_ops = {
1254	.ndo_init		= erspan_tunnel_init,
1255	.ndo_uninit		= ip_tunnel_uninit,
1256	.ndo_start_xmit		= erspan_xmit,
1257	.ndo_set_mac_address	= eth_mac_addr,
1258	.ndo_validate_addr	= eth_validate_addr,
1259	.ndo_change_mtu		= ip_tunnel_change_mtu,
1260	.ndo_get_stats64	= ip_tunnel_get_stats64,
1261	.ndo_get_iflink		= ip_tunnel_get_iflink,
1262	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
1263};
1264
1265static void ipgre_tap_setup(struct net_device *dev)
1266{
1267	ether_setup(dev);
1268	dev->max_mtu = 0;
1269	dev->netdev_ops	= &gre_tap_netdev_ops;
1270	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1271	dev->priv_flags	|= IFF_LIVE_ADDR_CHANGE;
1272	ip_tunnel_setup(dev, gre_tap_net_id);
1273}
1274
1275static int ipgre_newlink(struct net *src_net, struct net_device *dev,
1276			 struct nlattr *tb[], struct nlattr *data[],
1277			 struct netlink_ext_ack *extack)
1278{
1279	struct ip_tunnel_parm p;
1280	struct ip_tunnel_encap ipencap;
1281	__u32 fwmark = 0;
1282	int err;
1283
1284	if (ipgre_netlink_encap_parms(data, &ipencap)) {
1285		struct ip_tunnel *t = netdev_priv(dev);
1286		err = ip_tunnel_encap_setup(t, &ipencap);
1287
1288		if (err < 0)
1289			return err;
1290	}
1291
1292	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
1293	if (err < 0)
1294		return err;
1295	return ip_tunnel_newlink(dev, tb, &p, fwmark);
1296}
1297
1298static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1299			    struct nlattr *data[],
1300			    struct netlink_ext_ack *extack)
1301{
1302	struct ip_tunnel *t = netdev_priv(dev);
1303	struct ip_tunnel_encap ipencap;
1304	__u32 fwmark = t->fwmark;
1305	struct ip_tunnel_parm p;
1306	int err;
1307
1308	if (ipgre_netlink_encap_parms(data, &ipencap)) {
1309		err = ip_tunnel_encap_setup(t, &ipencap);
1310
1311		if (err < 0)
1312			return err;
1313	}
1314
1315	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
1316	if (err < 0)
1317		return err;
1318
1319	err = ip_tunnel_changelink(dev, tb, &p, fwmark);
1320	if (err < 0)
1321		return err;
1322
1323	t->parms.i_flags = p.i_flags;
1324	t->parms.o_flags = p.o_flags;
1325
1326	if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
1327		ipgre_link_update(dev, !tb[IFLA_MTU]);
1328
1329	return 0;
1330}
1331
1332static size_t ipgre_get_size(const struct net_device *dev)
1333{
1334	return
1335		/* IFLA_GRE_LINK */
1336		nla_total_size(4) +
1337		/* IFLA_GRE_IFLAGS */
1338		nla_total_size(2) +
1339		/* IFLA_GRE_OFLAGS */
1340		nla_total_size(2) +
1341		/* IFLA_GRE_IKEY */
1342		nla_total_size(4) +
1343		/* IFLA_GRE_OKEY */
1344		nla_total_size(4) +
1345		/* IFLA_GRE_LOCAL */
1346		nla_total_size(4) +
1347		/* IFLA_GRE_REMOTE */
1348		nla_total_size(4) +
1349		/* IFLA_GRE_TTL */
1350		nla_total_size(1) +
1351		/* IFLA_GRE_TOS */
1352		nla_total_size(1) +
1353		/* IFLA_GRE_PMTUDISC */
1354		nla_total_size(1) +
1355		/* IFLA_GRE_ENCAP_TYPE */
1356		nla_total_size(2) +
1357		/* IFLA_GRE_ENCAP_FLAGS */
1358		nla_total_size(2) +
1359		/* IFLA_GRE_ENCAP_SPORT */
1360		nla_total_size(2) +
1361		/* IFLA_GRE_ENCAP_DPORT */
1362		nla_total_size(2) +
1363		/* IFLA_GRE_COLLECT_METADATA */
1364		nla_total_size(0) +
1365		/* IFLA_GRE_IGNORE_DF */
1366		nla_total_size(1) +
1367		/* IFLA_GRE_FWMARK */
1368		nla_total_size(4) +
1369		/* IFLA_GRE_ERSPAN_INDEX */
1370		nla_total_size(4) +
1371		/* IFLA_GRE_ERSPAN_VER */
1372		nla_total_size(1) +
1373		/* IFLA_GRE_ERSPAN_DIR */
1374		nla_total_size(1) +
1375		/* IFLA_GRE_ERSPAN_HWID */
1376		nla_total_size(2) +
1377		0;
1378}
1379
1380static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1381{
1382	struct ip_tunnel *t = netdev_priv(dev);
1383	struct ip_tunnel_parm *p = &t->parms;
1384	__be16 o_flags = p->o_flags;
1385
1386	if (t->erspan_ver == 1 || t->erspan_ver == 2) {
1387		if (!t->collect_md)
1388			o_flags |= TUNNEL_KEY;
1389
1390		if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
1391			goto nla_put_failure;
1392
1393		if (t->erspan_ver == 1) {
1394			if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
1395				goto nla_put_failure;
1396		} else {
1397			if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
1398				goto nla_put_failure;
1399			if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
1400				goto nla_put_failure;
1401		}
1402	}
1403
1404	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1405	    nla_put_be16(skb, IFLA_GRE_IFLAGS,
1406			 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
1407	    nla_put_be16(skb, IFLA_GRE_OFLAGS,
1408			 gre_tnl_flags_to_gre_flags(o_flags)) ||
1409	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1410	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1411	    nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1412	    nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
1413	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1414	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1415	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1416		       !!(p->iph.frag_off & htons(IP_DF))) ||
1417	    nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
1418		goto nla_put_failure;
1419
1420	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
1421			t->encap.type) ||
1422	    nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
1423			 t->encap.sport) ||
1424	    nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
1425			 t->encap.dport) ||
1426	    nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
1427			t->encap.flags))
1428		goto nla_put_failure;
1429
1430	if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
1431		goto nla_put_failure;
1432
1433	if (t->collect_md) {
1434		if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
1435			goto nla_put_failure;
1436	}
1437
1438	return 0;
1439
1440nla_put_failure:
1441	return -EMSGSIZE;
1442}
1443
1444static void erspan_setup(struct net_device *dev)
1445{
1446	struct ip_tunnel *t = netdev_priv(dev);
1447
1448	ether_setup(dev);
1449	dev->max_mtu = 0;
1450	dev->netdev_ops = &erspan_netdev_ops;
1451	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1452	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1453	ip_tunnel_setup(dev, erspan_net_id);
1454	t->erspan_ver = 1;
1455}
1456
1457static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1458	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
1459	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
1460	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
1461	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
1462	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
1463	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1464	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
1465	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
1466	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
1467	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
1468	[IFLA_GRE_ENCAP_TYPE]	= { .type = NLA_U16 },
1469	[IFLA_GRE_ENCAP_FLAGS]	= { .type = NLA_U16 },
1470	[IFLA_GRE_ENCAP_SPORT]	= { .type = NLA_U16 },
1471	[IFLA_GRE_ENCAP_DPORT]	= { .type = NLA_U16 },
1472	[IFLA_GRE_COLLECT_METADATA]	= { .type = NLA_FLAG },
1473	[IFLA_GRE_IGNORE_DF]	= { .type = NLA_U8 },
1474	[IFLA_GRE_FWMARK]	= { .type = NLA_U32 },
1475	[IFLA_GRE_ERSPAN_INDEX]	= { .type = NLA_U32 },
1476	[IFLA_GRE_ERSPAN_VER]	= { .type = NLA_U8 },
1477	[IFLA_GRE_ERSPAN_DIR]	= { .type = NLA_U8 },
1478	[IFLA_GRE_ERSPAN_HWID]	= { .type = NLA_U16 },
1479};
1480
1481static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1482	.kind		= "gre",
1483	.maxtype	= IFLA_GRE_MAX,
1484	.policy		= ipgre_policy,
1485	.priv_size	= sizeof(struct ip_tunnel),
1486	.setup		= ipgre_tunnel_setup,
1487	.validate	= ipgre_tunnel_validate,
1488	.newlink	= ipgre_newlink,
1489	.changelink	= ipgre_changelink,
1490	.dellink	= ip_tunnel_dellink,
1491	.get_size	= ipgre_get_size,
1492	.fill_info	= ipgre_fill_info,
1493	.get_link_net	= ip_tunnel_get_link_net,
1494};
1495
1496static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1497	.kind		= "gretap",
1498	.maxtype	= IFLA_GRE_MAX,
1499	.policy		= ipgre_policy,
1500	.priv_size	= sizeof(struct ip_tunnel),
1501	.setup		= ipgre_tap_setup,
1502	.validate	= ipgre_tap_validate,
1503	.newlink	= ipgre_newlink,
1504	.changelink	= ipgre_changelink,
1505	.dellink	= ip_tunnel_dellink,
1506	.get_size	= ipgre_get_size,
1507	.fill_info	= ipgre_fill_info,
1508	.get_link_net	= ip_tunnel_get_link_net,
1509};
1510
1511static struct rtnl_link_ops erspan_link_ops __read_mostly = {
1512	.kind		= "erspan",
1513	.maxtype	= IFLA_GRE_MAX,
1514	.policy		= ipgre_policy,
1515	.priv_size	= sizeof(struct ip_tunnel),
1516	.setup		= erspan_setup,
1517	.validate	= erspan_validate,
1518	.newlink	= ipgre_newlink,
1519	.changelink	= ipgre_changelink,
1520	.dellink	= ip_tunnel_dellink,
1521	.get_size	= ipgre_get_size,
1522	.fill_info	= ipgre_fill_info,
1523	.get_link_net	= ip_tunnel_get_link_net,
1524};
1525
1526struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
1527					u8 name_assign_type)
1528{
1529	struct nlattr *tb[IFLA_MAX + 1];
1530	struct net_device *dev;
1531	LIST_HEAD(list_kill);
1532	struct ip_tunnel *t;
1533	int err;
1534
1535	memset(&tb, 0, sizeof(tb));
1536
1537	dev = rtnl_create_link(net, name, name_assign_type,
1538			       &ipgre_tap_ops, tb, NULL);
1539	if (IS_ERR(dev))
1540		return dev;
1541
1542	/* Configure flow based GRE device. */
1543	t = netdev_priv(dev);
1544	t->collect_md = true;
1545
1546	err = ipgre_newlink(net, dev, tb, NULL, NULL);
1547	if (err < 0) {
1548		free_netdev(dev);
1549		return ERR_PTR(err);
1550	}
1551
1552	/* openvswitch users expect packet sizes to be unrestricted,
1553	 * so set the largest MTU we can.
1554	 */
1555	err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
1556	if (err)
1557		goto out;
1558
1559	err = rtnl_configure_link(dev, NULL);
1560	if (err < 0)
1561		goto out;
1562
1563	return dev;
1564out:
1565	ip_tunnel_dellink(dev, &list_kill);
1566	unregister_netdevice_many(&list_kill);
1567	return ERR_PTR(err);
1568}
1569EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
1570
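/* A minimal sketch of a caller, assuming the rtnl-locked context in
 * which openvswitch creates its flow-based gretap port; the function
 * name and the "gretap_fb%d" template are illustrative only.
 */
static struct net_device *sketch_create_fb_gretap(struct net *net)
{
	struct net_device *dev;

	ASSERT_RTNL();	/* rtnl_create_link() requires the RTNL lock */

	dev = gretap_fb_dev_create(net, "gretap_fb%d", NET_NAME_USER);
	if (IS_ERR(dev))
		return dev;

	/* The device is registered in collect_md mode with the largest
	 * MTU possible; the caller still has to bring it up.
	 */
	return dev;
}
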
1571static int __net_init ipgre_tap_init_net(struct net *net)
1572{
1573	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
1574}
1575
1576static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net)
1577{
1578	ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops);
1579}
1580
1581static struct pernet_operations ipgre_tap_net_ops = {
1582	.init = ipgre_tap_init_net,
1583	.exit_batch = ipgre_tap_exit_batch_net,
1584	.id   = &gre_tap_net_id,
1585	.size = sizeof(struct ip_tunnel_net),
1586};
1587
1588static int __net_init erspan_init_net(struct net *net)
1589{
1590	return ip_tunnel_init_net(net, erspan_net_id,
1591				  &erspan_link_ops, "erspan0");
1592}
1593
1594static void __net_exit erspan_exit_batch_net(struct list_head *net_list)
1595{
1596	ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops);
1597}
1598
1599static struct pernet_operations erspan_net_ops = {
1600	.init = erspan_init_net,
1601	.exit_batch = erspan_exit_batch_net,
1602	.id   = &erspan_net_id,
1603	.size = sizeof(struct ip_tunnel_net),
1604};
1605
1606static int __init ipgre_init(void)
1607{
1608	int err;
1609
1610	pr_info("GRE over IPv4 tunneling driver\n");
1611
1612	err = register_pernet_device(&ipgre_net_ops);
1613	if (err < 0)
1614		return err;
1615
1616	err = register_pernet_device(&ipgre_tap_net_ops);
1617	if (err < 0)
1618		goto pnet_tap_failed;
1619
1620	err = register_pernet_device(&erspan_net_ops);
1621	if (err < 0)
1622		goto pnet_erspan_failed;
1623
1624	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
1625	if (err < 0) {
1626		pr_info("%s: can't add protocol\n", __func__);
1627		goto add_proto_failed;
1628	}
1629
1630	err = rtnl_link_register(&ipgre_link_ops);
1631	if (err < 0)
1632		goto rtnl_link_failed;
1633
1634	err = rtnl_link_register(&ipgre_tap_ops);
1635	if (err < 0)
1636		goto tap_ops_failed;
1637
1638	err = rtnl_link_register(&erspan_link_ops);
1639	if (err < 0)
1640		goto erspan_link_failed;
1641
1642	return 0;
1643
1644erspan_link_failed:
1645	rtnl_link_unregister(&ipgre_tap_ops);
1646tap_ops_failed:
1647	rtnl_link_unregister(&ipgre_link_ops);
1648rtnl_link_failed:
1649	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1650add_proto_failed:
1651	unregister_pernet_device(&erspan_net_ops);
1652pnet_erspan_failed:
1653	unregister_pernet_device(&ipgre_tap_net_ops);
1654pnet_tap_failed:
1655	unregister_pernet_device(&ipgre_net_ops);
1656	return err;
1657}
1658
1659static void __exit ipgre_fini(void)
1660{
1661	rtnl_link_unregister(&ipgre_tap_ops);
1662	rtnl_link_unregister(&ipgre_link_ops);
1663	rtnl_link_unregister(&erspan_link_ops);
1664	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1665	unregister_pernet_device(&ipgre_tap_net_ops);
1666	unregister_pernet_device(&ipgre_net_ops);
1667	unregister_pernet_device(&erspan_net_ops);
1668}
1669
1670module_init(ipgre_init);
1671module_exit(ipgre_fini);
1672MODULE_LICENSE("GPL");
1673MODULE_ALIAS_RTNL_LINK("gre");
1674MODULE_ALIAS_RTNL_LINK("gretap");
1675MODULE_ALIAS_RTNL_LINK("erspan");
1676MODULE_ALIAS_NETDEV("gre0");
1677MODULE_ALIAS_NETDEV("gretap0");
1678MODULE_ALIAS_NETDEV("erspan0");
v6.9.4
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *	Linux NET3:	GRE over IP protocol decoder.
   4 *
   5 *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
   6 */
   7
   8#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
   9
  10#include <linux/capability.h>
  11#include <linux/module.h>
  12#include <linux/types.h>
  13#include <linux/kernel.h>
  14#include <linux/slab.h>
  15#include <linux/uaccess.h>
  16#include <linux/skbuff.h>
  17#include <linux/netdevice.h>
  18#include <linux/in.h>
  19#include <linux/tcp.h>
  20#include <linux/udp.h>
  21#include <linux/if_arp.h>
  22#include <linux/if_vlan.h>
  23#include <linux/init.h>
  24#include <linux/in6.h>
  25#include <linux/inetdevice.h>
  26#include <linux/igmp.h>
  27#include <linux/netfilter_ipv4.h>
  28#include <linux/etherdevice.h>
  29#include <linux/if_ether.h>
  30
  31#include <net/sock.h>
  32#include <net/ip.h>
  33#include <net/icmp.h>
  34#include <net/protocol.h>
  35#include <net/ip_tunnels.h>
  36#include <net/arp.h>
  37#include <net/checksum.h>
  38#include <net/dsfield.h>
  39#include <net/inet_ecn.h>
  40#include <net/xfrm.h>
  41#include <net/net_namespace.h>
  42#include <net/netns/generic.h>
  43#include <net/rtnetlink.h>
  44#include <net/gre.h>
  45#include <net/dst_metadata.h>
  46#include <net/erspan.h>
  47
  48/*
  49   Problems & solutions
  50   --------------------
  51
  52   1. The most important issue is detecting local dead loops.
  53   They would cause complete host lockup in transmit, which
  54   would be "resolved" by stack overflow or, if queueing is enabled,
  55   with infinite looping in net_bh.
  56
  57   We cannot track such dead loops during route installation,
  58   it is infeasible task. The most general solutions would be
  59   to keep skb->encapsulation counter (sort of local ttl),
  60   and silently drop packet when it expires. It is a good
  61   solution, but it supposes maintaining new variable in ALL
  62   skb, even if no tunneling is used.
  63
  64   Current solution: xmit_recursion breaks dead loops. This is a percpu
  65   counter, since when we enter the first ndo_xmit(), cpu migration is
  66   forbidden. We force an exit if this counter reaches RECURSION_LIMIT
  67
  68   2. Networking dead loops would not kill routers, but would really
  69   kill network. IP hop limit plays role of "t->recursion" in this case,
  70   if we copy it from packet being encapsulated to upper header.
  71   It is very good solution, but it introduces two problems:
  72
  73   - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
  74     do not work over tunnels.
  75   - traceroute does not work. I planned to relay ICMP from tunnel,
  76     so that this problem would be solved and traceroute output
  77     would even more informative. This idea appeared to be wrong:
  78     only Linux complies to rfc1812 now (yes, guys, Linux is the only
  79     true router now :-)), all routers (at least, in neighbourhood of mine)
  80     return only 8 bytes of payload. It is the end.
  81
  82   Hence, if we want that OSPF worked or traceroute said something reasonable,
  83   we should search for another solution.
  84
  85   One of them is to parse packet trying to detect inner encapsulation
  86   made by our node. It is difficult or even impossible, especially,
  87   taking into account fragmentation. TO be short, ttl is not solution at all.
  88
  89   Current solution: The solution was UNEXPECTEDLY SIMPLE.
  90   We force DF flag on tunnels with preconfigured hop limit,
  91   that is ALL. :-) Well, it does not remove the problem completely,
  92   but exponential growth of network traffic is changed to linear
  93   (branches, that exceed pmtu are pruned) and tunnel mtu
  94   rapidly degrades to value <68, where looping stops.
  95   Yes, it is not good if there exists a router in the loop,
  96   which does not force DF, even when encapsulating packets have DF set.
  97   But it is not our problem! Nobody could accuse us, we made
  98   all that we could make. Even if it is your gated who injected
  99   fatal route to network, even if it were you who configured
 100   fatal static route: you are innocent. :-)
 101
 102   Alexey Kuznetsov.
 103 */
 104
 105static bool log_ecn_error = true;
 106module_param(log_ecn_error, bool, 0644);
 107MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
 108
 109static struct rtnl_link_ops ipgre_link_ops __read_mostly;
 110static const struct header_ops ipgre_header_ops;
 111
 112static int ipgre_tunnel_init(struct net_device *dev);
 113static void erspan_build_header(struct sk_buff *skb,
 114				u32 id, u32 index,
 115				bool truncate, bool is_ipv4);
 116
 117static unsigned int ipgre_net_id __read_mostly;
 118static unsigned int gre_tap_net_id __read_mostly;
 119static unsigned int erspan_net_id __read_mostly;
 120
 121static int ipgre_err(struct sk_buff *skb, u32 info,
 122		     const struct tnl_ptk_info *tpi)
 123{
 124
  125	/* All the routers (except for Linux) return only
  126	   8 bytes of packet payload. It means that precise relaying of
  127	   ICMP in the real Internet is absolutely infeasible.
  128
  129	   Moreover, Cisco "wise men" put the GRE key in the third word
  130	   of the GRE header, which makes it impossible to maintain even
  131	   soft state for keyed GRE tunnels with checksums enabled. Tell
  132	   them "thank you".
  133
  134	   Well, I wonder: rfc1812 was written by a Cisco employee, so
  135	   why the hell do these idiots break standards established by
  136	   themselves???
  137	   */
 138	struct net *net = dev_net(skb->dev);
 139	struct ip_tunnel_net *itn;
 140	const struct iphdr *iph;
 141	const int type = icmp_hdr(skb)->type;
 142	const int code = icmp_hdr(skb)->code;
 143	unsigned int data_len = 0;
 144	struct ip_tunnel *t;
 145
 146	if (tpi->proto == htons(ETH_P_TEB))
 147		itn = net_generic(net, gre_tap_net_id);
 148	else if (tpi->proto == htons(ETH_P_ERSPAN) ||
 149		 tpi->proto == htons(ETH_P_ERSPAN2))
 150		itn = net_generic(net, erspan_net_id);
 151	else
 152		itn = net_generic(net, ipgre_net_id);
 153
 154	iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
 155	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
 156			     iph->daddr, iph->saddr, tpi->key);
 157
 158	if (!t)
 159		return -ENOENT;
 160
 161	switch (type) {
 162	default:
 163	case ICMP_PARAMETERPROB:
 164		return 0;
 165
 166	case ICMP_DEST_UNREACH:
 167		switch (code) {
 168		case ICMP_SR_FAILED:
 169		case ICMP_PORT_UNREACH:
 170			/* Impossible event. */
 171			return 0;
 172		default:
 173			/* All others are translated to HOST_UNREACH.
 174			   rfc2003 contains "deep thoughts" about NET_UNREACH,
 175			   I believe they are just ether pollution. --ANK
 176			 */
 177			break;
 178		}
 179		break;
 180
 181	case ICMP_TIME_EXCEEDED:
 182		if (code != ICMP_EXC_TTL)
 183			return 0;
  184		data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1; see note below */
 185		break;
 186
 187	case ICMP_REDIRECT:
 188		break;
 189	}
 190
 191#if IS_ENABLED(CONFIG_IPV6)
 192	if (tpi->proto == htons(ETH_P_IPV6) &&
 193	    !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
 194					type, data_len))
 195		return 0;
 196#endif
 197
 198	if (t->parms.iph.daddr == 0 ||
 199	    ipv4_is_multicast(t->parms.iph.daddr))
 200		return 0;
 201
 202	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
 203		return 0;
 204
 205	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
 206		t->err_count++;
 207	else
 208		t->err_count = 1;
 209	t->err_time = jiffies;
 210
 211	return 0;
 212}
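/* A short note on the RFC 4884 line above: for ICMP messages carrying
 * extensions, the second reserved byte of the ICMP header gives the
 * length of the enclosed original datagram in 32-bit words, hence the
 * multiplication by 4 to get bytes (e.g. a value of 17 means 68 bytes
 * of original datagram precede any extension objects).
 */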
 213
 214static void gre_err(struct sk_buff *skb, u32 info)
 215{
  216	/* See the comment at the top of ipgre_err() above; the same caveats
  217	 * about 8-byte ICMP payloads and Cisco's GRE key placement apply
  218	 * here, so precise ICMP relaying is equally infeasible.
  219	 */
 229
 230	const struct iphdr *iph = (struct iphdr *)skb->data;
 231	const int type = icmp_hdr(skb)->type;
 232	const int code = icmp_hdr(skb)->code;
 233	struct tnl_ptk_info tpi;
 234
 235	if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP),
 236			     iph->ihl * 4) < 0)
 237		return;
 238
 239	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
 240		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
 241				 skb->dev->ifindex, IPPROTO_GRE);
 242		return;
 243	}
 244	if (type == ICMP_REDIRECT) {
 245		ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex,
 246			      IPPROTO_GRE);
 247		return;
 248	}
 249
 250	ipgre_err(skb, info, &tpi);
 251}
 252
 253static bool is_erspan_type1(int gre_hdr_len)
 254{
  255	/* Both ERSPAN type I (version 0) and type II (version 1) use
  256	 * protocol 0x88BE, but type I has only a 4-byte GRE header while
  257	 * type II has an 8-byte one (see the model after this function).
  258	 */
 259	return gre_hdr_len == 4;
 260}
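/* A minimal model (not kernel code; the names below are stand-ins) of
 * how the GRE header length follows from the optional-field flags,
 * mirroring what gre_calc_hlen() computes: a 4-byte base header plus 4
 * bytes each for checksum, key and sequence number. ERSPAN type I sets
 * none of them (4 bytes); type II carries a key (8 bytes), which is
 * exactly the distinction is_erspan_type1() relies on.
 */
#if 0	/* illustrative sketch, not compiled */
enum { MODEL_CSUM = 1, MODEL_KEY = 2, MODEL_SEQ = 4 };

static int model_gre_hlen(int flags)
{
	int len = 4;			/* base: flags + protocol type */

	if (flags & MODEL_CSUM)
		len += 4;		/* checksum + reserved1 */
	if (flags & MODEL_KEY)
		len += 4;		/* key */
	if (flags & MODEL_SEQ)
		len += 4;		/* sequence number */
	return len;
}
#endif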
 261
 262static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
 263		      int gre_hdr_len)
 264{
 265	struct net *net = dev_net(skb->dev);
 266	struct metadata_dst *tun_dst = NULL;
 267	struct erspan_base_hdr *ershdr;
 268	struct ip_tunnel_net *itn;
 269	struct ip_tunnel *tunnel;
 270	const struct iphdr *iph;
 271	struct erspan_md2 *md2;
 272	int ver;
 273	int len;
 274
 275	itn = net_generic(net, erspan_net_id);
 276	iph = ip_hdr(skb);
 277	if (is_erspan_type1(gre_hdr_len)) {
 278		ver = 0;
 279		tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
 280					  tpi->flags | TUNNEL_NO_KEY,
 281					  iph->saddr, iph->daddr, 0);
 282	} else {
 283		if (unlikely(!pskb_may_pull(skb,
 284					    gre_hdr_len + sizeof(*ershdr))))
 285			return PACKET_REJECT;
 286
 287		ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
 288		ver = ershdr->ver;
 289		iph = ip_hdr(skb);
 290		tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
 291					  tpi->flags | TUNNEL_KEY,
 292					  iph->saddr, iph->daddr, tpi->key);
 293	}
 294
 295	if (tunnel) {
 296		if (is_erspan_type1(gre_hdr_len))
 297			len = gre_hdr_len;
 298		else
 299			len = gre_hdr_len + erspan_hdr_len(ver);
 300
 301		if (unlikely(!pskb_may_pull(skb, len)))
 302			return PACKET_REJECT;
 303
 304		if (__iptunnel_pull_header(skb,
 305					   len,
 306					   htons(ETH_P_TEB),
 307					   false, false) < 0)
 308			goto drop;
 309
 310		if (tunnel->collect_md) {
 311			struct erspan_metadata *pkt_md, *md;
 312			struct ip_tunnel_info *info;
 313			unsigned char *gh;
 314			__be64 tun_id;
 315			__be16 flags;
 316
 317			tpi->flags |= TUNNEL_KEY;
 318			flags = tpi->flags;
 319			tun_id = key32_to_tunnel_id(tpi->key);
 320
 321			tun_dst = ip_tun_rx_dst(skb, flags,
 322						tun_id, sizeof(*md));
 323			if (!tun_dst)
 324				return PACKET_REJECT;
 325
  326			/* skb may be uncloned in __iptunnel_pull_header(), so
  327			 * the old pkt_md is no longer valid and we need to
  328			 * recompute it (see the note after this function)
  329			 */
 330			gh = skb_network_header(skb) +
 331			     skb_network_header_len(skb);
 332			pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
 333							    sizeof(*ershdr));
 334			md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
 335			md->version = ver;
 336			md2 = &md->u.md2;
 337			memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
 338						       ERSPAN_V2_MDSIZE);
 339
 340			info = &tun_dst->u.tun_info;
 341			info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
 342			info->options_len = sizeof(*md);
 343		}
 344
 345		skb_reset_mac_header(skb);
 346		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
 347		return PACKET_RCVD;
 348	}
 349	return PACKET_REJECT;
 350
 351drop:
 352	kfree_skb(skb);
 353	return PACKET_RCVD;
 354}
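/* Why erspan_rcv() recomputes pkt_md after the header pull (see the
 * comment inside the collect_md branch): __iptunnel_pull_header() may
 * unclone the skb, i.e. copy its data to fresh memory, so any pointer
 * taken into the buffer beforehand could dangle. Schematically:
 *
 *	old = ptr_into(skb->data);
 *	__iptunnel_pull_header(skb, ...);	// may reallocate data
 *	use(old);				// BUG: stale pointer
 *
 * versus the safe pattern used above, where the pointer is derived from
 * skb_network_header() only after the pull.
 */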
 355
 356static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
 357		       struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
 358{
 359	struct metadata_dst *tun_dst = NULL;
 360	const struct iphdr *iph;
 361	struct ip_tunnel *tunnel;
 362
 363	iph = ip_hdr(skb);
 364	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
 365				  iph->saddr, iph->daddr, tpi->key);
 366
 367	if (tunnel) {
 368		const struct iphdr *tnl_params;
 369
 370		if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
 371					   raw_proto, false) < 0)
 372			goto drop;
 373
 374		/* Special case for ipgre_header_parse(), which expects the
 375		 * mac_header to point to the outer IP header.
 376		 */
 377		if (tunnel->dev->header_ops == &ipgre_header_ops)
 378			skb_pop_mac_header(skb);
 379		else
 380			skb_reset_mac_header(skb);
 381
 382		tnl_params = &tunnel->parms.iph;
 383		if (tunnel->collect_md || tnl_params->daddr == 0) {
 384			__be16 flags;
 385			__be64 tun_id;
 386
 387			flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
 388			tun_id = key32_to_tunnel_id(tpi->key);
 389			tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
 390			if (!tun_dst)
 391				return PACKET_REJECT;
 392		}
 393
 394		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
 395		return PACKET_RCVD;
 396	}
 397	return PACKET_NEXT;
 398
 399drop:
 400	kfree_skb(skb);
 401	return PACKET_RCVD;
 402}
 403
 404static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
 405		     int hdr_len)
 406{
 407	struct net *net = dev_net(skb->dev);
 408	struct ip_tunnel_net *itn;
 409	int res;
 410
 411	if (tpi->proto == htons(ETH_P_TEB))
 412		itn = net_generic(net, gre_tap_net_id);
 413	else
 414		itn = net_generic(net, ipgre_net_id);
 415
 416	res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
 417	if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
  418		/* ipgre tunnels in collect-metadata mode should also
  419		 * receive ETH_P_TEB traffic.
 420		 */
 421		itn = net_generic(net, ipgre_net_id);
 422		res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
 423	}
 424	return res;
 425}
 426
 427static int gre_rcv(struct sk_buff *skb)
 428{
 429	struct tnl_ptk_info tpi;
 430	bool csum_err = false;
 431	int hdr_len;
 432
 433#ifdef CONFIG_NET_IPGRE_BROADCAST
 434	if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
 435		/* Looped back packet, drop it! */
 436		if (rt_is_output_route(skb_rtable(skb)))
 437			goto drop;
 438	}
 439#endif
 440
 441	hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
 442	if (hdr_len < 0)
 443		goto drop;
 444
 445	if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
 446		     tpi.proto == htons(ETH_P_ERSPAN2))) {
 447		if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
 448			return 0;
 449		goto out;
 450	}
 451
 452	if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
 453		return 0;
 454
 455out:
 456	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
 457drop:
 458	kfree_skb(skb);
 459	return 0;
 460}
 461
 462static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
 463		       const struct iphdr *tnl_params,
 464		       __be16 proto)
 465{
 466	struct ip_tunnel *tunnel = netdev_priv(dev);
 467	__be16 flags = tunnel->parms.o_flags;
 468
 469	/* Push GRE header. */
 470	gre_build_header(skb, tunnel->tun_hlen,
 471			 flags, proto, tunnel->parms.o_key,
 472			 (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
 473
 474	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
 475}
 476
 477static int gre_handle_offloads(struct sk_buff *skb, bool csum)
 478{
 479	return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
 480}
 481
 482static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
 483			__be16 proto)
 484{
 485	struct ip_tunnel *tunnel = netdev_priv(dev);
 486	struct ip_tunnel_info *tun_info;
 487	const struct ip_tunnel_key *key;
 488	int tunnel_hlen;
 489	__be16 flags;
 490
 491	tun_info = skb_tunnel_info(skb);
 492	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
 493		     ip_tunnel_info_af(tun_info) != AF_INET))
 494		goto err_free_skb;
 495
 496	key = &tun_info->key;
 497	tunnel_hlen = gre_calc_hlen(key->tun_flags);
 498
 499	if (skb_cow_head(skb, dev->needed_headroom))
 500		goto err_free_skb;
 501
 502	/* Push Tunnel header. */
 503	if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
 504		goto err_free_skb;
 505
 506	flags = tun_info->key.tun_flags &
 507		(TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
 508	gre_build_header(skb, tunnel_hlen, flags, proto,
 509			 tunnel_id_to_key32(tun_info->key.tun_id),
 510			 (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
 511
 512	ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
 513
 514	return;
 515
 516err_free_skb:
 517	kfree_skb(skb);
 518	DEV_STATS_INC(dev, tx_dropped);
 519}
 520
 521static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
 522{
 523	struct ip_tunnel *tunnel = netdev_priv(dev);
 524	struct ip_tunnel_info *tun_info;
 525	const struct ip_tunnel_key *key;
 526	struct erspan_metadata *md;
 527	bool truncate = false;
 528	__be16 proto;
 529	int tunnel_hlen;
 530	int version;
 531	int nhoff;
 532
 533	tun_info = skb_tunnel_info(skb);
 534	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
 535		     ip_tunnel_info_af(tun_info) != AF_INET))
 536		goto err_free_skb;
 537
 538	key = &tun_info->key;
 539	if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
 540		goto err_free_skb;
 541	if (tun_info->options_len < sizeof(*md))
 542		goto err_free_skb;
 543	md = ip_tunnel_info_opts(tun_info);
 544
  545	/* ERSPAN has a fixed 8-byte GRE header */
 546	version = md->version;
 547	tunnel_hlen = 8 + erspan_hdr_len(version);
 548
 549	if (skb_cow_head(skb, dev->needed_headroom))
 550		goto err_free_skb;
 551
 552	if (gre_handle_offloads(skb, false))
 553		goto err_free_skb;
 554
 555	if (skb->len > dev->mtu + dev->hard_header_len) {
 556		if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
 557			goto err_free_skb;
 558		truncate = true;
 559	}
 560
 561	nhoff = skb_network_offset(skb);
 562	if (skb->protocol == htons(ETH_P_IP) &&
 563	    (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
 564		truncate = true;
 565
 566	if (skb->protocol == htons(ETH_P_IPV6)) {
 567		int thoff;
 568
 569		if (skb_transport_header_was_set(skb))
 570			thoff = skb_transport_offset(skb);
 571		else
 572			thoff = nhoff + sizeof(struct ipv6hdr);
 573		if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)
 574			truncate = true;
 575	}
 576
 577	if (version == 1) {
 578		erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
 579				    ntohl(md->u.index), truncate, true);
 580		proto = htons(ETH_P_ERSPAN);
 581	} else if (version == 2) {
 582		erspan_build_header_v2(skb,
 583				       ntohl(tunnel_id_to_key32(key->tun_id)),
 584				       md->u.md2.dir,
 585				       get_hwid(&md->u.md2),
 586				       truncate, true);
 587		proto = htons(ETH_P_ERSPAN2);
 588	} else {
 589		goto err_free_skb;
 590	}
 591
 592	gre_build_header(skb, 8, TUNNEL_SEQ,
 593			 proto, 0, htonl(atomic_fetch_inc(&tunnel->o_seqno)));
 594
 595	ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
 596
 597	return;
 598
 599err_free_skb:
 600	kfree_skb(skb);
 601	DEV_STATS_INC(dev, tx_dropped);
 602}
 603
 604static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
 605{
 606	struct ip_tunnel_info *info = skb_tunnel_info(skb);
 607	const struct ip_tunnel_key *key;
 608	struct rtable *rt;
 609	struct flowi4 fl4;
 610
 611	if (ip_tunnel_info_af(info) != AF_INET)
 612		return -EINVAL;
 613
 614	key = &info->key;
 615	ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src,
 616			    tunnel_id_to_key32(key->tun_id),
 617			    key->tos & ~INET_ECN_MASK, dev_net(dev), 0,
 618			    skb->mark, skb_get_hash(skb), key->flow_flags);
 619	rt = ip_route_output_key(dev_net(dev), &fl4);
 620	if (IS_ERR(rt))
 621		return PTR_ERR(rt);
 622
 623	ip_rt_put(rt);
 624	info->key.u.ipv4.src = fl4.saddr;
 625	return 0;
 626}
 627
 628static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
 629			      struct net_device *dev)
 630{
 631	struct ip_tunnel *tunnel = netdev_priv(dev);
 632	const struct iphdr *tnl_params;
 633
 634	if (!pskb_inet_may_pull(skb))
 635		goto free_skb;
 636
 637	if (tunnel->collect_md) {
 638		gre_fb_xmit(skb, dev, skb->protocol);
 639		return NETDEV_TX_OK;
 640	}
 641
 642	if (dev->header_ops) {
 643		int pull_len = tunnel->hlen + sizeof(struct iphdr);
 644
 645		if (skb_cow_head(skb, 0))
 646			goto free_skb;
 647
 648		tnl_params = (const struct iphdr *)skb->data;
 649
 650		if (!pskb_network_may_pull(skb, pull_len))
 651			goto free_skb;
 652
 653		/* ip_tunnel_xmit() needs skb->data pointing to gre header. */
 654		skb_pull(skb, pull_len);
 655		skb_reset_mac_header(skb);
 656
 657		if (skb->ip_summed == CHECKSUM_PARTIAL &&
 658		    skb_checksum_start(skb) < skb->data)
 659			goto free_skb;
 660	} else {
 661		if (skb_cow_head(skb, dev->needed_headroom))
 662			goto free_skb;
 663
 664		tnl_params = &tunnel->parms.iph;
 665	}
 666
 667	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
 668		goto free_skb;
 669
 670	__gre_xmit(skb, dev, tnl_params, skb->protocol);
 671	return NETDEV_TX_OK;
 672
 673free_skb:
 674	kfree_skb(skb);
 675	DEV_STATS_INC(dev, tx_dropped);
 676	return NETDEV_TX_OK;
 677}
 678
 679static netdev_tx_t erspan_xmit(struct sk_buff *skb,
 680			       struct net_device *dev)
 681{
 682	struct ip_tunnel *tunnel = netdev_priv(dev);
 683	bool truncate = false;
 684	__be16 proto;
 685
 686	if (!pskb_inet_may_pull(skb))
 687		goto free_skb;
 688
 689	if (tunnel->collect_md) {
 690		erspan_fb_xmit(skb, dev);
 691		return NETDEV_TX_OK;
 692	}
 693
 694	if (gre_handle_offloads(skb, false))
 695		goto free_skb;
 696
 697	if (skb_cow_head(skb, dev->needed_headroom))
 698		goto free_skb;
 699
 700	if (skb->len > dev->mtu + dev->hard_header_len) {
 701		if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
 702			goto free_skb;
 703		truncate = true;
 704	}
 705
 706	/* Push ERSPAN header */
 707	if (tunnel->erspan_ver == 0) {
 708		proto = htons(ETH_P_ERSPAN);
 709		tunnel->parms.o_flags &= ~TUNNEL_SEQ;
 710	} else if (tunnel->erspan_ver == 1) {
 711		erspan_build_header(skb, ntohl(tunnel->parms.o_key),
 712				    tunnel->index,
 713				    truncate, true);
 714		proto = htons(ETH_P_ERSPAN);
 715	} else if (tunnel->erspan_ver == 2) {
 716		erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
 717				       tunnel->dir, tunnel->hwid,
 718				       truncate, true);
 719		proto = htons(ETH_P_ERSPAN2);
 720	} else {
 721		goto free_skb;
 722	}
 723
 724	tunnel->parms.o_flags &= ~TUNNEL_KEY;
 725	__gre_xmit(skb, dev, &tunnel->parms.iph, proto);
 726	return NETDEV_TX_OK;
 727
 728free_skb:
 729	kfree_skb(skb);
 730	DEV_STATS_INC(dev, tx_dropped);
 731	return NETDEV_TX_OK;
 732}
 733
 734static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
 735				struct net_device *dev)
 736{
 737	struct ip_tunnel *tunnel = netdev_priv(dev);
 738
 739	if (!pskb_inet_may_pull(skb))
 740		goto free_skb;
 741
 742	if (tunnel->collect_md) {
 743		gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
 744		return NETDEV_TX_OK;
 745	}
 746
 747	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
 748		goto free_skb;
 749
 750	if (skb_cow_head(skb, dev->needed_headroom))
 751		goto free_skb;
 752
 753	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
 754	return NETDEV_TX_OK;
 755
 756free_skb:
 757	kfree_skb(skb);
 758	DEV_STATS_INC(dev, tx_dropped);
 759	return NETDEV_TX_OK;
 760}
 761
 762static void ipgre_link_update(struct net_device *dev, bool set_mtu)
 763{
 764	struct ip_tunnel *tunnel = netdev_priv(dev);
 765	__be16 flags;
 766	int len;
 767
 768	len = tunnel->tun_hlen;
 769	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
 770	len = tunnel->tun_hlen - len;
 771	tunnel->hlen = tunnel->hlen + len;
 772
 773	if (dev->header_ops)
 774		dev->hard_header_len += len;
 775	else
 776		dev->needed_headroom += len;
 777
 778	if (set_mtu)
 779		dev->mtu = max_t(int, dev->mtu - len, 68);
 780
 781	flags = tunnel->parms.o_flags;
 782
 783	if (flags & TUNNEL_SEQ ||
 784	    (flags & TUNNEL_CSUM && tunnel->encap.type != TUNNEL_ENCAP_NONE)) {
 785		dev->features &= ~NETIF_F_GSO_SOFTWARE;
 786		dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
 787	} else {
 788		dev->features |= NETIF_F_GSO_SOFTWARE;
 789		dev->hw_features |= NETIF_F_GSO_SOFTWARE;
 790	}
 791}
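/* A worked example of the bookkeeping above (values are illustrative):
 * changing o_flags from no options to CSUM|KEY|SEQ grows the GRE header
 * from 4 to 16 bytes, so len == 12; the headroom reservation grows by
 * 12 and, with set_mtu, dev->mtu shrinks by 12, clamped to the 68-byte
 * IPv4 minimum.
 */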
 792
 793static int ipgre_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p,
 794			    int cmd)
 795{
 796	int err;
 797
 798	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
 799		if (p->iph.version != 4 || p->iph.protocol != IPPROTO_GRE ||
 800		    p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF)) ||
 801		    ((p->i_flags | p->o_flags) & (GRE_VERSION | GRE_ROUTING)))
 802			return -EINVAL;
 803	}
 804
 805	p->i_flags = gre_flags_to_tnl_flags(p->i_flags);
 806	p->o_flags = gre_flags_to_tnl_flags(p->o_flags);
 807
 808	err = ip_tunnel_ctl(dev, p, cmd);
 809	if (err)
 810		return err;
 811
 812	if (cmd == SIOCCHGTUNNEL) {
 813		struct ip_tunnel *t = netdev_priv(dev);
 814
 815		t->parms.i_flags = p->i_flags;
 816		t->parms.o_flags = p->o_flags;
 817
 818		if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
 819			ipgre_link_update(dev, true);
 820	}
 821
 822	p->i_flags = gre_tnl_flags_to_gre_flags(p->i_flags);
 823	p->o_flags = gre_tnl_flags_to_gre_flags(p->o_flags);
 824	return 0;
 825}
 826
  827/* Nice toy. Unfortunately, useless in real life :-)
  828   It allows one to construct a virtual multiprotocol broadcast "LAN"
  829   over the Internet, provided multicast routing is tuned.
  830
  831
  832   I have no idea whether this bicycle was invented before me,
  833   so I had to set ARPHRD_IPGRE to a random value.
  834   I have the impression that Cisco could do something similar,
  835   but the feature is apparently missing in IOS<=11.2(8).
 836
 837   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
 838   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
 839
 840   ping -t 255 224.66.66.66
 841
 842   If nobody answers, mbone does not work.
 843
 844   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
 845   ip addr add 10.66.66.<somewhat>/24 dev Universe
 846   ifconfig Universe up
 847   ifconfig Universe add fe80::<Your_real_addr>/10
 848   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
 849   ftp 10.66.66.66
 850   ...
 851   ftp fec0:6666:6666::193.233.7.65
 852   ...
 853 */
 854static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
 855			unsigned short type,
 856			const void *daddr, const void *saddr, unsigned int len)
 857{
 858	struct ip_tunnel *t = netdev_priv(dev);
 859	struct iphdr *iph;
 860	struct gre_base_hdr *greh;
 861
 862	iph = skb_push(skb, t->hlen + sizeof(*iph));
 863	greh = (struct gre_base_hdr *)(iph+1);
 864	greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
 865	greh->protocol = htons(type);
 866
 867	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
 868
 869	/* Set the source hardware address. */
 870	if (saddr)
 871		memcpy(&iph->saddr, saddr, 4);
 872	if (daddr)
 873		memcpy(&iph->daddr, daddr, 4);
 874	if (iph->daddr)
 875		return t->hlen + sizeof(*iph);
 876
 877	return -(t->hlen + sizeof(*iph));
 878}
 879
 880static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
 881{
 882	const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
 883	memcpy(haddr, &iph->saddr, 4);
 884	return 4;
 885}
 886
 887static const struct header_ops ipgre_header_ops = {
 888	.create	= ipgre_header,
 889	.parse	= ipgre_header_parse,
 890};
 891
 892#ifdef CONFIG_NET_IPGRE_BROADCAST
 893static int ipgre_open(struct net_device *dev)
 894{
 895	struct ip_tunnel *t = netdev_priv(dev);
 896
 897	if (ipv4_is_multicast(t->parms.iph.daddr)) {
 898		struct flowi4 fl4;
 899		struct rtable *rt;
 900
 901		rt = ip_route_output_gre(t->net, &fl4,
 902					 t->parms.iph.daddr,
 903					 t->parms.iph.saddr,
 904					 t->parms.o_key,
 905					 RT_TOS(t->parms.iph.tos),
 906					 t->parms.link);
 907		if (IS_ERR(rt))
 908			return -EADDRNOTAVAIL;
 909		dev = rt->dst.dev;
 910		ip_rt_put(rt);
 911		if (!__in_dev_get_rtnl(dev))
 912			return -EADDRNOTAVAIL;
 913		t->mlink = dev->ifindex;
 914		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
 915	}
 916	return 0;
 917}
 918
 919static int ipgre_close(struct net_device *dev)
 920{
 921	struct ip_tunnel *t = netdev_priv(dev);
 922
 923	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
 924		struct in_device *in_dev;
 925		in_dev = inetdev_by_index(t->net, t->mlink);
 926		if (in_dev)
 927			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
 928	}
 929	return 0;
 930}
 931#endif
 932
 933static const struct net_device_ops ipgre_netdev_ops = {
 934	.ndo_init		= ipgre_tunnel_init,
 935	.ndo_uninit		= ip_tunnel_uninit,
 936#ifdef CONFIG_NET_IPGRE_BROADCAST
 937	.ndo_open		= ipgre_open,
 938	.ndo_stop		= ipgre_close,
 939#endif
 940	.ndo_start_xmit		= ipgre_xmit,
 941	.ndo_siocdevprivate	= ip_tunnel_siocdevprivate,
 942	.ndo_change_mtu		= ip_tunnel_change_mtu,
 943	.ndo_get_stats64	= dev_get_tstats64,
 944	.ndo_get_iflink		= ip_tunnel_get_iflink,
 945	.ndo_tunnel_ctl		= ipgre_tunnel_ctl,
 946};
 947
 948#define GRE_FEATURES (NETIF_F_SG |		\
 949		      NETIF_F_FRAGLIST |	\
 950		      NETIF_F_HIGHDMA |		\
 951		      NETIF_F_HW_CSUM)
 952
 953static void ipgre_tunnel_setup(struct net_device *dev)
 954{
 955	dev->netdev_ops		= &ipgre_netdev_ops;
 956	dev->type		= ARPHRD_IPGRE;
 957	ip_tunnel_setup(dev, ipgre_net_id);
 958}
 959
 960static void __gre_tunnel_init(struct net_device *dev)
 961{
 962	struct ip_tunnel *tunnel;
 963	__be16 flags;
 964
 965	tunnel = netdev_priv(dev);
 966	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
 967	tunnel->parms.iph.protocol = IPPROTO_GRE;
 968
 969	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
 970	dev->needed_headroom = tunnel->hlen + sizeof(tunnel->parms.iph);
 971
 972	dev->features		|= GRE_FEATURES | NETIF_F_LLTX;
 973	dev->hw_features	|= GRE_FEATURES;
 974
 975	flags = tunnel->parms.o_flags;
 976
  977	/* TCP offload with GRE SEQ is not supported, nor can we support two
  978	 * levels of outer headers requiring an update (see sketch below).
  979	 */
 980	if (flags & TUNNEL_SEQ)
 981		return;
 982	if (flags & TUNNEL_CSUM && tunnel->encap.type != TUNNEL_ENCAP_NONE)
 983		return;
 984
 985	dev->features |= NETIF_F_GSO_SOFTWARE;
 986	dev->hw_features |= NETIF_F_GSO_SOFTWARE;
 987}
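/* A standalone model (flag names are stand-ins, not kernel symbols) of
 * the GSO decision above: software GSO stays enabled only when GRE
 * sequence numbers are off and, if checksumming is on, there is no
 * extra encapsulation layer whose outer header would also need updating.
 */
#if 0	/* illustrative sketch, not compiled */
#include <stdbool.h>

static bool model_can_gso(bool seq, bool csum, bool has_encap)
{
	if (seq)
		return false;	/* TCP offload with GRE SEQ unsupported */
	if (csum && has_encap)
		return false;	/* two outer headers would need updates */
	return true;
}
#endif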
 988
 989static int ipgre_tunnel_init(struct net_device *dev)
 990{
 991	struct ip_tunnel *tunnel = netdev_priv(dev);
 992	struct iphdr *iph = &tunnel->parms.iph;
 993
 994	__gre_tunnel_init(dev);
 995
 996	__dev_addr_set(dev, &iph->saddr, 4);
 997	memcpy(dev->broadcast, &iph->daddr, 4);
 998
 999	dev->flags		= IFF_NOARP;
1000	netif_keep_dst(dev);
1001	dev->addr_len		= 4;
1002
1003	if (iph->daddr && !tunnel->collect_md) {
1004#ifdef CONFIG_NET_IPGRE_BROADCAST
1005		if (ipv4_is_multicast(iph->daddr)) {
1006			if (!iph->saddr)
1007				return -EINVAL;
1008			dev->flags = IFF_BROADCAST;
1009			dev->header_ops = &ipgre_header_ops;
1010			dev->hard_header_len = tunnel->hlen + sizeof(*iph);
1011			dev->needed_headroom = 0;
1012		}
1013#endif
1014	} else if (!tunnel->collect_md) {
1015		dev->header_ops = &ipgre_header_ops;
1016		dev->hard_header_len = tunnel->hlen + sizeof(*iph);
1017		dev->needed_headroom = 0;
1018	}
1019
1020	return ip_tunnel_init(dev);
1021}
1022
1023static const struct gre_protocol ipgre_protocol = {
1024	.handler     = gre_rcv,
1025	.err_handler = gre_err,
1026};
1027
1028static int __net_init ipgre_init_net(struct net *net)
1029{
1030	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
1031}
1032
1033static void __net_exit ipgre_exit_batch_rtnl(struct list_head *list_net,
1034					     struct list_head *dev_to_kill)
1035{
1036	ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops,
1037			      dev_to_kill);
1038}
1039
1040static struct pernet_operations ipgre_net_ops = {
1041	.init = ipgre_init_net,
1042	.exit_batch_rtnl = ipgre_exit_batch_rtnl,
1043	.id   = &ipgre_net_id,
1044	.size = sizeof(struct ip_tunnel_net),
1045};
1046
1047static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
1048				 struct netlink_ext_ack *extack)
1049{
1050	__be16 flags;
1051
1052	if (!data)
1053		return 0;
1054
1055	flags = 0;
1056	if (data[IFLA_GRE_IFLAGS])
1057		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1058	if (data[IFLA_GRE_OFLAGS])
1059		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1060	if (flags & (GRE_VERSION|GRE_ROUTING))
1061		return -EINVAL;
1062
1063	if (data[IFLA_GRE_COLLECT_METADATA] &&
1064	    data[IFLA_GRE_ENCAP_TYPE] &&
1065	    nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
1066		return -EINVAL;
1067
1068	return 0;
1069}
1070
1071static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
1072			      struct netlink_ext_ack *extack)
1073{
1074	__be32 daddr;
1075
1076	if (tb[IFLA_ADDRESS]) {
1077		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1078			return -EINVAL;
1079		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1080			return -EADDRNOTAVAIL;
1081	}
1082
1083	if (!data)
1084		goto out;
1085
1086	if (data[IFLA_GRE_REMOTE]) {
1087		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1088		if (!daddr)
1089			return -EINVAL;
1090	}
1091
1092out:
1093	return ipgre_tunnel_validate(tb, data, extack);
1094}
1095
1096static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
1097			   struct netlink_ext_ack *extack)
1098{
1099	__be16 flags = 0;
1100	int ret;
1101
1102	if (!data)
1103		return 0;
1104
1105	ret = ipgre_tap_validate(tb, data, extack);
1106	if (ret)
1107		return ret;
1108
1109	if (data[IFLA_GRE_ERSPAN_VER] &&
1110	    nla_get_u8(data[IFLA_GRE_ERSPAN_VER]) == 0)
1111		return 0;
1112
 1113	/* ERSPAN type II/III should only have the GRE sequence and key flags */
1114	if (data[IFLA_GRE_OFLAGS])
1115		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1116	if (data[IFLA_GRE_IFLAGS])
1117		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1118	if (!data[IFLA_GRE_COLLECT_METADATA] &&
1119	    flags != (GRE_SEQ | GRE_KEY))
1120		return -EINVAL;
1121
 1122	/* The ERSPAN session ID is only 10 bits wide. Since we reuse the
 1123	 * 32-bit key field as the ID, check its range (see model below).
 1124	 */
1125	if (data[IFLA_GRE_IKEY] &&
1126	    (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
1127		return -EINVAL;
1128
1129	if (data[IFLA_GRE_OKEY] &&
1130	    (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
1131		return -EINVAL;
1132
1133	return 0;
1134}
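/* A model of the 10-bit session-ID check above (the 0x3ff constant
 * mirrors ID_MASK from include/net/erspan.h; the helper itself is made
 * up for illustration):
 */
#if 0	/* illustrative sketch, not compiled */
#include <stdbool.h>

static bool model_session_id_ok(unsigned int key)
{
	return (key & ~0x3ffu) == 0;	/* only the low 10 bits may be set */
}
#endif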
1135
1136static int ipgre_netlink_parms(struct net_device *dev,
1137				struct nlattr *data[],
1138				struct nlattr *tb[],
1139				struct ip_tunnel_parm *parms,
1140				__u32 *fwmark)
1141{
1142	struct ip_tunnel *t = netdev_priv(dev);
1143
1144	memset(parms, 0, sizeof(*parms));
1145
1146	parms->iph.protocol = IPPROTO_GRE;
1147
1148	if (!data)
1149		return 0;
1150
1151	if (data[IFLA_GRE_LINK])
1152		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1153
1154	if (data[IFLA_GRE_IFLAGS])
1155		parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
1156
1157	if (data[IFLA_GRE_OFLAGS])
1158		parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
1159
1160	if (data[IFLA_GRE_IKEY])
1161		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1162
1163	if (data[IFLA_GRE_OKEY])
1164		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1165
1166	if (data[IFLA_GRE_LOCAL])
1167		parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);
1168
1169	if (data[IFLA_GRE_REMOTE])
1170		parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);
1171
1172	if (data[IFLA_GRE_TTL])
1173		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1174
1175	if (data[IFLA_GRE_TOS])
1176		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1177
1178	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
1179		if (t->ignore_df)
1180			return -EINVAL;
1181		parms->iph.frag_off = htons(IP_DF);
1182	}
1183
1184	if (data[IFLA_GRE_COLLECT_METADATA]) {
1185		t->collect_md = true;
1186		if (dev->type == ARPHRD_IPGRE)
1187			dev->type = ARPHRD_NONE;
1188	}
1189
1190	if (data[IFLA_GRE_IGNORE_DF]) {
1191		if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
1192		  && (parms->iph.frag_off & htons(IP_DF)))
1193			return -EINVAL;
1194		t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
1195	}
1196
1197	if (data[IFLA_GRE_FWMARK])
1198		*fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
1199
1200	return 0;
1201}
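/* For reference, an iproute2 command such as
 *
 *	ip link add gre1 type gre local 10.0.0.1 remote 10.0.0.2 \
 *		ikey 1 okey 2 ttl 64
 *
 * arrives at ipgre_netlink_parms() as IFLA_GRE_LOCAL, IFLA_GRE_REMOTE,
 * IFLA_GRE_IKEY, IFLA_GRE_OKEY and IFLA_GRE_TTL attributes (the
 * addresses and key values above are example data, not defaults).
 */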
1202
1203static int erspan_netlink_parms(struct net_device *dev,
1204				struct nlattr *data[],
1205				struct nlattr *tb[],
1206				struct ip_tunnel_parm *parms,
1207				__u32 *fwmark)
1208{
1209	struct ip_tunnel *t = netdev_priv(dev);
1210	int err;
1211
1212	err = ipgre_netlink_parms(dev, data, tb, parms, fwmark);
1213	if (err)
1214		return err;
1215	if (!data)
1216		return 0;
1217
1218	if (data[IFLA_GRE_ERSPAN_VER]) {
1219		t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
1220
1221		if (t->erspan_ver > 2)
1222			return -EINVAL;
1223	}
1224
1225	if (t->erspan_ver == 1) {
1226		if (data[IFLA_GRE_ERSPAN_INDEX]) {
1227			t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
1228			if (t->index & ~INDEX_MASK)
1229				return -EINVAL;
1230		}
1231	} else if (t->erspan_ver == 2) {
1232		if (data[IFLA_GRE_ERSPAN_DIR]) {
1233			t->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
1234			if (t->dir & ~(DIR_MASK >> DIR_OFFSET))
1235				return -EINVAL;
1236		}
1237		if (data[IFLA_GRE_ERSPAN_HWID]) {
1238			t->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
1239			if (t->hwid & ~(HWID_MASK >> HWID_OFFSET))
1240				return -EINVAL;
1241		}
1242	}
1243
1244	return 0;
1245}
1246
 1247/* Returns true when ENCAP attributes are present in the netlink message */
1248static bool ipgre_netlink_encap_parms(struct nlattr *data[],
1249				      struct ip_tunnel_encap *ipencap)
1250{
1251	bool ret = false;
1252
1253	memset(ipencap, 0, sizeof(*ipencap));
1254
1255	if (!data)
1256		return ret;
1257
1258	if (data[IFLA_GRE_ENCAP_TYPE]) {
1259		ret = true;
1260		ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
1261	}
1262
1263	if (data[IFLA_GRE_ENCAP_FLAGS]) {
1264		ret = true;
1265		ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
1266	}
1267
1268	if (data[IFLA_GRE_ENCAP_SPORT]) {
1269		ret = true;
1270		ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
1271	}
1272
1273	if (data[IFLA_GRE_ENCAP_DPORT]) {
1274		ret = true;
1275		ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
1276	}
1277
1278	return ret;
1279}
1280
1281static int gre_tap_init(struct net_device *dev)
1282{
1283	__gre_tunnel_init(dev);
1284	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1285	netif_keep_dst(dev);
1286
1287	return ip_tunnel_init(dev);
1288}
1289
1290static const struct net_device_ops gre_tap_netdev_ops = {
1291	.ndo_init		= gre_tap_init,
1292	.ndo_uninit		= ip_tunnel_uninit,
1293	.ndo_start_xmit		= gre_tap_xmit,
1294	.ndo_set_mac_address 	= eth_mac_addr,
1295	.ndo_validate_addr	= eth_validate_addr,
1296	.ndo_change_mtu		= ip_tunnel_change_mtu,
1297	.ndo_get_stats64	= dev_get_tstats64,
1298	.ndo_get_iflink		= ip_tunnel_get_iflink,
1299	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
1300};
1301
1302static int erspan_tunnel_init(struct net_device *dev)
1303{
1304	struct ip_tunnel *tunnel = netdev_priv(dev);
1305
1306	if (tunnel->erspan_ver == 0)
1307		tunnel->tun_hlen = 4; /* 4-byte GRE hdr. */
1308	else
1309		tunnel->tun_hlen = 8; /* 8-byte GRE hdr. */
1310
1311	tunnel->parms.iph.protocol = IPPROTO_GRE;
1312	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
1313		       erspan_hdr_len(tunnel->erspan_ver);
1314
1315	dev->features		|= GRE_FEATURES;
1316	dev->hw_features	|= GRE_FEATURES;
1317	dev->priv_flags		|= IFF_LIVE_ADDR_CHANGE;
1318	netif_keep_dst(dev);
1319
1320	return ip_tunnel_init(dev);
1321}
1322
1323static const struct net_device_ops erspan_netdev_ops = {
1324	.ndo_init		= erspan_tunnel_init,
1325	.ndo_uninit		= ip_tunnel_uninit,
1326	.ndo_start_xmit		= erspan_xmit,
1327	.ndo_set_mac_address	= eth_mac_addr,
1328	.ndo_validate_addr	= eth_validate_addr,
1329	.ndo_change_mtu		= ip_tunnel_change_mtu,
1330	.ndo_get_stats64	= dev_get_tstats64,
1331	.ndo_get_iflink		= ip_tunnel_get_iflink,
1332	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
1333};
1334
1335static void ipgre_tap_setup(struct net_device *dev)
1336{
1337	ether_setup(dev);
1338	dev->max_mtu = 0;
1339	dev->netdev_ops	= &gre_tap_netdev_ops;
1340	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1341	dev->priv_flags	|= IFF_LIVE_ADDR_CHANGE;
1342	ip_tunnel_setup(dev, gre_tap_net_id);
1343}
1344
1345static int
1346ipgre_newlink_encap_setup(struct net_device *dev, struct nlattr *data[])
1347{
1348	struct ip_tunnel_encap ipencap;
1349
1350	if (ipgre_netlink_encap_parms(data, &ipencap)) {
1351		struct ip_tunnel *t = netdev_priv(dev);
1352		int err = ip_tunnel_encap_setup(t, &ipencap);
1353
1354		if (err < 0)
1355			return err;
1356	}
1357
1358	return 0;
1359}
1360
1361static int ipgre_newlink(struct net *src_net, struct net_device *dev,
1362			 struct nlattr *tb[], struct nlattr *data[],
1363			 struct netlink_ext_ack *extack)
1364{
1365	struct ip_tunnel_parm p;
1366	__u32 fwmark = 0;
1367	int err;
1368
1369	err = ipgre_newlink_encap_setup(dev, data);
1370	if (err)
1371		return err;
1372
1373	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
1374	if (err < 0)
1375		return err;
1376	return ip_tunnel_newlink(dev, tb, &p, fwmark);
1377}
1378
1379static int erspan_newlink(struct net *src_net, struct net_device *dev,
1380			  struct nlattr *tb[], struct nlattr *data[],
1381			  struct netlink_ext_ack *extack)
1382{
1383	struct ip_tunnel_parm p;
1384	__u32 fwmark = 0;
1385	int err;
1386
1387	err = ipgre_newlink_encap_setup(dev, data);
1388	if (err)
1389		return err;
1390
1391	err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
1392	if (err)
1393		return err;
1394	return ip_tunnel_newlink(dev, tb, &p, fwmark);
1395}
1396
1397static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1398			    struct nlattr *data[],
1399			    struct netlink_ext_ack *extack)
1400{
1401	struct ip_tunnel *t = netdev_priv(dev);
1402	__u32 fwmark = t->fwmark;
1403	struct ip_tunnel_parm p;
1404	int err;
1405
1406	err = ipgre_newlink_encap_setup(dev, data);
1407	if (err)
1408		return err;
1409
1410	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
1411	if (err < 0)
1412		return err;
1413
1414	err = ip_tunnel_changelink(dev, tb, &p, fwmark);
1415	if (err < 0)
1416		return err;
1417
1418	t->parms.i_flags = p.i_flags;
1419	t->parms.o_flags = p.o_flags;
1420
1421	ipgre_link_update(dev, !tb[IFLA_MTU]);
1422
1423	return 0;
1424}
1425
1426static int erspan_changelink(struct net_device *dev, struct nlattr *tb[],
1427			     struct nlattr *data[],
1428			     struct netlink_ext_ack *extack)
1429{
1430	struct ip_tunnel *t = netdev_priv(dev);
1431	__u32 fwmark = t->fwmark;
1432	struct ip_tunnel_parm p;
1433	int err;
1434
1435	err = ipgre_newlink_encap_setup(dev, data);
1436	if (err)
1437		return err;
1438
1439	err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
1440	if (err < 0)
1441		return err;
1442
1443	err = ip_tunnel_changelink(dev, tb, &p, fwmark);
1444	if (err < 0)
1445		return err;
1446
1447	t->parms.i_flags = p.i_flags;
1448	t->parms.o_flags = p.o_flags;
1449
1450	return 0;
1451}
1452
1453static size_t ipgre_get_size(const struct net_device *dev)
1454{
1455	return
1456		/* IFLA_GRE_LINK */
1457		nla_total_size(4) +
1458		/* IFLA_GRE_IFLAGS */
1459		nla_total_size(2) +
1460		/* IFLA_GRE_OFLAGS */
1461		nla_total_size(2) +
1462		/* IFLA_GRE_IKEY */
1463		nla_total_size(4) +
1464		/* IFLA_GRE_OKEY */
1465		nla_total_size(4) +
1466		/* IFLA_GRE_LOCAL */
1467		nla_total_size(4) +
1468		/* IFLA_GRE_REMOTE */
1469		nla_total_size(4) +
1470		/* IFLA_GRE_TTL */
1471		nla_total_size(1) +
1472		/* IFLA_GRE_TOS */
1473		nla_total_size(1) +
1474		/* IFLA_GRE_PMTUDISC */
1475		nla_total_size(1) +
1476		/* IFLA_GRE_ENCAP_TYPE */
1477		nla_total_size(2) +
1478		/* IFLA_GRE_ENCAP_FLAGS */
1479		nla_total_size(2) +
1480		/* IFLA_GRE_ENCAP_SPORT */
1481		nla_total_size(2) +
1482		/* IFLA_GRE_ENCAP_DPORT */
1483		nla_total_size(2) +
1484		/* IFLA_GRE_COLLECT_METADATA */
1485		nla_total_size(0) +
1486		/* IFLA_GRE_IGNORE_DF */
1487		nla_total_size(1) +
1488		/* IFLA_GRE_FWMARK */
1489		nla_total_size(4) +
1490		/* IFLA_GRE_ERSPAN_INDEX */
1491		nla_total_size(4) +
1492		/* IFLA_GRE_ERSPAN_VER */
1493		nla_total_size(1) +
1494		/* IFLA_GRE_ERSPAN_DIR */
1495		nla_total_size(1) +
1496		/* IFLA_GRE_ERSPAN_HWID */
1497		nla_total_size(2) +
1498		0;
1499}
1500
1501static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1502{
1503	struct ip_tunnel *t = netdev_priv(dev);
1504	struct ip_tunnel_parm *p = &t->parms;
1505	__be16 o_flags = p->o_flags;
1506
1507	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1508	    nla_put_be16(skb, IFLA_GRE_IFLAGS,
1509			 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
1510	    nla_put_be16(skb, IFLA_GRE_OFLAGS,
1511			 gre_tnl_flags_to_gre_flags(o_flags)) ||
1512	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1513	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1514	    nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1515	    nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
1516	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1517	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1518	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1519		       !!(p->iph.frag_off & htons(IP_DF))) ||
1520	    nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
1521		goto nla_put_failure;
1522
1523	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
1524			t->encap.type) ||
1525	    nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
1526			 t->encap.sport) ||
1527	    nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
1528			 t->encap.dport) ||
1529	    nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
1530			t->encap.flags))
1531		goto nla_put_failure;
1532
1533	if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
1534		goto nla_put_failure;
1535
1536	if (t->collect_md) {
1537		if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
1538			goto nla_put_failure;
1539	}
1540
1541	return 0;
1542
1543nla_put_failure:
1544	return -EMSGSIZE;
1545}
1546
1547static int erspan_fill_info(struct sk_buff *skb, const struct net_device *dev)
1548{
1549	struct ip_tunnel *t = netdev_priv(dev);
1550
1551	if (t->erspan_ver <= 2) {
1552		if (t->erspan_ver != 0 && !t->collect_md)
1553			t->parms.o_flags |= TUNNEL_KEY;
1554
1555		if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
1556			goto nla_put_failure;
1557
1558		if (t->erspan_ver == 1) {
1559			if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
1560				goto nla_put_failure;
1561		} else if (t->erspan_ver == 2) {
1562			if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
1563				goto nla_put_failure;
1564			if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
1565				goto nla_put_failure;
1566		}
1567	}
1568
1569	return ipgre_fill_info(skb, dev);
1570
1571nla_put_failure:
1572	return -EMSGSIZE;
1573}
1574
1575static void erspan_setup(struct net_device *dev)
1576{
1577	struct ip_tunnel *t = netdev_priv(dev);
1578
1579	ether_setup(dev);
1580	dev->max_mtu = 0;
1581	dev->netdev_ops = &erspan_netdev_ops;
1582	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1583	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1584	ip_tunnel_setup(dev, erspan_net_id);
1585	t->erspan_ver = 1;
1586}
1587
1588static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1589	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
1590	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
1591	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
1592	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
1593	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
1594	[IFLA_GRE_LOCAL]	= { .len = sizeof_field(struct iphdr, saddr) },
1595	[IFLA_GRE_REMOTE]	= { .len = sizeof_field(struct iphdr, daddr) },
1596	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
1597	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
1598	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
1599	[IFLA_GRE_ENCAP_TYPE]	= { .type = NLA_U16 },
1600	[IFLA_GRE_ENCAP_FLAGS]	= { .type = NLA_U16 },
1601	[IFLA_GRE_ENCAP_SPORT]	= { .type = NLA_U16 },
1602	[IFLA_GRE_ENCAP_DPORT]	= { .type = NLA_U16 },
1603	[IFLA_GRE_COLLECT_METADATA]	= { .type = NLA_FLAG },
1604	[IFLA_GRE_IGNORE_DF]	= { .type = NLA_U8 },
1605	[IFLA_GRE_FWMARK]	= { .type = NLA_U32 },
1606	[IFLA_GRE_ERSPAN_INDEX]	= { .type = NLA_U32 },
1607	[IFLA_GRE_ERSPAN_VER]	= { .type = NLA_U8 },
1608	[IFLA_GRE_ERSPAN_DIR]	= { .type = NLA_U8 },
1609	[IFLA_GRE_ERSPAN_HWID]	= { .type = NLA_U16 },
1610};
1611
1612static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1613	.kind		= "gre",
1614	.maxtype	= IFLA_GRE_MAX,
1615	.policy		= ipgre_policy,
1616	.priv_size	= sizeof(struct ip_tunnel),
1617	.setup		= ipgre_tunnel_setup,
1618	.validate	= ipgre_tunnel_validate,
1619	.newlink	= ipgre_newlink,
1620	.changelink	= ipgre_changelink,
1621	.dellink	= ip_tunnel_dellink,
1622	.get_size	= ipgre_get_size,
1623	.fill_info	= ipgre_fill_info,
1624	.get_link_net	= ip_tunnel_get_link_net,
1625};
1626
1627static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1628	.kind		= "gretap",
1629	.maxtype	= IFLA_GRE_MAX,
1630	.policy		= ipgre_policy,
1631	.priv_size	= sizeof(struct ip_tunnel),
1632	.setup		= ipgre_tap_setup,
1633	.validate	= ipgre_tap_validate,
1634	.newlink	= ipgre_newlink,
1635	.changelink	= ipgre_changelink,
1636	.dellink	= ip_tunnel_dellink,
1637	.get_size	= ipgre_get_size,
1638	.fill_info	= ipgre_fill_info,
1639	.get_link_net	= ip_tunnel_get_link_net,
1640};
1641
1642static struct rtnl_link_ops erspan_link_ops __read_mostly = {
1643	.kind		= "erspan",
1644	.maxtype	= IFLA_GRE_MAX,
1645	.policy		= ipgre_policy,
1646	.priv_size	= sizeof(struct ip_tunnel),
1647	.setup		= erspan_setup,
1648	.validate	= erspan_validate,
1649	.newlink	= erspan_newlink,
1650	.changelink	= erspan_changelink,
1651	.dellink	= ip_tunnel_dellink,
1652	.get_size	= ipgre_get_size,
1653	.fill_info	= erspan_fill_info,
1654	.get_link_net	= ip_tunnel_get_link_net,
1655};
1656
1657struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
1658					u8 name_assign_type)
1659{
1660	struct nlattr *tb[IFLA_MAX + 1];
1661	struct net_device *dev;
1662	LIST_HEAD(list_kill);
1663	struct ip_tunnel *t;
1664	int err;
1665
1666	memset(&tb, 0, sizeof(tb));
1667
1668	dev = rtnl_create_link(net, name, name_assign_type,
1669			       &ipgre_tap_ops, tb, NULL);
1670	if (IS_ERR(dev))
1671		return dev;
1672
 1673	/* Configure flow-based GRE device. */
1674	t = netdev_priv(dev);
1675	t->collect_md = true;
1676
1677	err = ipgre_newlink(net, dev, tb, NULL, NULL);
1678	if (err < 0) {
1679		free_netdev(dev);
1680		return ERR_PTR(err);
1681	}
1682
1683	/* openvswitch users expect packet sizes to be unrestricted,
1684	 * so set the largest MTU we can.
1685	 */
1686	err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
1687	if (err)
1688		goto out;
1689
1690	err = rtnl_configure_link(dev, NULL, 0, NULL);
1691	if (err < 0)
1692		goto out;
1693
1694	return dev;
1695out:
1696	ip_tunnel_dellink(dev, &list_kill);
1697	unregister_netdevice_many(&list_kill);
1698	return ERR_PTR(err);
1699}
1700EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
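/* A hypothetical caller sketch (the function name and name template are
 * invented) showing how a module such as openvswitch can use the export
 * above to create a flow-based gretap device, propagating the ERR_PTR
 * on failure:
 */
#if 0	/* illustrative sketch, not compiled */
static struct net_device *example_gretap_get(struct net *net)
{
	struct net_device *dev;

	dev = gretap_fb_dev_create(net, "gretap%d", NET_NAME_ENUM);
	if (IS_ERR(dev))
		return dev;		/* caller checks with IS_ERR() */
	return dev;
}
#endif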
1701
1702static int __net_init ipgre_tap_init_net(struct net *net)
1703{
1704	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
1705}
1706
1707static void __net_exit ipgre_tap_exit_batch_rtnl(struct list_head *list_net,
1708						 struct list_head *dev_to_kill)
1709{
1710	ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops,
1711			      dev_to_kill);
1712}
1713
1714static struct pernet_operations ipgre_tap_net_ops = {
1715	.init = ipgre_tap_init_net,
1716	.exit_batch_rtnl = ipgre_tap_exit_batch_rtnl,
1717	.id   = &gre_tap_net_id,
1718	.size = sizeof(struct ip_tunnel_net),
1719};
1720
1721static int __net_init erspan_init_net(struct net *net)
1722{
1723	return ip_tunnel_init_net(net, erspan_net_id,
1724				  &erspan_link_ops, "erspan0");
1725}
1726
1727static void __net_exit erspan_exit_batch_rtnl(struct list_head *net_list,
1728					      struct list_head *dev_to_kill)
1729{
1730	ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops,
1731			      dev_to_kill);
1732}
1733
1734static struct pernet_operations erspan_net_ops = {
1735	.init = erspan_init_net,
1736	.exit_batch_rtnl = erspan_exit_batch_rtnl,
1737	.id   = &erspan_net_id,
1738	.size = sizeof(struct ip_tunnel_net),
1739};
1740
1741static int __init ipgre_init(void)
1742{
1743	int err;
1744
1745	pr_info("GRE over IPv4 tunneling driver\n");
1746
1747	err = register_pernet_device(&ipgre_net_ops);
1748	if (err < 0)
1749		return err;
1750
1751	err = register_pernet_device(&ipgre_tap_net_ops);
1752	if (err < 0)
1753		goto pnet_tap_failed;
1754
1755	err = register_pernet_device(&erspan_net_ops);
1756	if (err < 0)
1757		goto pnet_erspan_failed;
1758
1759	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
1760	if (err < 0) {
1761		pr_info("%s: can't add protocol\n", __func__);
1762		goto add_proto_failed;
1763	}
1764
1765	err = rtnl_link_register(&ipgre_link_ops);
1766	if (err < 0)
1767		goto rtnl_link_failed;
1768
1769	err = rtnl_link_register(&ipgre_tap_ops);
1770	if (err < 0)
1771		goto tap_ops_failed;
1772
1773	err = rtnl_link_register(&erspan_link_ops);
1774	if (err < 0)
1775		goto erspan_link_failed;
1776
1777	return 0;
1778
1779erspan_link_failed:
1780	rtnl_link_unregister(&ipgre_tap_ops);
1781tap_ops_failed:
1782	rtnl_link_unregister(&ipgre_link_ops);
1783rtnl_link_failed:
1784	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1785add_proto_failed:
1786	unregister_pernet_device(&erspan_net_ops);
1787pnet_erspan_failed:
1788	unregister_pernet_device(&ipgre_tap_net_ops);
1789pnet_tap_failed:
1790	unregister_pernet_device(&ipgre_net_ops);
1791	return err;
1792}
1793
1794static void __exit ipgre_fini(void)
1795{
1796	rtnl_link_unregister(&ipgre_tap_ops);
1797	rtnl_link_unregister(&ipgre_link_ops);
1798	rtnl_link_unregister(&erspan_link_ops);
1799	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1800	unregister_pernet_device(&ipgre_tap_net_ops);
1801	unregister_pernet_device(&ipgre_net_ops);
1802	unregister_pernet_device(&erspan_net_ops);
1803}
1804
1805module_init(ipgre_init);
1806module_exit(ipgre_fini);
1807MODULE_DESCRIPTION("IPv4 GRE tunnels over IP library");
1808MODULE_LICENSE("GPL");
1809MODULE_ALIAS_RTNL_LINK("gre");
1810MODULE_ALIAS_RTNL_LINK("gretap");
1811MODULE_ALIAS_RTNL_LINK("erspan");
1812MODULE_ALIAS_NETDEV("gre0");
1813MODULE_ALIAS_NETDEV("gretap0");
1814MODULE_ALIAS_NETDEV("erspan0");