Linux Audio

Check our new training course

Loading...
Note: File does not exist in v3.1.
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *	GRE over IPv6 protocol decoder.
   4 *
   5 *	Authors: Dmitry Kozlov (xeb@mail.ru)
   6 */
   7
   8#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
   9
  10#include <linux/capability.h>
  11#include <linux/module.h>
  12#include <linux/types.h>
  13#include <linux/kernel.h>
  14#include <linux/slab.h>
  15#include <linux/uaccess.h>
  16#include <linux/skbuff.h>
  17#include <linux/netdevice.h>
  18#include <linux/in.h>
  19#include <linux/tcp.h>
  20#include <linux/udp.h>
  21#include <linux/if_arp.h>
  22#include <linux/init.h>
  23#include <linux/in6.h>
  24#include <linux/inetdevice.h>
  25#include <linux/igmp.h>
  26#include <linux/netfilter_ipv4.h>
  27#include <linux/etherdevice.h>
  28#include <linux/if_ether.h>
  29#include <linux/hash.h>
  30#include <linux/if_tunnel.h>
  31#include <linux/ip6_tunnel.h>
  32
  33#include <net/sock.h>
  34#include <net/ip.h>
  35#include <net/ip_tunnels.h>
  36#include <net/icmp.h>
  37#include <net/protocol.h>
  38#include <net/addrconf.h>
  39#include <net/arp.h>
  40#include <net/checksum.h>
  41#include <net/dsfield.h>
  42#include <net/inet_ecn.h>
  43#include <net/xfrm.h>
  44#include <net/net_namespace.h>
  45#include <net/netns/generic.h>
  46#include <net/rtnetlink.h>
  47
  48#include <net/ipv6.h>
  49#include <net/ip6_fib.h>
  50#include <net/ip6_route.h>
  51#include <net/ip6_tunnel.h>
  52#include <net/gre.h>
  53#include <net/erspan.h>
  54#include <net/dst_metadata.h>
  55
  56
/* Runtime-tunable switch (module parameter, mode 0644). Its value is handed
 * unchanged to ip6_tnl_rcv() by every receive path in this file.
 */
static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
  60
/* Each tunnel hash table has 2^IP6_GRE_HASH_SIZE_SHIFT buckets. */
#define IP6_GRE_HASH_SIZE_SHIFT  5
#define IP6_GRE_HASH_SIZE (1 << IP6_GRE_HASH_SIZE_SHIFT)
  63
/* Key passed to net_generic() to fetch the per-namespace struct ip6gre_net. */
static unsigned int ip6gre_net_id __read_mostly;

/* Per-network-namespace state of the ip6gre driver. */
struct ip6gre_net {
	/* Four hash tables, selected by which of (remote, local) are set. */
	struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE];

	/* Tunnels tried by the lookup when no hashed tunnel matches:
	 * the ERSPAN one for ERSPAN protocols, the plain one otherwise.
	 */
	struct ip6_tnl __rcu *collect_md_tun;
	struct ip6_tnl __rcu *collect_md_tun_erspan;
	/* Last-resort device returned by the lookup if it is up. */
	struct net_device *fb_tunnel_dev;
};
  72
/* Forward declarations for link ops and helpers that are referenced
 * before their definitions appear in this file.
 */
static struct rtnl_link_ops ip6gre_link_ops __read_mostly;
static struct rtnl_link_ops ip6gre_tap_ops __read_mostly;
static struct rtnl_link_ops ip6erspan_tap_ops __read_mostly;
static int ip6gre_tunnel_init(struct net_device *dev);
static void ip6gre_tunnel_setup(struct net_device *dev);
static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t);
static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu);
static void ip6erspan_tnl_link_config(struct ip6_tnl *t, int set_mtu);
  81
  82/* Tunnel hash table */
  83
  84/*
  85   4 hash tables:
  86
  87   3: (remote,local)
  88   2: (remote,*)
  89   1: (*,local)
  90   0: (*,*)
  91
  92   We require exact key match i.e. if a key is present in packet
  93   it will match only tunnel with the same key; if it is not present,
  94   it will match only keyless tunnel.
  95
   All keyless packets that do not match a configured keyless tunnel
   will match the fallback tunnel.
  98 */
  99
 100#define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(IP6_GRE_HASH_SIZE - 1))
 101static u32 HASH_ADDR(const struct in6_addr *addr)
 102{
 103	u32 hash = ipv6_addr_hash(addr);
 104
 105	return hash_32(hash, IP6_GRE_HASH_SIZE_SHIFT);
 106}
 107
/* Named aliases for the four hash tables in struct ip6gre_net:
 * r_l = (remote,local), r = (remote,*), l = (*,local), wc = (*,*).
 */
#define tunnels_r_l	tunnels[3]
#define tunnels_r	tunnels[2]
#define tunnels_l	tunnels[1]
#define tunnels_wc	tunnels[0]
 112
 113/* Given src, dst and key, find appropriate for input tunnel. */
 114
 115static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
 116		const struct in6_addr *remote, const struct in6_addr *local,
 117		__be32 key, __be16 gre_proto)
 118{
 119	struct net *net = dev_net(dev);
 120	int link = dev->ifindex;
 121	unsigned int h0 = HASH_ADDR(remote);
 122	unsigned int h1 = HASH_KEY(key);
 123	struct ip6_tnl *t, *cand = NULL;
 124	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
 125	int dev_type = (gre_proto == htons(ETH_P_TEB) ||
 126			gre_proto == htons(ETH_P_ERSPAN) ||
 127			gre_proto == htons(ETH_P_ERSPAN2)) ?
 128		       ARPHRD_ETHER : ARPHRD_IP6GRE;
 129	int score, cand_score = 4;
 130	struct net_device *ndev;
 131
 132	for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) {
 133		if (!ipv6_addr_equal(local, &t->parms.laddr) ||
 134		    !ipv6_addr_equal(remote, &t->parms.raddr) ||
 135		    key != t->parms.i_key ||
 136		    !(t->dev->flags & IFF_UP))
 137			continue;
 138
 139		if (t->dev->type != ARPHRD_IP6GRE &&
 140		    t->dev->type != dev_type)
 141			continue;
 142
 143		score = 0;
 144		if (t->parms.link != link)
 145			score |= 1;
 146		if (t->dev->type != dev_type)
 147			score |= 2;
 148		if (score == 0)
 149			return t;
 150
 151		if (score < cand_score) {
 152			cand = t;
 153			cand_score = score;
 154		}
 155	}
 156
 157	for_each_ip_tunnel_rcu(t, ign->tunnels_r[h0 ^ h1]) {
 158		if (!ipv6_addr_equal(remote, &t->parms.raddr) ||
 159		    key != t->parms.i_key ||
 160		    !(t->dev->flags & IFF_UP))
 161			continue;
 162
 163		if (t->dev->type != ARPHRD_IP6GRE &&
 164		    t->dev->type != dev_type)
 165			continue;
 166
 167		score = 0;
 168		if (t->parms.link != link)
 169			score |= 1;
 170		if (t->dev->type != dev_type)
 171			score |= 2;
 172		if (score == 0)
 173			return t;
 174
 175		if (score < cand_score) {
 176			cand = t;
 177			cand_score = score;
 178		}
 179	}
 180
 181	for_each_ip_tunnel_rcu(t, ign->tunnels_l[h1]) {
 182		if ((!ipv6_addr_equal(local, &t->parms.laddr) &&
 183			  (!ipv6_addr_equal(local, &t->parms.raddr) ||
 184				 !ipv6_addr_is_multicast(local))) ||
 185		    key != t->parms.i_key ||
 186		    !(t->dev->flags & IFF_UP))
 187			continue;
 188
 189		if (t->dev->type != ARPHRD_IP6GRE &&
 190		    t->dev->type != dev_type)
 191			continue;
 192
 193		score = 0;
 194		if (t->parms.link != link)
 195			score |= 1;
 196		if (t->dev->type != dev_type)
 197			score |= 2;
 198		if (score == 0)
 199			return t;
 200
 201		if (score < cand_score) {
 202			cand = t;
 203			cand_score = score;
 204		}
 205	}
 206
 207	for_each_ip_tunnel_rcu(t, ign->tunnels_wc[h1]) {
 208		if (t->parms.i_key != key ||
 209		    !(t->dev->flags & IFF_UP))
 210			continue;
 211
 212		if (t->dev->type != ARPHRD_IP6GRE &&
 213		    t->dev->type != dev_type)
 214			continue;
 215
 216		score = 0;
 217		if (t->parms.link != link)
 218			score |= 1;
 219		if (t->dev->type != dev_type)
 220			score |= 2;
 221		if (score == 0)
 222			return t;
 223
 224		if (score < cand_score) {
 225			cand = t;
 226			cand_score = score;
 227		}
 228	}
 229
 230	if (cand)
 231		return cand;
 232
 233	if (gre_proto == htons(ETH_P_ERSPAN) ||
 234	    gre_proto == htons(ETH_P_ERSPAN2))
 235		t = rcu_dereference(ign->collect_md_tun_erspan);
 236	else
 237		t = rcu_dereference(ign->collect_md_tun);
 238
 239	if (t && t->dev->flags & IFF_UP)
 240		return t;
 241
 242	ndev = READ_ONCE(ign->fb_tunnel_dev);
 243	if (ndev && ndev->flags & IFF_UP)
 244		return netdev_priv(ndev);
 245
 246	return NULL;
 247}
 248
 249static struct ip6_tnl __rcu **__ip6gre_bucket(struct ip6gre_net *ign,
 250		const struct __ip6_tnl_parm *p)
 251{
 252	const struct in6_addr *remote = &p->raddr;
 253	const struct in6_addr *local = &p->laddr;
 254	unsigned int h = HASH_KEY(p->i_key);
 255	int prio = 0;
 256
 257	if (!ipv6_addr_any(local))
 258		prio |= 1;
 259	if (!ipv6_addr_any(remote) && !ipv6_addr_is_multicast(remote)) {
 260		prio |= 2;
 261		h ^= HASH_ADDR(remote);
 262	}
 263
 264	return &ign->tunnels[prio][h];
 265}
 266
 267static void ip6gre_tunnel_link_md(struct ip6gre_net *ign, struct ip6_tnl *t)
 268{
 269	if (t->parms.collect_md)
 270		rcu_assign_pointer(ign->collect_md_tun, t);
 271}
 272
 273static void ip6erspan_tunnel_link_md(struct ip6gre_net *ign, struct ip6_tnl *t)
 274{
 275	if (t->parms.collect_md)
 276		rcu_assign_pointer(ign->collect_md_tun_erspan, t);
 277}
 278
 279static void ip6gre_tunnel_unlink_md(struct ip6gre_net *ign, struct ip6_tnl *t)
 280{
 281	if (t->parms.collect_md)
 282		rcu_assign_pointer(ign->collect_md_tun, NULL);
 283}
 284
 285static void ip6erspan_tunnel_unlink_md(struct ip6gre_net *ign,
 286				       struct ip6_tnl *t)
 287{
 288	if (t->parms.collect_md)
 289		rcu_assign_pointer(ign->collect_md_tun_erspan, NULL);
 290}
 291
 292static inline struct ip6_tnl __rcu **ip6gre_bucket(struct ip6gre_net *ign,
 293		const struct ip6_tnl *t)
 294{
 295	return __ip6gre_bucket(ign, &t->parms);
 296}
 297
 298static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t)
 299{
 300	struct ip6_tnl __rcu **tp = ip6gre_bucket(ign, t);
 301
 302	rcu_assign_pointer(t->next, rtnl_dereference(*tp));
 303	rcu_assign_pointer(*tp, t);
 304}
 305
 306static void ip6gre_tunnel_unlink(struct ip6gre_net *ign, struct ip6_tnl *t)
 307{
 308	struct ip6_tnl __rcu **tp;
 309	struct ip6_tnl *iter;
 310
 311	for (tp = ip6gre_bucket(ign, t);
 312	     (iter = rtnl_dereference(*tp)) != NULL;
 313	     tp = &iter->next) {
 314		if (t == iter) {
 315			rcu_assign_pointer(*tp, t->next);
 316			break;
 317		}
 318	}
 319}
 320
 321static struct ip6_tnl *ip6gre_tunnel_find(struct net *net,
 322					   const struct __ip6_tnl_parm *parms,
 323					   int type)
 324{
 325	const struct in6_addr *remote = &parms->raddr;
 326	const struct in6_addr *local = &parms->laddr;
 327	__be32 key = parms->i_key;
 328	int link = parms->link;
 329	struct ip6_tnl *t;
 330	struct ip6_tnl __rcu **tp;
 331	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
 332
 333	for (tp = __ip6gre_bucket(ign, parms);
 334	     (t = rtnl_dereference(*tp)) != NULL;
 335	     tp = &t->next)
 336		if (ipv6_addr_equal(local, &t->parms.laddr) &&
 337		    ipv6_addr_equal(remote, &t->parms.raddr) &&
 338		    key == t->parms.i_key &&
 339		    link == t->parms.link &&
 340		    type == t->dev->type)
 341			break;
 342
 343	return t;
 344}
 345
 346static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
 347		const struct __ip6_tnl_parm *parms, int create)
 348{
 349	struct ip6_tnl *t, *nt;
 350	struct net_device *dev;
 351	char name[IFNAMSIZ];
 352	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
 353
 354	t = ip6gre_tunnel_find(net, parms, ARPHRD_IP6GRE);
 355	if (t && create)
 356		return NULL;
 357	if (t || !create)
 358		return t;
 359
 360	if (parms->name[0]) {
 361		if (!dev_valid_name(parms->name))
 362			return NULL;
 363		strlcpy(name, parms->name, IFNAMSIZ);
 364	} else {
 365		strcpy(name, "ip6gre%d");
 366	}
 367	dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN,
 368			   ip6gre_tunnel_setup);
 369	if (!dev)
 370		return NULL;
 371
 372	dev_net_set(dev, net);
 373
 374	nt = netdev_priv(dev);
 375	nt->parms = *parms;
 376	dev->rtnl_link_ops = &ip6gre_link_ops;
 377
 378	nt->dev = dev;
 379	nt->net = dev_net(dev);
 380
 381	if (register_netdevice(dev) < 0)
 382		goto failed_free;
 383
 384	ip6gre_tnl_link_config(nt, 1);
 385
 386	/* Can use a lockless transmit, unless we generate output sequences */
 387	if (!(nt->parms.o_flags & TUNNEL_SEQ))
 388		dev->features |= NETIF_F_LLTX;
 389
 390	dev_hold(dev);
 391	ip6gre_tunnel_link(ign, nt);
 392	return nt;
 393
 394failed_free:
 395	free_netdev(dev);
 396	return NULL;
 397}
 398
 399static void ip6erspan_tunnel_uninit(struct net_device *dev)
 400{
 401	struct ip6_tnl *t = netdev_priv(dev);
 402	struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id);
 403
 404	ip6erspan_tunnel_unlink_md(ign, t);
 405	ip6gre_tunnel_unlink(ign, t);
 406	dst_cache_reset(&t->dst_cache);
 407	dev_put(dev);
 408}
 409
 410static void ip6gre_tunnel_uninit(struct net_device *dev)
 411{
 412	struct ip6_tnl *t = netdev_priv(dev);
 413	struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id);
 414
 415	ip6gre_tunnel_unlink_md(ign, t);
 416	ip6gre_tunnel_unlink(ign, t);
 417	if (ign->fb_tunnel_dev == dev)
 418		WRITE_ONCE(ign->fb_tunnel_dev, NULL);
 419	dst_cache_reset(&t->dst_cache);
 420	dev_put(dev);
 421}
 422
 423
 424static int ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 425		       u8 type, u8 code, int offset, __be32 info)
 426{
 427	struct net *net = dev_net(skb->dev);
 428	const struct ipv6hdr *ipv6h;
 429	struct tnl_ptk_info tpi;
 430	struct ip6_tnl *t;
 431
 432	if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IPV6),
 433			     offset) < 0)
 434		return -EINVAL;
 435
 436	ipv6h = (const struct ipv6hdr *)skb->data;
 437	t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr,
 438				 tpi.key, tpi.proto);
 439	if (!t)
 440		return -ENOENT;
 441
 442	switch (type) {
 443	case ICMPV6_DEST_UNREACH:
 444		net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
 445				    t->parms.name);
 446		if (code != ICMPV6_PORT_UNREACH)
 447			break;
 448		return 0;
 449	case ICMPV6_TIME_EXCEED:
 450		if (code == ICMPV6_EXC_HOPLIMIT) {
 451			net_dbg_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
 452					    t->parms.name);
 453			break;
 454		}
 455		return 0;
 456	case ICMPV6_PARAMPROB: {
 457		struct ipv6_tlv_tnl_enc_lim *tel;
 458		__u32 teli;
 459
 460		teli = 0;
 461		if (code == ICMPV6_HDR_FIELD)
 462			teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);
 463
 464		if (teli && teli == be32_to_cpu(info) - 2) {
 465			tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
 466			if (tel->encap_limit == 0) {
 467				net_dbg_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n",
 468						    t->parms.name);
 469			}
 470		} else {
 471			net_dbg_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
 472					    t->parms.name);
 473		}
 474		return 0;
 475	}
 476	case ICMPV6_PKT_TOOBIG:
 477		ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
 478		return 0;
 479	case NDISC_REDIRECT:
 480		ip6_redirect(skb, net, skb->dev->ifindex, 0,
 481			     sock_net_uid(net, NULL));
 482		return 0;
 483	}
 484
 485	if (time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO))
 486		t->err_count++;
 487	else
 488		t->err_count = 1;
 489	t->err_time = jiffies;
 490
 491	return 0;
 492}
 493
 494static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
 495{
 496	const struct ipv6hdr *ipv6h;
 497	struct ip6_tnl *tunnel;
 498
 499	ipv6h = ipv6_hdr(skb);
 500	tunnel = ip6gre_tunnel_lookup(skb->dev,
 501				      &ipv6h->saddr, &ipv6h->daddr, tpi->key,
 502				      tpi->proto);
 503	if (tunnel) {
 504		if (tunnel->parms.collect_md) {
 505			struct metadata_dst *tun_dst;
 506			__be64 tun_id;
 507			__be16 flags;
 508
 509			flags = tpi->flags;
 510			tun_id = key32_to_tunnel_id(tpi->key);
 511
 512			tun_dst = ipv6_tun_rx_dst(skb, flags, tun_id, 0);
 513			if (!tun_dst)
 514				return PACKET_REJECT;
 515
 516			ip6_tnl_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
 517		} else {
 518			ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
 519		}
 520
 521		return PACKET_RCVD;
 522	}
 523
 524	return PACKET_REJECT;
 525}
 526
 527static int ip6erspan_rcv(struct sk_buff *skb,
 528			 struct tnl_ptk_info *tpi,
 529			 int gre_hdr_len)
 530{
 531	struct erspan_base_hdr *ershdr;
 532	const struct ipv6hdr *ipv6h;
 533	struct erspan_md2 *md2;
 534	struct ip6_tnl *tunnel;
 535	u8 ver;
 536
 537	ipv6h = ipv6_hdr(skb);
 538	ershdr = (struct erspan_base_hdr *)skb->data;
 539	ver = ershdr->ver;
 540
 541	tunnel = ip6gre_tunnel_lookup(skb->dev,
 542				      &ipv6h->saddr, &ipv6h->daddr, tpi->key,
 543				      tpi->proto);
 544	if (tunnel) {
 545		int len = erspan_hdr_len(ver);
 546
 547		if (unlikely(!pskb_may_pull(skb, len)))
 548			return PACKET_REJECT;
 549
 550		if (__iptunnel_pull_header(skb, len,
 551					   htons(ETH_P_TEB),
 552					   false, false) < 0)
 553			return PACKET_REJECT;
 554
 555		if (tunnel->parms.collect_md) {
 556			struct erspan_metadata *pkt_md, *md;
 557			struct metadata_dst *tun_dst;
 558			struct ip_tunnel_info *info;
 559			unsigned char *gh;
 560			__be64 tun_id;
 561			__be16 flags;
 562
 563			tpi->flags |= TUNNEL_KEY;
 564			flags = tpi->flags;
 565			tun_id = key32_to_tunnel_id(tpi->key);
 566
 567			tun_dst = ipv6_tun_rx_dst(skb, flags, tun_id,
 568						  sizeof(*md));
 569			if (!tun_dst)
 570				return PACKET_REJECT;
 571
 572			/* skb can be uncloned in __iptunnel_pull_header, so
 573			 * old pkt_md is no longer valid and we need to reset
 574			 * it
 575			 */
 576			gh = skb_network_header(skb) +
 577			     skb_network_header_len(skb);
 578			pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
 579							    sizeof(*ershdr));
 580			info = &tun_dst->u.tun_info;
 581			md = ip_tunnel_info_opts(info);
 582			md->version = ver;
 583			md2 = &md->u.md2;
 584			memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
 585						       ERSPAN_V2_MDSIZE);
 586			info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
 587			info->options_len = sizeof(*md);
 588
 589			ip6_tnl_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
 590
 591		} else {
 592			ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
 593		}
 594
 595		return PACKET_RCVD;
 596	}
 597
 598	return PACKET_REJECT;
 599}
 600
 601static int gre_rcv(struct sk_buff *skb)
 602{
 603	struct tnl_ptk_info tpi;
 604	bool csum_err = false;
 605	int hdr_len;
 606
 607	hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IPV6), 0);
 608	if (hdr_len < 0)
 609		goto drop;
 610
 611	if (iptunnel_pull_header(skb, hdr_len, tpi.proto, false))
 612		goto drop;
 613
 614	if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
 615		     tpi.proto == htons(ETH_P_ERSPAN2))) {
 616		if (ip6erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
 617			return 0;
 618		goto out;
 619	}
 620
 621	if (ip6gre_rcv(skb, &tpi) == PACKET_RCVD)
 622		return 0;
 623
 624out:
 625	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
 626drop:
 627	kfree_skb(skb);
 628	return 0;
 629}
 630
 631static int gre_handle_offloads(struct sk_buff *skb, bool csum)
 632{
 633	return iptunnel_handle_offloads(skb,
 634					csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
 635}
 636
 637static void prepare_ip6gre_xmit_ipv4(struct sk_buff *skb,
 638				     struct net_device *dev,
 639				     struct flowi6 *fl6, __u8 *dsfield,
 640				     int *encap_limit)
 641{
 642	const struct iphdr *iph = ip_hdr(skb);
 643	struct ip6_tnl *t = netdev_priv(dev);
 644
 645	if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
 646		*encap_limit = t->parms.encap_limit;
 647
 648	memcpy(fl6, &t->fl.u.ip6, sizeof(*fl6));
 649
 650	if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
 651		*dsfield = ipv4_get_dsfield(iph);
 652	else
 653		*dsfield = ip6_tclass(t->parms.flowinfo);
 654
 655	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
 656		fl6->flowi6_mark = skb->mark;
 657	else
 658		fl6->flowi6_mark = t->parms.fwmark;
 659
 660	fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
 661}
 662
 663static int prepare_ip6gre_xmit_ipv6(struct sk_buff *skb,
 664				    struct net_device *dev,
 665				    struct flowi6 *fl6, __u8 *dsfield,
 666				    int *encap_limit)
 667{
 668	struct ipv6hdr *ipv6h;
 669	struct ip6_tnl *t = netdev_priv(dev);
 670	__u16 offset;
 671
 672	offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
 673	/* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */
 674	ipv6h = ipv6_hdr(skb);
 675
 676	if (offset > 0) {
 677		struct ipv6_tlv_tnl_enc_lim *tel;
 678
 679		tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
 680		if (tel->encap_limit == 0) {
 681			icmpv6_send(skb, ICMPV6_PARAMPROB,
 682				    ICMPV6_HDR_FIELD, offset + 2);
 683			return -1;
 684		}
 685		*encap_limit = tel->encap_limit - 1;
 686	} else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) {
 687		*encap_limit = t->parms.encap_limit;
 688	}
 689
 690	memcpy(fl6, &t->fl.u.ip6, sizeof(*fl6));
 691
 692	if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
 693		*dsfield = ipv6_get_dsfield(ipv6h);
 694	else
 695		*dsfield = ip6_tclass(t->parms.flowinfo);
 696
 697	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
 698		fl6->flowlabel |= ip6_flowlabel(ipv6h);
 699
 700	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
 701		fl6->flowi6_mark = skb->mark;
 702	else
 703		fl6->flowi6_mark = t->parms.fwmark;
 704
 705	fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
 706
 707	return 0;
 708}
 709
 710static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
 711			       struct net_device *dev, __u8 dsfield,
 712			       struct flowi6 *fl6, int encap_limit,
 713			       __u32 *pmtu, __be16 proto)
 714{
 715	struct ip6_tnl *tunnel = netdev_priv(dev);
 716	__be16 protocol;
 717
 718	if (dev->type == ARPHRD_ETHER)
 719		IPCB(skb)->flags = 0;
 720
 721	if (dev->header_ops && dev->type == ARPHRD_IP6GRE)
 722		fl6->daddr = ((struct ipv6hdr *)skb->data)->daddr;
 723	else
 724		fl6->daddr = tunnel->parms.raddr;
 725
 726	if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen))
 727		return -ENOMEM;
 728
 729	/* Push GRE header. */
 730	protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
 731
 732	if (tunnel->parms.collect_md) {
 733		struct ip_tunnel_info *tun_info;
 734		const struct ip_tunnel_key *key;
 735		__be16 flags;
 736
 737		tun_info = skb_tunnel_info(skb);
 738		if (unlikely(!tun_info ||
 739			     !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
 740			     ip_tunnel_info_af(tun_info) != AF_INET6))
 741			return -EINVAL;
 742
 743		key = &tun_info->key;
 744		memset(fl6, 0, sizeof(*fl6));
 745		fl6->flowi6_proto = IPPROTO_GRE;
 746		fl6->daddr = key->u.ipv6.dst;
 747		fl6->flowlabel = key->label;
 748		fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
 749
 750		dsfield = key->tos;
 751		flags = key->tun_flags &
 752			(TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
 753		tunnel->tun_hlen = gre_calc_hlen(flags);
 754
 755		gre_build_header(skb, tunnel->tun_hlen,
 756				 flags, protocol,
 757				 tunnel_id_to_key32(tun_info->key.tun_id),
 758				 (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++)
 759						      : 0);
 760
 761	} else {
 762		if (tunnel->parms.o_flags & TUNNEL_SEQ)
 763			tunnel->o_seqno++;
 764
 765		gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
 766				 protocol, tunnel->parms.o_key,
 767				 htonl(tunnel->o_seqno));
 768	}
 769
 770	return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
 771			    NEXTHDR_GRE);
 772}
 773
 774static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
 775{
 776	struct ip6_tnl *t = netdev_priv(dev);
 777	int encap_limit = -1;
 778	struct flowi6 fl6;
 779	__u8 dsfield = 0;
 780	__u32 mtu;
 781	int err;
 782
 783	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 784
 785	if (!t->parms.collect_md)
 786		prepare_ip6gre_xmit_ipv4(skb, dev, &fl6,
 787					 &dsfield, &encap_limit);
 788
 789	err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
 790	if (err)
 791		return -1;
 792
 793	err = __gre6_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
 794			  skb->protocol);
 795	if (err != 0) {
 796		/* XXX: send ICMP error even if DF is not set. */
 797		if (err == -EMSGSIZE)
 798			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
 799				  htonl(mtu));
 800		return -1;
 801	}
 802
 803	return 0;
 804}
 805
 806static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
 807{
 808	struct ip6_tnl *t = netdev_priv(dev);
 809	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 810	int encap_limit = -1;
 811	struct flowi6 fl6;
 812	__u8 dsfield = 0;
 813	__u32 mtu;
 814	int err;
 815
 816	if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr))
 817		return -1;
 818
 819	if (!t->parms.collect_md &&
 820	    prepare_ip6gre_xmit_ipv6(skb, dev, &fl6, &dsfield, &encap_limit))
 821		return -1;
 822
 823	if (gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)))
 824		return -1;
 825
 826	err = __gre6_xmit(skb, dev, dsfield, &fl6, encap_limit,
 827			  &mtu, skb->protocol);
 828	if (err != 0) {
 829		if (err == -EMSGSIZE)
 830			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 831		return -1;
 832	}
 833
 834	return 0;
 835}
 836
 837/**
 838 * ip6gre_tnl_addr_conflict - compare packet addresses to tunnel's own
 839 *   @t: the outgoing tunnel device
 840 *   @hdr: IPv6 header from the incoming packet
 841 *
 842 * Description:
 843 *   Avoid trivial tunneling loop by checking that tunnel exit-point
 844 *   doesn't match source of incoming packet.
 845 *
 846 * Return:
 847 *   1 if conflict,
 848 *   0 else
 849 **/
 850
 851static inline bool ip6gre_tnl_addr_conflict(const struct ip6_tnl *t,
 852	const struct ipv6hdr *hdr)
 853{
 854	return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
 855}
 856
 857static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev)
 858{
 859	struct ip6_tnl *t = netdev_priv(dev);
 860	int encap_limit = -1;
 861	struct flowi6 fl6;
 862	__u32 mtu;
 863	int err;
 864
 865	if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
 866		encap_limit = t->parms.encap_limit;
 867
 868	if (!t->parms.collect_md)
 869		memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
 870
 871	err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
 872	if (err)
 873		return err;
 874
 875	err = __gre6_xmit(skb, dev, 0, &fl6, encap_limit, &mtu, skb->protocol);
 876
 877	return err;
 878}
 879
 880static netdev_tx_t ip6gre_tunnel_xmit(struct sk_buff *skb,
 881	struct net_device *dev)
 882{
 883	struct ip6_tnl *t = netdev_priv(dev);
 884	struct net_device_stats *stats = &t->dev->stats;
 885	int ret;
 886
 887	if (!pskb_inet_may_pull(skb))
 888		goto tx_err;
 889
 890	if (!ip6_tnl_xmit_ctl(t, &t->parms.laddr, &t->parms.raddr))
 891		goto tx_err;
 892
 893	switch (skb->protocol) {
 894	case htons(ETH_P_IP):
 895		ret = ip6gre_xmit_ipv4(skb, dev);
 896		break;
 897	case htons(ETH_P_IPV6):
 898		ret = ip6gre_xmit_ipv6(skb, dev);
 899		break;
 900	default:
 901		ret = ip6gre_xmit_other(skb, dev);
 902		break;
 903	}
 904
 905	if (ret < 0)
 906		goto tx_err;
 907
 908	return NETDEV_TX_OK;
 909
 910tx_err:
 911	stats->tx_errors++;
 912	stats->tx_dropped++;
 913	kfree_skb(skb);
 914	return NETDEV_TX_OK;
 915}
 916
 917static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
 918					 struct net_device *dev)
 919{
 920	struct ip6_tnl *t = netdev_priv(dev);
 921	struct dst_entry *dst = skb_dst(skb);
 922	struct net_device_stats *stats;
 923	bool truncate = false;
 924	int encap_limit = -1;
 925	__u8 dsfield = false;
 926	struct flowi6 fl6;
 927	int err = -EINVAL;
 928	__be16 proto;
 929	__u32 mtu;
 930	int nhoff;
 931	int thoff;
 932
 933	if (!pskb_inet_may_pull(skb))
 934		goto tx_err;
 935
 936	if (!ip6_tnl_xmit_ctl(t, &t->parms.laddr, &t->parms.raddr))
 937		goto tx_err;
 938
 939	if (gre_handle_offloads(skb, false))
 940		goto tx_err;
 941
 942	if (skb->len > dev->mtu + dev->hard_header_len) {
 943		pskb_trim(skb, dev->mtu + dev->hard_header_len);
 944		truncate = true;
 945	}
 946
 947	nhoff = skb_network_header(skb) - skb_mac_header(skb);
 948	if (skb->protocol == htons(ETH_P_IP) &&
 949	    (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
 950		truncate = true;
 951
 952	thoff = skb_transport_header(skb) - skb_mac_header(skb);
 953	if (skb->protocol == htons(ETH_P_IPV6) &&
 954	    (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff))
 955		truncate = true;
 956
 957	if (skb_cow_head(skb, dev->needed_headroom ?: t->hlen))
 958		goto tx_err;
 959
 960	t->parms.o_flags &= ~TUNNEL_KEY;
 961	IPCB(skb)->flags = 0;
 962
 963	/* For collect_md mode, derive fl6 from the tunnel key,
 964	 * for native mode, call prepare_ip6gre_xmit_{ipv4,ipv6}.
 965	 */
 966	if (t->parms.collect_md) {
 967		struct ip_tunnel_info *tun_info;
 968		const struct ip_tunnel_key *key;
 969		struct erspan_metadata *md;
 970		__be32 tun_id;
 971
 972		tun_info = skb_tunnel_info(skb);
 973		if (unlikely(!tun_info ||
 974			     !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
 975			     ip_tunnel_info_af(tun_info) != AF_INET6))
 976			goto tx_err;
 977
 978		key = &tun_info->key;
 979		memset(&fl6, 0, sizeof(fl6));
 980		fl6.flowi6_proto = IPPROTO_GRE;
 981		fl6.daddr = key->u.ipv6.dst;
 982		fl6.flowlabel = key->label;
 983		fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
 984
 985		dsfield = key->tos;
 986		if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
 987			goto tx_err;
 988		if (tun_info->options_len < sizeof(*md))
 989			goto tx_err;
 990		md = ip_tunnel_info_opts(tun_info);
 991
 992		tun_id = tunnel_id_to_key32(key->tun_id);
 993		if (md->version == 1) {
 994			erspan_build_header(skb,
 995					    ntohl(tun_id),
 996					    ntohl(md->u.index), truncate,
 997					    false);
 998		} else if (md->version == 2) {
 999			erspan_build_header_v2(skb,
1000					       ntohl(tun_id),
1001					       md->u.md2.dir,
1002					       get_hwid(&md->u.md2),
1003					       truncate, false);
1004		} else {
1005			goto tx_err;
1006		}
1007	} else {
1008		switch (skb->protocol) {
1009		case htons(ETH_P_IP):
1010			memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1011			prepare_ip6gre_xmit_ipv4(skb, dev, &fl6,
1012						 &dsfield, &encap_limit);
1013			break;
1014		case htons(ETH_P_IPV6):
1015			if (ipv6_addr_equal(&t->parms.raddr, &ipv6_hdr(skb)->saddr))
1016				goto tx_err;
1017			if (prepare_ip6gre_xmit_ipv6(skb, dev, &fl6,
1018						     &dsfield, &encap_limit))
1019				goto tx_err;
1020			break;
1021		default:
1022			memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
1023			break;
1024		}
1025
1026		if (t->parms.erspan_ver == 1)
1027			erspan_build_header(skb, ntohl(t->parms.o_key),
1028					    t->parms.index,
1029					    truncate, false);
1030		else if (t->parms.erspan_ver == 2)
1031			erspan_build_header_v2(skb, ntohl(t->parms.o_key),
1032					       t->parms.dir,
1033					       t->parms.hwid,
1034					       truncate, false);
1035		else
1036			goto tx_err;
1037
1038		fl6.daddr = t->parms.raddr;
1039	}
1040
1041	/* Push GRE header. */
1042	proto = (t->parms.erspan_ver == 1) ? htons(ETH_P_ERSPAN)
1043					   : htons(ETH_P_ERSPAN2);
1044	gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(t->o_seqno++));
1045
1046	/* TooBig packet may have updated dst->dev's mtu */
1047	if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu)
1048		dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu, false);
1049
1050	err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
1051			   NEXTHDR_GRE);
1052	if (err != 0) {
1053		/* XXX: send ICMP error even if DF is not set. */
1054		if (err == -EMSGSIZE) {
1055			if (skb->protocol == htons(ETH_P_IP))
1056				icmp_send(skb, ICMP_DEST_UNREACH,
1057					  ICMP_FRAG_NEEDED, htonl(mtu));
1058			else
1059				icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1060		}
1061
1062		goto tx_err;
1063	}
1064	return NETDEV_TX_OK;
1065
1066tx_err:
1067	stats = &t->dev->stats;
1068	stats->tx_errors++;
1069	stats->tx_dropped++;
1070	kfree_skb(skb);
1071	return NETDEV_TX_OK;
1072}
1073
1074static void ip6gre_tnl_link_config_common(struct ip6_tnl *t)
1075{
1076	struct net_device *dev = t->dev;
1077	struct __ip6_tnl_parm *p = &t->parms;
1078	struct flowi6 *fl6 = &t->fl.u.ip6;
1079
1080	if (dev->type != ARPHRD_ETHER) {
1081		memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
1082		memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
1083	}
1084
1085	/* Set up flowi template */
1086	fl6->saddr = p->laddr;
1087	fl6->daddr = p->raddr;
1088	fl6->flowi6_oif = p->link;
1089	fl6->flowlabel = 0;
1090	fl6->flowi6_proto = IPPROTO_GRE;
1091
1092	if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS))
1093		fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
1094	if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL))
1095		fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo;
1096
1097	p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV|IP6_TNL_F_CAP_PER_PACKET);
1098	p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr);
1099
1100	if (p->flags&IP6_TNL_F_CAP_XMIT &&
1101			p->flags&IP6_TNL_F_CAP_RCV && dev->type != ARPHRD_ETHER)
1102		dev->flags |= IFF_POINTOPOINT;
1103	else
1104		dev->flags &= ~IFF_POINTOPOINT;
1105}
1106
1107static void ip6gre_tnl_link_config_route(struct ip6_tnl *t, int set_mtu,
1108					 int t_hlen)
1109{
1110	const struct __ip6_tnl_parm *p = &t->parms;
1111	struct net_device *dev = t->dev;
1112
1113	if (p->flags & IP6_TNL_F_CAP_XMIT) {
1114		int strict = (ipv6_addr_type(&p->raddr) &
1115			      (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));
1116
1117		struct rt6_info *rt = rt6_lookup(t->net,
1118						 &p->raddr, &p->laddr,
1119						 p->link, NULL, strict);
1120
1121		if (!rt)
1122			return;
1123
1124		if (rt->dst.dev) {
1125			dev->needed_headroom = rt->dst.dev->hard_header_len +
1126					       t_hlen;
1127
1128			if (set_mtu) {
1129				dev->mtu = rt->dst.dev->mtu - t_hlen;
1130				if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
1131					dev->mtu -= 8;
1132				if (dev->type == ARPHRD_ETHER)
1133					dev->mtu -= ETH_HLEN;
1134
1135				if (dev->mtu < IPV6_MIN_MTU)
1136					dev->mtu = IPV6_MIN_MTU;
1137			}
1138		}
1139		ip6_rt_put(rt);
1140	}
1141}
1142
1143static int ip6gre_calc_hlen(struct ip6_tnl *tunnel)
1144{
1145	int t_hlen;
1146
1147	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
1148	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
1149
1150	t_hlen = tunnel->hlen + sizeof(struct ipv6hdr);
1151	tunnel->dev->needed_headroom = LL_MAX_HEADER + t_hlen;
1152	return t_hlen;
1153}
1154
/*
 * Full link (re)configuration for an ip6gre tunnel: parameter-derived
 * state first, then header lengths, then route-derived headroom/MTU.
 */
static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
{
	int t_hlen;

	ip6gre_tnl_link_config_common(t);

	t_hlen = ip6gre_calc_hlen(t);
	ip6gre_tnl_link_config_route(t, set_mtu, t_hlen);
}
1160
1161static void ip6gre_tnl_copy_tnl_parm(struct ip6_tnl *t,
1162				     const struct __ip6_tnl_parm *p)
1163{
1164	t->parms.laddr = p->laddr;
1165	t->parms.raddr = p->raddr;
1166	t->parms.flags = p->flags;
1167	t->parms.hop_limit = p->hop_limit;
1168	t->parms.encap_limit = p->encap_limit;
1169	t->parms.flowinfo = p->flowinfo;
1170	t->parms.link = p->link;
1171	t->parms.proto = p->proto;
1172	t->parms.i_key = p->i_key;
1173	t->parms.o_key = p->o_key;
1174	t->parms.i_flags = p->i_flags;
1175	t->parms.o_flags = p->o_flags;
1176	t->parms.fwmark = p->fwmark;
1177	t->parms.erspan_ver = p->erspan_ver;
1178	t->parms.index = p->index;
1179	t->parms.dir = p->dir;
1180	t->parms.hwid = p->hwid;
1181	dst_cache_reset(&t->dst_cache);
1182}
1183
/*
 * Apply the new parameters @p to tunnel @t and reconfigure the link.
 * Always returns 0.
 */
static int ip6gre_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p,
			     int set_mtu)
{
	ip6gre_tnl_copy_tnl_parm(t, p);

	ip6gre_tnl_link_config(t, set_mtu);

	return 0;
}
1191
1192static void ip6gre_tnl_parm_from_user(struct __ip6_tnl_parm *p,
1193	const struct ip6_tnl_parm2 *u)
1194{
1195	p->laddr = u->laddr;
1196	p->raddr = u->raddr;
1197	p->flags = u->flags;
1198	p->hop_limit = u->hop_limit;
1199	p->encap_limit = u->encap_limit;
1200	p->flowinfo = u->flowinfo;
1201	p->link = u->link;
1202	p->i_key = u->i_key;
1203	p->o_key = u->o_key;
1204	p->i_flags = gre_flags_to_tnl_flags(u->i_flags);
1205	p->o_flags = gre_flags_to_tnl_flags(u->o_flags);
1206	memcpy(p->name, u->name, sizeof(u->name));
1207}
1208
1209static void ip6gre_tnl_parm_to_user(struct ip6_tnl_parm2 *u,
1210	const struct __ip6_tnl_parm *p)
1211{
1212	u->proto = IPPROTO_GRE;
1213	u->laddr = p->laddr;
1214	u->raddr = p->raddr;
1215	u->flags = p->flags;
1216	u->hop_limit = p->hop_limit;
1217	u->encap_limit = p->encap_limit;
1218	u->flowinfo = p->flowinfo;
1219	u->link = p->link;
1220	u->i_key = p->i_key;
1221	u->o_key = p->o_key;
1222	u->i_flags = gre_tnl_flags_to_gre_flags(p->i_flags);
1223	u->o_flags = gre_tnl_flags_to_gre_flags(p->o_flags);
1224	memcpy(u->name, p->name, sizeof(u->name));
1225}
1226
1227static int ip6gre_tunnel_ioctl(struct net_device *dev,
1228	struct ifreq *ifr, int cmd)
1229{
1230	int err = 0;
1231	struct ip6_tnl_parm2 p;
1232	struct __ip6_tnl_parm p1;
1233	struct ip6_tnl *t = netdev_priv(dev);
1234	struct net *net = t->net;
1235	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
1236
1237	memset(&p1, 0, sizeof(p1));
1238
1239	switch (cmd) {
1240	case SIOCGETTUNNEL:
1241		if (dev == ign->fb_tunnel_dev) {
1242			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
1243				err = -EFAULT;
1244				break;
1245			}
1246			ip6gre_tnl_parm_from_user(&p1, &p);
1247			t = ip6gre_tunnel_locate(net, &p1, 0);
1248			if (!t)
1249				t = netdev_priv(dev);
1250		}
1251		memset(&p, 0, sizeof(p));
1252		ip6gre_tnl_parm_to_user(&p, &t->parms);
1253		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
1254			err = -EFAULT;
1255		break;
1256
1257	case SIOCADDTUNNEL:
1258	case SIOCCHGTUNNEL:
1259		err = -EPERM;
1260		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1261			goto done;
1262
1263		err = -EFAULT;
1264		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1265			goto done;
1266
1267		err = -EINVAL;
1268		if ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))
1269			goto done;
1270
1271		if (!(p.i_flags&GRE_KEY))
1272			p.i_key = 0;
1273		if (!(p.o_flags&GRE_KEY))
1274			p.o_key = 0;
1275
1276		ip6gre_tnl_parm_from_user(&p1, &p);
1277		t = ip6gre_tunnel_locate(net, &p1, cmd == SIOCADDTUNNEL);
1278
1279		if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
1280			if (t) {
1281				if (t->dev != dev) {
1282					err = -EEXIST;
1283					break;
1284				}
1285			} else {
1286				t = netdev_priv(dev);
1287
1288				ip6gre_tunnel_unlink(ign, t);
1289				synchronize_net();
1290				ip6gre_tnl_change(t, &p1, 1);
1291				ip6gre_tunnel_link(ign, t);
1292				netdev_state_change(dev);
1293			}
1294		}
1295
1296		if (t) {
1297			err = 0;
1298
1299			memset(&p, 0, sizeof(p));
1300			ip6gre_tnl_parm_to_user(&p, &t->parms);
1301			if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
1302				err = -EFAULT;
1303		} else
1304			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1305		break;
1306
1307	case SIOCDELTUNNEL:
1308		err = -EPERM;
1309		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1310			goto done;
1311
1312		if (dev == ign->fb_tunnel_dev) {
1313			err = -EFAULT;
1314			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1315				goto done;
1316			err = -ENOENT;
1317			ip6gre_tnl_parm_from_user(&p1, &p);
1318			t = ip6gre_tunnel_locate(net, &p1, 0);
1319			if (!t)
1320				goto done;
1321			err = -EPERM;
1322			if (t == netdev_priv(ign->fb_tunnel_dev))
1323				goto done;
1324			dev = t->dev;
1325		}
1326		unregister_netdevice(dev);
1327		err = 0;
1328		break;
1329
1330	default:
1331		err = -EINVAL;
1332	}
1333
1334done:
1335	return err;
1336}
1337
1338static int ip6gre_header(struct sk_buff *skb, struct net_device *dev,
1339			 unsigned short type, const void *daddr,
1340			 const void *saddr, unsigned int len)
1341{
1342	struct ip6_tnl *t = netdev_priv(dev);
1343	struct ipv6hdr *ipv6h;
1344	__be16 *p;
1345
1346	ipv6h = skb_push(skb, t->hlen + sizeof(*ipv6h));
1347	ip6_flow_hdr(ipv6h, 0, ip6_make_flowlabel(dev_net(dev), skb,
1348						  t->fl.u.ip6.flowlabel,
1349						  true, &t->fl.u.ip6));
1350	ipv6h->hop_limit = t->parms.hop_limit;
1351	ipv6h->nexthdr = NEXTHDR_GRE;
1352	ipv6h->saddr = t->parms.laddr;
1353	ipv6h->daddr = t->parms.raddr;
1354
1355	p = (__be16 *)(ipv6h + 1);
1356	p[0] = t->parms.o_flags;
1357	p[1] = htons(type);
1358
1359	/*
1360	 *	Set the source hardware address.
1361	 */
1362
1363	if (saddr)
1364		memcpy(&ipv6h->saddr, saddr, sizeof(struct in6_addr));
1365	if (daddr)
1366		memcpy(&ipv6h->daddr, daddr, sizeof(struct in6_addr));
1367	if (!ipv6_addr_any(&ipv6h->daddr))
1368		return t->hlen;
1369
1370	return -t->hlen;
1371}
1372
1373static const struct header_ops ip6gre_header_ops = {
1374	.create	= ip6gre_header,
1375};
1376
1377static const struct net_device_ops ip6gre_netdev_ops = {
1378	.ndo_init		= ip6gre_tunnel_init,
1379	.ndo_uninit		= ip6gre_tunnel_uninit,
1380	.ndo_start_xmit		= ip6gre_tunnel_xmit,
1381	.ndo_do_ioctl		= ip6gre_tunnel_ioctl,
1382	.ndo_change_mtu		= ip6_tnl_change_mtu,
1383	.ndo_get_stats64	= ip_tunnel_get_stats64,
1384	.ndo_get_iflink		= ip6_tnl_get_iflink,
1385};
1386
1387static void ip6gre_dev_free(struct net_device *dev)
1388{
1389	struct ip6_tnl *t = netdev_priv(dev);
1390
1391	gro_cells_destroy(&t->gro_cells);
1392	dst_cache_destroy(&t->dst_cache);
1393	free_percpu(dev->tstats);
1394}
1395
1396static void ip6gre_tunnel_setup(struct net_device *dev)
1397{
1398	dev->netdev_ops = &ip6gre_netdev_ops;
1399	dev->needs_free_netdev = true;
1400	dev->priv_destructor = ip6gre_dev_free;
1401
1402	dev->type = ARPHRD_IP6GRE;
1403
1404	dev->flags |= IFF_NOARP;
1405	dev->addr_len = sizeof(struct in6_addr);
1406	netif_keep_dst(dev);
1407	/* This perm addr will be used as interface identifier by IPv6 */
1408	dev->addr_assign_type = NET_ADDR_RANDOM;
1409	eth_random_addr(dev->perm_addr);
1410}
1411
/* Feature bits enabled on every GRE device initialised in this file. */
#define GRE6_FEATURES \
	(NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM)
1416
1417static void ip6gre_tnl_init_features(struct net_device *dev)
1418{
1419	struct ip6_tnl *nt = netdev_priv(dev);
1420
1421	dev->features		|= GRE6_FEATURES;
1422	dev->hw_features	|= GRE6_FEATURES;
1423
1424	if (!(nt->parms.o_flags & TUNNEL_SEQ)) {
1425		/* TCP offload with GRE SEQ is not supported, nor
1426		 * can we support 2 levels of outer headers requiring
1427		 * an update.
1428		 */
1429		if (!(nt->parms.o_flags & TUNNEL_CSUM) ||
1430		    nt->encap.type == TUNNEL_ENCAP_NONE) {
1431			dev->features    |= NETIF_F_GSO_SOFTWARE;
1432			dev->hw_features |= NETIF_F_GSO_SOFTWARE;
1433		}
1434
1435		/* Can use a lockless transmit, unless we generate
1436		 * output sequences
1437		 */
1438		dev->features |= NETIF_F_LLTX;
1439	}
1440}
1441
1442static int ip6gre_tunnel_init_common(struct net_device *dev)
1443{
1444	struct ip6_tnl *tunnel;
1445	int ret;
1446	int t_hlen;
1447
1448	tunnel = netdev_priv(dev);
1449
1450	tunnel->dev = dev;
1451	tunnel->net = dev_net(dev);
1452	strcpy(tunnel->parms.name, dev->name);
1453
1454	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1455	if (!dev->tstats)
1456		return -ENOMEM;
1457
1458	ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
1459	if (ret)
1460		goto cleanup_alloc_pcpu_stats;
1461
1462	ret = gro_cells_init(&tunnel->gro_cells, dev);
1463	if (ret)
1464		goto cleanup_dst_cache_init;
1465
1466	t_hlen = ip6gre_calc_hlen(tunnel);
1467	dev->mtu = ETH_DATA_LEN - t_hlen;
1468	if (dev->type == ARPHRD_ETHER)
1469		dev->mtu -= ETH_HLEN;
1470	if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
1471		dev->mtu -= 8;
1472
1473	if (tunnel->parms.collect_md) {
1474		netif_keep_dst(dev);
1475	}
1476	ip6gre_tnl_init_features(dev);
1477
1478	return 0;
1479
1480cleanup_dst_cache_init:
1481	dst_cache_destroy(&tunnel->dst_cache);
1482cleanup_alloc_pcpu_stats:
1483	free_percpu(dev->tstats);
1484	dev->tstats = NULL;
1485	return ret;
1486}
1487
1488static int ip6gre_tunnel_init(struct net_device *dev)
1489{
1490	struct ip6_tnl *tunnel;
1491	int ret;
1492
1493	ret = ip6gre_tunnel_init_common(dev);
1494	if (ret)
1495		return ret;
1496
1497	tunnel = netdev_priv(dev);
1498
1499	if (tunnel->parms.collect_md)
1500		return 0;
1501
1502	memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr));
1503	memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr));
1504
1505	if (ipv6_addr_any(&tunnel->parms.raddr))
1506		dev->header_ops = &ip6gre_header_ops;
1507
1508	return 0;
1509}
1510
1511static void ip6gre_fb_tunnel_init(struct net_device *dev)
1512{
1513	struct ip6_tnl *tunnel = netdev_priv(dev);
1514
1515	tunnel->dev = dev;
1516	tunnel->net = dev_net(dev);
1517	strcpy(tunnel->parms.name, dev->name);
1518
1519	tunnel->hlen		= sizeof(struct ipv6hdr) + 4;
1520
1521	dev_hold(dev);
1522}
1523
1524static struct inet6_protocol ip6gre_protocol __read_mostly = {
1525	.handler     = gre_rcv,
1526	.err_handler = ip6gre_err,
1527	.flags       = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
1528};
1529
1530static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head)
1531{
1532	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
1533	struct net_device *dev, *aux;
1534	int prio;
1535
1536	for_each_netdev_safe(net, dev, aux)
1537		if (dev->rtnl_link_ops == &ip6gre_link_ops ||
1538		    dev->rtnl_link_ops == &ip6gre_tap_ops ||
1539		    dev->rtnl_link_ops == &ip6erspan_tap_ops)
1540			unregister_netdevice_queue(dev, head);
1541
1542	for (prio = 0; prio < 4; prio++) {
1543		int h;
1544		for (h = 0; h < IP6_GRE_HASH_SIZE; h++) {
1545			struct ip6_tnl *t;
1546
1547			t = rtnl_dereference(ign->tunnels[prio][h]);
1548
1549			while (t) {
1550				/* If dev is in the same netns, it has already
1551				 * been added to the list by the previous loop.
1552				 */
1553				if (!net_eq(dev_net(t->dev), net))
1554					unregister_netdevice_queue(t->dev,
1555								   head);
1556				t = rtnl_dereference(t->next);
1557			}
1558		}
1559	}
1560}
1561
1562static int __net_init ip6gre_init_net(struct net *net)
1563{
1564	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
1565	struct net_device *ndev;
1566	int err;
1567
1568	if (!net_has_fallback_tunnels(net))
1569		return 0;
1570	ndev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0",
1571			    NET_NAME_UNKNOWN, ip6gre_tunnel_setup);
1572	if (!ndev) {
1573		err = -ENOMEM;
1574		goto err_alloc_dev;
1575	}
1576	ign->fb_tunnel_dev = ndev;
1577	dev_net_set(ign->fb_tunnel_dev, net);
1578	/* FB netdevice is special: we have one, and only one per netns.
1579	 * Allowing to move it to another netns is clearly unsafe.
1580	 */
1581	ign->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
1582
1583
1584	ip6gre_fb_tunnel_init(ign->fb_tunnel_dev);
1585	ign->fb_tunnel_dev->rtnl_link_ops = &ip6gre_link_ops;
1586
1587	err = register_netdev(ign->fb_tunnel_dev);
1588	if (err)
1589		goto err_reg_dev;
1590
1591	rcu_assign_pointer(ign->tunnels_wc[0],
1592			   netdev_priv(ign->fb_tunnel_dev));
1593	return 0;
1594
1595err_reg_dev:
1596	free_netdev(ndev);
1597err_alloc_dev:
1598	return err;
1599}
1600
1601static void __net_exit ip6gre_exit_batch_net(struct list_head *net_list)
1602{
1603	struct net *net;
1604	LIST_HEAD(list);
1605
1606	rtnl_lock();
1607	list_for_each_entry(net, net_list, exit_list)
1608		ip6gre_destroy_tunnels(net, &list);
1609	unregister_netdevice_many(&list);
1610	rtnl_unlock();
1611}
1612
1613static struct pernet_operations ip6gre_net_ops = {
1614	.init = ip6gre_init_net,
1615	.exit_batch = ip6gre_exit_batch_net,
1616	.id   = &ip6gre_net_id,
1617	.size = sizeof(struct ip6gre_net),
1618};
1619
1620static int ip6gre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
1621				  struct netlink_ext_ack *extack)
1622{
1623	__be16 flags;
1624
1625	if (!data)
1626		return 0;
1627
1628	flags = 0;
1629	if (data[IFLA_GRE_IFLAGS])
1630		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1631	if (data[IFLA_GRE_OFLAGS])
1632		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1633	if (flags & (GRE_VERSION|GRE_ROUTING))
1634		return -EINVAL;
1635
1636	return 0;
1637}
1638
1639static int ip6gre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
1640			       struct netlink_ext_ack *extack)
1641{
1642	struct in6_addr daddr;
1643
1644	if (tb[IFLA_ADDRESS]) {
1645		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1646			return -EINVAL;
1647		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1648			return -EADDRNOTAVAIL;
1649	}
1650
1651	if (!data)
1652		goto out;
1653
1654	if (data[IFLA_GRE_REMOTE]) {
1655		daddr = nla_get_in6_addr(data[IFLA_GRE_REMOTE]);
1656		if (ipv6_addr_any(&daddr))
1657			return -EINVAL;
1658	}
1659
1660out:
1661	return ip6gre_tunnel_validate(tb, data, extack);
1662}
1663
1664static int ip6erspan_tap_validate(struct nlattr *tb[], struct nlattr *data[],
1665				  struct netlink_ext_ack *extack)
1666{
1667	__be16 flags = 0;
1668	int ret, ver = 0;
1669
1670	if (!data)
1671		return 0;
1672
1673	ret = ip6gre_tap_validate(tb, data, extack);
1674	if (ret)
1675		return ret;
1676
1677	/* ERSPAN should only have GRE sequence and key flag */
1678	if (data[IFLA_GRE_OFLAGS])
1679		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1680	if (data[IFLA_GRE_IFLAGS])
1681		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1682	if (!data[IFLA_GRE_COLLECT_METADATA] &&
1683	    flags != (GRE_SEQ | GRE_KEY))
1684		return -EINVAL;
1685
1686	/* ERSPAN Session ID only has 10-bit. Since we reuse
1687	 * 32-bit key field as ID, check it's range.
1688	 */
1689	if (data[IFLA_GRE_IKEY] &&
1690	    (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
1691		return -EINVAL;
1692
1693	if (data[IFLA_GRE_OKEY] &&
1694	    (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
1695		return -EINVAL;
1696
1697	if (data[IFLA_GRE_ERSPAN_VER]) {
1698		ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
1699		if (ver != 1 && ver != 2)
1700			return -EINVAL;
1701	}
1702
1703	if (ver == 1) {
1704		if (data[IFLA_GRE_ERSPAN_INDEX]) {
1705			u32 index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
1706
1707			if (index & ~INDEX_MASK)
1708				return -EINVAL;
1709		}
1710	} else if (ver == 2) {
1711		if (data[IFLA_GRE_ERSPAN_DIR]) {
1712			u16 dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
1713
1714			if (dir & ~(DIR_MASK >> DIR_OFFSET))
1715				return -EINVAL;
1716		}
1717
1718		if (data[IFLA_GRE_ERSPAN_HWID]) {
1719			u16 hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
1720
1721			if (hwid & ~(HWID_MASK >> HWID_OFFSET))
1722				return -EINVAL;
1723		}
1724	}
1725
1726	return 0;
1727}
1728
1729static void ip6erspan_set_version(struct nlattr *data[],
1730				  struct __ip6_tnl_parm *parms)
1731{
1732	if (!data)
1733		return;
1734
1735	parms->erspan_ver = 1;
1736	if (data[IFLA_GRE_ERSPAN_VER])
1737		parms->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
1738
1739	if (parms->erspan_ver == 1) {
1740		if (data[IFLA_GRE_ERSPAN_INDEX])
1741			parms->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
1742	} else if (parms->erspan_ver == 2) {
1743		if (data[IFLA_GRE_ERSPAN_DIR])
1744			parms->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
1745		if (data[IFLA_GRE_ERSPAN_HWID])
1746			parms->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
1747	}
1748}
1749
1750static void ip6gre_netlink_parms(struct nlattr *data[],
1751				struct __ip6_tnl_parm *parms)
1752{
1753	memset(parms, 0, sizeof(*parms));
1754
1755	if (!data)
1756		return;
1757
1758	if (data[IFLA_GRE_LINK])
1759		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1760
1761	if (data[IFLA_GRE_IFLAGS])
1762		parms->i_flags = gre_flags_to_tnl_flags(
1763				nla_get_be16(data[IFLA_GRE_IFLAGS]));
1764
1765	if (data[IFLA_GRE_OFLAGS])
1766		parms->o_flags = gre_flags_to_tnl_flags(
1767				nla_get_be16(data[IFLA_GRE_OFLAGS]));
1768
1769	if (data[IFLA_GRE_IKEY])
1770		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1771
1772	if (data[IFLA_GRE_OKEY])
1773		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1774
1775	if (data[IFLA_GRE_LOCAL])
1776		parms->laddr = nla_get_in6_addr(data[IFLA_GRE_LOCAL]);
1777
1778	if (data[IFLA_GRE_REMOTE])
1779		parms->raddr = nla_get_in6_addr(data[IFLA_GRE_REMOTE]);
1780
1781	if (data[IFLA_GRE_TTL])
1782		parms->hop_limit = nla_get_u8(data[IFLA_GRE_TTL]);
1783
1784	if (data[IFLA_GRE_ENCAP_LIMIT])
1785		parms->encap_limit = nla_get_u8(data[IFLA_GRE_ENCAP_LIMIT]);
1786
1787	if (data[IFLA_GRE_FLOWINFO])
1788		parms->flowinfo = nla_get_be32(data[IFLA_GRE_FLOWINFO]);
1789
1790	if (data[IFLA_GRE_FLAGS])
1791		parms->flags = nla_get_u32(data[IFLA_GRE_FLAGS]);
1792
1793	if (data[IFLA_GRE_FWMARK])
1794		parms->fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
1795
1796	if (data[IFLA_GRE_COLLECT_METADATA])
1797		parms->collect_md = true;
1798}
1799
1800static int ip6gre_tap_init(struct net_device *dev)
1801{
1802	int ret;
1803
1804	ret = ip6gre_tunnel_init_common(dev);
1805	if (ret)
1806		return ret;
1807
1808	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1809
1810	return 0;
1811}
1812
1813static const struct net_device_ops ip6gre_tap_netdev_ops = {
1814	.ndo_init = ip6gre_tap_init,
1815	.ndo_uninit = ip6gre_tunnel_uninit,
1816	.ndo_start_xmit = ip6gre_tunnel_xmit,
1817	.ndo_set_mac_address = eth_mac_addr,
1818	.ndo_validate_addr = eth_validate_addr,
1819	.ndo_change_mtu = ip6_tnl_change_mtu,
1820	.ndo_get_stats64 = ip_tunnel_get_stats64,
1821	.ndo_get_iflink = ip6_tnl_get_iflink,
1822};
1823
1824static int ip6erspan_calc_hlen(struct ip6_tnl *tunnel)
1825{
1826	int t_hlen;
1827
1828	tunnel->tun_hlen = 8;
1829	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
1830		       erspan_hdr_len(tunnel->parms.erspan_ver);
1831
1832	t_hlen = tunnel->hlen + sizeof(struct ipv6hdr);
1833	tunnel->dev->needed_headroom = LL_MAX_HEADER + t_hlen;
1834	return t_hlen;
1835}
1836
1837static int ip6erspan_tap_init(struct net_device *dev)
1838{
1839	struct ip6_tnl *tunnel;
1840	int t_hlen;
1841	int ret;
1842
1843	tunnel = netdev_priv(dev);
1844
1845	tunnel->dev = dev;
1846	tunnel->net = dev_net(dev);
1847	strcpy(tunnel->parms.name, dev->name);
1848
1849	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1850	if (!dev->tstats)
1851		return -ENOMEM;
1852
1853	ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
1854	if (ret)
1855		goto cleanup_alloc_pcpu_stats;
1856
1857	ret = gro_cells_init(&tunnel->gro_cells, dev);
1858	if (ret)
1859		goto cleanup_dst_cache_init;
1860
1861	t_hlen = ip6erspan_calc_hlen(tunnel);
1862	dev->mtu = ETH_DATA_LEN - t_hlen;
1863	if (dev->type == ARPHRD_ETHER)
1864		dev->mtu -= ETH_HLEN;
1865	if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
1866		dev->mtu -= 8;
1867
1868	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1869	ip6erspan_tnl_link_config(tunnel, 1);
1870
1871	return 0;
1872
1873cleanup_dst_cache_init:
1874	dst_cache_destroy(&tunnel->dst_cache);
1875cleanup_alloc_pcpu_stats:
1876	free_percpu(dev->tstats);
1877	dev->tstats = NULL;
1878	return ret;
1879}
1880
1881static const struct net_device_ops ip6erspan_netdev_ops = {
1882	.ndo_init =		ip6erspan_tap_init,
1883	.ndo_uninit =		ip6erspan_tunnel_uninit,
1884	.ndo_start_xmit =	ip6erspan_tunnel_xmit,
1885	.ndo_set_mac_address =	eth_mac_addr,
1886	.ndo_validate_addr =	eth_validate_addr,
1887	.ndo_change_mtu =	ip6_tnl_change_mtu,
1888	.ndo_get_stats64 =	ip_tunnel_get_stats64,
1889	.ndo_get_iflink =	ip6_tnl_get_iflink,
1890};
1891
1892static void ip6gre_tap_setup(struct net_device *dev)
1893{
1894
1895	ether_setup(dev);
1896
1897	dev->max_mtu = 0;
1898	dev->netdev_ops = &ip6gre_tap_netdev_ops;
1899	dev->needs_free_netdev = true;
1900	dev->priv_destructor = ip6gre_dev_free;
1901
1902	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1903	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1904	netif_keep_dst(dev);
1905}
1906
1907static bool ip6gre_netlink_encap_parms(struct nlattr *data[],
1908				       struct ip_tunnel_encap *ipencap)
1909{
1910	bool ret = false;
1911
1912	memset(ipencap, 0, sizeof(*ipencap));
1913
1914	if (!data)
1915		return ret;
1916
1917	if (data[IFLA_GRE_ENCAP_TYPE]) {
1918		ret = true;
1919		ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
1920	}
1921
1922	if (data[IFLA_GRE_ENCAP_FLAGS]) {
1923		ret = true;
1924		ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
1925	}
1926
1927	if (data[IFLA_GRE_ENCAP_SPORT]) {
1928		ret = true;
1929		ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
1930	}
1931
1932	if (data[IFLA_GRE_ENCAP_DPORT]) {
1933		ret = true;
1934		ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
1935	}
1936
1937	return ret;
1938}
1939
1940static int ip6gre_newlink_common(struct net *src_net, struct net_device *dev,
1941				 struct nlattr *tb[], struct nlattr *data[],
1942				 struct netlink_ext_ack *extack)
1943{
1944	struct ip6_tnl *nt;
1945	struct ip_tunnel_encap ipencap;
1946	int err;
1947
1948	nt = netdev_priv(dev);
1949
1950	if (ip6gre_netlink_encap_parms(data, &ipencap)) {
1951		int err = ip6_tnl_encap_setup(nt, &ipencap);
1952
1953		if (err < 0)
1954			return err;
1955	}
1956
1957	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1958		eth_hw_addr_random(dev);
1959
1960	nt->dev = dev;
1961	nt->net = dev_net(dev);
1962
1963	err = register_netdevice(dev);
1964	if (err)
1965		goto out;
1966
1967	if (tb[IFLA_MTU])
1968		ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
1969
1970	dev_hold(dev);
1971
1972out:
1973	return err;
1974}
1975
1976static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
1977			  struct nlattr *tb[], struct nlattr *data[],
1978			  struct netlink_ext_ack *extack)
1979{
1980	struct ip6_tnl *nt = netdev_priv(dev);
1981	struct net *net = dev_net(dev);
1982	struct ip6gre_net *ign;
1983	int err;
1984
1985	ip6gre_netlink_parms(data, &nt->parms);
1986	ign = net_generic(net, ip6gre_net_id);
1987
1988	if (nt->parms.collect_md) {
1989		if (rtnl_dereference(ign->collect_md_tun))
1990			return -EEXIST;
1991	} else {
1992		if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
1993			return -EEXIST;
1994	}
1995
1996	err = ip6gre_newlink_common(src_net, dev, tb, data, extack);
1997	if (!err) {
1998		ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
1999		ip6gre_tunnel_link_md(ign, nt);
2000		ip6gre_tunnel_link(net_generic(net, ip6gre_net_id), nt);
2001	}
2002	return err;
2003}
2004
2005static struct ip6_tnl *
2006ip6gre_changelink_common(struct net_device *dev, struct nlattr *tb[],
2007			 struct nlattr *data[], struct __ip6_tnl_parm *p_p,
2008			 struct netlink_ext_ack *extack)
2009{
2010	struct ip6_tnl *t, *nt = netdev_priv(dev);
2011	struct net *net = nt->net;
2012	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
2013	struct ip_tunnel_encap ipencap;
2014
2015	if (dev == ign->fb_tunnel_dev)
2016		return ERR_PTR(-EINVAL);
2017
2018	if (ip6gre_netlink_encap_parms(data, &ipencap)) {
2019		int err = ip6_tnl_encap_setup(nt, &ipencap);
2020
2021		if (err < 0)
2022			return ERR_PTR(err);
2023	}
2024
2025	ip6gre_netlink_parms(data, p_p);
2026
2027	t = ip6gre_tunnel_locate(net, p_p, 0);
2028
2029	if (t) {
2030		if (t->dev != dev)
2031			return ERR_PTR(-EEXIST);
2032	} else {
2033		t = nt;
2034	}
2035
2036	return t;
2037}
2038
2039static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],
2040			     struct nlattr *data[],
2041			     struct netlink_ext_ack *extack)
2042{
2043	struct ip6_tnl *t = netdev_priv(dev);
2044	struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id);
2045	struct __ip6_tnl_parm p;
2046
2047	t = ip6gre_changelink_common(dev, tb, data, &p, extack);
2048	if (IS_ERR(t))
2049		return PTR_ERR(t);
2050
2051	ip6gre_tunnel_unlink_md(ign, t);
2052	ip6gre_tunnel_unlink(ign, t);
2053	ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]);
2054	ip6gre_tunnel_link_md(ign, t);
2055	ip6gre_tunnel_link(ign, t);
2056	return 0;
2057}
2058
2059static void ip6gre_dellink(struct net_device *dev, struct list_head *head)
2060{
2061	struct net *net = dev_net(dev);
2062	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
2063
2064	if (dev != ign->fb_tunnel_dev)
2065		unregister_netdevice_queue(dev, head);
2066}
2067
2068static size_t ip6gre_get_size(const struct net_device *dev)
2069{
2070	return
2071		/* IFLA_GRE_LINK */
2072		nla_total_size(4) +
2073		/* IFLA_GRE_IFLAGS */
2074		nla_total_size(2) +
2075		/* IFLA_GRE_OFLAGS */
2076		nla_total_size(2) +
2077		/* IFLA_GRE_IKEY */
2078		nla_total_size(4) +
2079		/* IFLA_GRE_OKEY */
2080		nla_total_size(4) +
2081		/* IFLA_GRE_LOCAL */
2082		nla_total_size(sizeof(struct in6_addr)) +
2083		/* IFLA_GRE_REMOTE */
2084		nla_total_size(sizeof(struct in6_addr)) +
2085		/* IFLA_GRE_TTL */
2086		nla_total_size(1) +
2087		/* IFLA_GRE_ENCAP_LIMIT */
2088		nla_total_size(1) +
2089		/* IFLA_GRE_FLOWINFO */
2090		nla_total_size(4) +
2091		/* IFLA_GRE_FLAGS */
2092		nla_total_size(4) +
2093		/* IFLA_GRE_ENCAP_TYPE */
2094		nla_total_size(2) +
2095		/* IFLA_GRE_ENCAP_FLAGS */
2096		nla_total_size(2) +
2097		/* IFLA_GRE_ENCAP_SPORT */
2098		nla_total_size(2) +
2099		/* IFLA_GRE_ENCAP_DPORT */
2100		nla_total_size(2) +
2101		/* IFLA_GRE_COLLECT_METADATA */
2102		nla_total_size(0) +
2103		/* IFLA_GRE_FWMARK */
2104		nla_total_size(4) +
2105		/* IFLA_GRE_ERSPAN_INDEX */
2106		nla_total_size(4) +
2107		0;
2108}
2109
2110static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
2111{
2112	struct ip6_tnl *t = netdev_priv(dev);
2113	struct __ip6_tnl_parm *p = &t->parms;
2114	__be16 o_flags = p->o_flags;
2115
2116	if (p->erspan_ver == 1 || p->erspan_ver == 2) {
2117		if (!p->collect_md)
2118			o_flags |= TUNNEL_KEY;
2119
2120		if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, p->erspan_ver))
2121			goto nla_put_failure;
2122
2123		if (p->erspan_ver == 1) {
2124			if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, p->index))
2125				goto nla_put_failure;
2126		} else {
2127			if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, p->dir))
2128				goto nla_put_failure;
2129			if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, p->hwid))
2130				goto nla_put_failure;
2131		}
2132	}
2133
2134	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
2135	    nla_put_be16(skb, IFLA_GRE_IFLAGS,
2136			 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
2137	    nla_put_be16(skb, IFLA_GRE_OFLAGS,
2138			 gre_tnl_flags_to_gre_flags(o_flags)) ||
2139	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
2140	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
2141	    nla_put_in6_addr(skb, IFLA_GRE_LOCAL, &p->laddr) ||
2142	    nla_put_in6_addr(skb, IFLA_GRE_REMOTE, &p->raddr) ||
2143	    nla_put_u8(skb, IFLA_GRE_TTL, p->hop_limit) ||
2144	    nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) ||
2145	    nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) ||
2146	    nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags) ||
2147	    nla_put_u32(skb, IFLA_GRE_FWMARK, p->fwmark))
2148		goto nla_put_failure;
2149
2150	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
2151			t->encap.type) ||
2152	    nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
2153			 t->encap.sport) ||
2154	    nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
2155			 t->encap.dport) ||
2156	    nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
2157			t->encap.flags))
2158		goto nla_put_failure;
2159
2160	if (p->collect_md) {
2161		if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
2162			goto nla_put_failure;
2163	}
2164
2165	return 0;
2166
2167nla_put_failure:
2168	return -EMSGSIZE;
2169}
2170
2171static const struct nla_policy ip6gre_policy[IFLA_GRE_MAX + 1] = {
2172	[IFLA_GRE_LINK]        = { .type = NLA_U32 },
2173	[IFLA_GRE_IFLAGS]      = { .type = NLA_U16 },
2174	[IFLA_GRE_OFLAGS]      = { .type = NLA_U16 },
2175	[IFLA_GRE_IKEY]        = { .type = NLA_U32 },
2176	[IFLA_GRE_OKEY]        = { .type = NLA_U32 },
2177	[IFLA_GRE_LOCAL]       = { .len = sizeof_field(struct ipv6hdr, saddr) },
2178	[IFLA_GRE_REMOTE]      = { .len = sizeof_field(struct ipv6hdr, daddr) },
2179	[IFLA_GRE_TTL]         = { .type = NLA_U8 },
2180	[IFLA_GRE_ENCAP_LIMIT] = { .type = NLA_U8 },
2181	[IFLA_GRE_FLOWINFO]    = { .type = NLA_U32 },
2182	[IFLA_GRE_FLAGS]       = { .type = NLA_U32 },
2183	[IFLA_GRE_ENCAP_TYPE]   = { .type = NLA_U16 },
2184	[IFLA_GRE_ENCAP_FLAGS]  = { .type = NLA_U16 },
2185	[IFLA_GRE_ENCAP_SPORT]  = { .type = NLA_U16 },
2186	[IFLA_GRE_ENCAP_DPORT]  = { .type = NLA_U16 },
2187	[IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG },
2188	[IFLA_GRE_FWMARK]       = { .type = NLA_U32 },
2189	[IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 },
2190	[IFLA_GRE_ERSPAN_VER]	= { .type = NLA_U8 },
2191	[IFLA_GRE_ERSPAN_DIR]	= { .type = NLA_U8 },
2192	[IFLA_GRE_ERSPAN_HWID]	= { .type = NLA_U16 },
2193};
2194
2195static void ip6erspan_tap_setup(struct net_device *dev)
2196{
2197	ether_setup(dev);
2198
2199	dev->max_mtu = 0;
2200	dev->netdev_ops = &ip6erspan_netdev_ops;
2201	dev->needs_free_netdev = true;
2202	dev->priv_destructor = ip6gre_dev_free;
2203
2204	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
2205	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
2206	netif_keep_dst(dev);
2207}
2208
2209static int ip6erspan_newlink(struct net *src_net, struct net_device *dev,
2210			     struct nlattr *tb[], struct nlattr *data[],
2211			     struct netlink_ext_ack *extack)
2212{
2213	struct ip6_tnl *nt = netdev_priv(dev);
2214	struct net *net = dev_net(dev);
2215	struct ip6gre_net *ign;
2216	int err;
2217
2218	ip6gre_netlink_parms(data, &nt->parms);
2219	ip6erspan_set_version(data, &nt->parms);
2220	ign = net_generic(net, ip6gre_net_id);
2221
2222	if (nt->parms.collect_md) {
2223		if (rtnl_dereference(ign->collect_md_tun_erspan))
2224			return -EEXIST;
2225	} else {
2226		if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
2227			return -EEXIST;
2228	}
2229
2230	err = ip6gre_newlink_common(src_net, dev, tb, data, extack);
2231	if (!err) {
2232		ip6erspan_tnl_link_config(nt, !tb[IFLA_MTU]);
2233		ip6erspan_tunnel_link_md(ign, nt);
2234		ip6gre_tunnel_link(net_generic(net, ip6gre_net_id), nt);
2235	}
2236	return err;
2237}
2238
/*
 * Apply the tunnel's current parameters to its device: first the settings
 * shared with plain ip6gre, then the route-dependent part using the ERSPAN
 * header length.  @set_mtu is passed through to
 * ip6gre_tnl_link_config_route().
 */
static void ip6erspan_tnl_link_config(struct ip6_tnl *t, int set_mtu)
{
	int hlen;

	ip6gre_tnl_link_config_common(t);

	/* Computed after the common step, matching the original call order. */
	hlen = ip6erspan_calc_hlen(t);
	ip6gre_tnl_link_config_route(t, set_mtu, hlen);
}
2244
/*
 * Replace the parameters of an existing ERSPAN tunnel with @p and
 * re-run the link configuration.  Always returns 0.
 */
static int ip6erspan_tnl_change(struct ip6_tnl *t,
				const struct __ip6_tnl_parm *p, int set_mtu)
{
	/* Copy first so the link configuration sees the new parameters. */
	ip6gre_tnl_copy_tnl_parm(t, p);
	ip6erspan_tnl_link_config(t, set_mtu);

	return 0;
}
2252
2253static int ip6erspan_changelink(struct net_device *dev, struct nlattr *tb[],
2254				struct nlattr *data[],
2255				struct netlink_ext_ack *extack)
2256{
2257	struct ip6gre_net *ign = net_generic(dev_net(dev), ip6gre_net_id);
2258	struct __ip6_tnl_parm p;
2259	struct ip6_tnl *t;
2260
2261	t = ip6gre_changelink_common(dev, tb, data, &p, extack);
2262	if (IS_ERR(t))
2263		return PTR_ERR(t);
2264
2265	ip6erspan_set_version(data, &p);
2266	ip6gre_tunnel_unlink_md(ign, t);
2267	ip6gre_tunnel_unlink(ign, t);
2268	ip6erspan_tnl_change(t, &p, !tb[IFLA_MTU]);
2269	ip6erspan_tunnel_link_md(ign, t);
2270	ip6gre_tunnel_link(ign, t);
2271	return 0;
2272}
2273
2274static struct rtnl_link_ops ip6gre_link_ops __read_mostly = {
2275	.kind		= "ip6gre",
2276	.maxtype	= IFLA_GRE_MAX,
2277	.policy		= ip6gre_policy,
2278	.priv_size	= sizeof(struct ip6_tnl),
2279	.setup		= ip6gre_tunnel_setup,
2280	.validate	= ip6gre_tunnel_validate,
2281	.newlink	= ip6gre_newlink,
2282	.changelink	= ip6gre_changelink,
2283	.dellink	= ip6gre_dellink,
2284	.get_size	= ip6gre_get_size,
2285	.fill_info	= ip6gre_fill_info,
2286	.get_link_net	= ip6_tnl_get_link_net,
2287};
2288
2289static struct rtnl_link_ops ip6gre_tap_ops __read_mostly = {
2290	.kind		= "ip6gretap",
2291	.maxtype	= IFLA_GRE_MAX,
2292	.policy		= ip6gre_policy,
2293	.priv_size	= sizeof(struct ip6_tnl),
2294	.setup		= ip6gre_tap_setup,
2295	.validate	= ip6gre_tap_validate,
2296	.newlink	= ip6gre_newlink,
2297	.changelink	= ip6gre_changelink,
2298	.get_size	= ip6gre_get_size,
2299	.fill_info	= ip6gre_fill_info,
2300	.get_link_net	= ip6_tnl_get_link_net,
2301};
2302
2303static struct rtnl_link_ops ip6erspan_tap_ops __read_mostly = {
2304	.kind		= "ip6erspan",
2305	.maxtype	= IFLA_GRE_MAX,
2306	.policy		= ip6gre_policy,
2307	.priv_size	= sizeof(struct ip6_tnl),
2308	.setup		= ip6erspan_tap_setup,
2309	.validate	= ip6erspan_tap_validate,
2310	.newlink	= ip6erspan_newlink,
2311	.changelink	= ip6erspan_changelink,
2312	.get_size	= ip6gre_get_size,
2313	.fill_info	= ip6gre_fill_info,
2314	.get_link_net	= ip6_tnl_get_link_net,
2315};
2316
/*
 *	And now the module code and kernel interface.
 */
2320
2321static int __init ip6gre_init(void)
2322{
2323	int err;
2324
2325	pr_info("GRE over IPv6 tunneling driver\n");
2326
2327	err = register_pernet_device(&ip6gre_net_ops);
2328	if (err < 0)
2329		return err;
2330
2331	err = inet6_add_protocol(&ip6gre_protocol, IPPROTO_GRE);
2332	if (err < 0) {
2333		pr_info("%s: can't add protocol\n", __func__);
2334		goto add_proto_failed;
2335	}
2336
2337	err = rtnl_link_register(&ip6gre_link_ops);
2338	if (err < 0)
2339		goto rtnl_link_failed;
2340
2341	err = rtnl_link_register(&ip6gre_tap_ops);
2342	if (err < 0)
2343		goto tap_ops_failed;
2344
2345	err = rtnl_link_register(&ip6erspan_tap_ops);
2346	if (err < 0)
2347		goto erspan_link_failed;
2348
2349out:
2350	return err;
2351
2352erspan_link_failed:
2353	rtnl_link_unregister(&ip6gre_tap_ops);
2354tap_ops_failed:
2355	rtnl_link_unregister(&ip6gre_link_ops);
2356rtnl_link_failed:
2357	inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE);
2358add_proto_failed:
2359	unregister_pernet_device(&ip6gre_net_ops);
2360	goto out;
2361}
2362
2363static void __exit ip6gre_fini(void)
2364{
2365	rtnl_link_unregister(&ip6gre_tap_ops);
2366	rtnl_link_unregister(&ip6gre_link_ops);
2367	rtnl_link_unregister(&ip6erspan_tap_ops);
2368	inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE);
2369	unregister_pernet_device(&ip6gre_net_ops);
2370}
2371
2372module_init(ip6gre_init);
2373module_exit(ip6gre_fini);
2374MODULE_LICENSE("GPL");
2375MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)");
2376MODULE_DESCRIPTION("GRE over IPv6 tunneling device");
2377MODULE_ALIAS_RTNL_LINK("ip6gre");
2378MODULE_ALIAS_RTNL_LINK("ip6gretap");
2379MODULE_ALIAS_RTNL_LINK("ip6erspan");
2380MODULE_ALIAS_NETDEV("ip6gre0");