v3.15
  1/*
  2 *	Linux NET3:	GRE over IP protocol decoder.
  3 *
  4 *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
  5 *
  6 *	This program is free software; you can redistribute it and/or
  7 *	modify it under the terms of the GNU General Public License
  8 *	as published by the Free Software Foundation; either version
  9 *	2 of the License, or (at your option) any later version.
 10 *
 11 */
 12
 13#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 14
 15#include <linux/capability.h>
 16#include <linux/module.h>
 17#include <linux/types.h>
 18#include <linux/kernel.h>
 19#include <linux/slab.h>
 20#include <asm/uaccess.h>
 21#include <linux/skbuff.h>
 22#include <linux/netdevice.h>
 23#include <linux/in.h>
 24#include <linux/tcp.h>
 25#include <linux/udp.h>
 26#include <linux/if_arp.h>
 27#include <linux/mroute.h>
 28#include <linux/init.h>
 29#include <linux/in6.h>
 30#include <linux/inetdevice.h>
 31#include <linux/igmp.h>
 32#include <linux/netfilter_ipv4.h>
 33#include <linux/etherdevice.h>
 34#include <linux/if_ether.h>
 35
 36#include <net/sock.h>
 37#include <net/ip.h>
 38#include <net/icmp.h>
 39#include <net/protocol.h>
 40#include <net/ip_tunnels.h>
 41#include <net/arp.h>
 42#include <net/checksum.h>
 43#include <net/dsfield.h>
 44#include <net/inet_ecn.h>
 45#include <net/xfrm.h>
 46#include <net/net_namespace.h>
 47#include <net/netns/generic.h>
 48#include <net/rtnetlink.h>
 49#include <net/gre.h>
 50
 51#if IS_ENABLED(CONFIG_IPV6)
 52#include <net/ipv6.h>
 53#include <net/ip6_fib.h>
 54#include <net/ip6_route.h>
 55#endif
 56
 57/*
 58   Problems & solutions
 59   --------------------
 60
 61   1. The most important issue is detecting local dead loops.
 62   They would cause complete host lockup in transmit, which
 63   would be "resolved" by stack overflow or, if queueing is enabled,
 64   with infinite looping in net_bh.
 65
 66   We cannot track such dead loops during route installation,
 67   it is an infeasible task. The most general solution would be
 68   to keep an skb->encapsulation counter (a sort of local ttl)
 69   and silently drop the packet when it expires. It is a good
 70   solution, but it supposes maintaining a new variable in ALL
 71   skbs, even if no tunneling is used.
 72
 73   Current solution: xmit_recursion breaks dead loops. This is a percpu
 74   counter, since cpu migration is forbidden once we enter the first
 75   ndo_xmit(). We force an exit if this counter reaches RECURSION_LIMIT.
 76
 77   2. Networking dead loops would not kill routers, but would really
 78   kill the network. The IP hop limit plays the role of "t->recursion" in
 79   this case, if we copy it from the packet being encapsulated to the
 80   upper header. It is a very good solution, but it introduces two problems:
 81
 82   - Routing protocols using packets with ttl=1 (OSPF, RIP2)
 83     do not work over tunnels.
 84   - traceroute does not work. I planned to relay ICMP from the tunnel,
 85     so that this problem would be solved and traceroute output
 86     would be even more informative. This idea appeared to be wrong:
 87     only Linux complies with rfc1812 now (yes, guys, Linux is the only
 88     true router now :-)); all routers (at least, in my neighbourhood)
 89     return only 8 bytes of payload. It is the end.
 90
 91   Hence, if we want OSPF to work or traceroute to say something reasonable,
 92   we should search for another solution.
 93
 94   One of them is to parse the packet, trying to detect inner encapsulation
 95   made by our node. It is difficult or even impossible, especially
 96   taking fragmentation into account. To be short, ttl is not a solution at all.
 97
 98   Current solution: The solution was UNEXPECTEDLY SIMPLE.
 99   We force the DF flag on tunnels with a preconfigured hop limit,
100   that is ALL. :-) Well, it does not remove the problem completely,
101   but exponential growth of network traffic is changed to linear
102   (branches that exceed the pmtu are pruned) and the tunnel mtu
103   rapidly degrades to a value <68, where looping stops.
104   Yes, it is not good if there exists a router in the loop
105   which does not force DF, even when encapsulated packets have DF set.
106   But it is not our problem! Nobody could accuse us; we did
107   all that we could. Even if it is your gated that injected
108   the fatal route to the network, even if it was you who configured
109   the fatal static route: you are innocent. :-)
110
111   Alexey Kuznetsov.
112 */
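/* A quick worked example of the damping described above (illustrative
 * numbers, not from the original comment): each level of GRE-over-IPv4
 * nesting costs at least sizeof(struct iphdr) + 4 = 24 bytes (20-byte
 * outer IP header plus the 4-byte base GRE header; key, checksum and
 * sequence number cost 4 bytes more each). With DF forced, the usable
 * mtu therefore shrinks linearly per loop pass:
 *
 *	1500 -> 1476 -> 1452 -> ... -> below 68,
 *
 * at which point the tunnel mtu is unusable and the loop dies out
 * instead of amplifying traffic exponentially.
 */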
113
114static bool log_ecn_error = true;
115module_param(log_ecn_error, bool, 0644);
116MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
117
118static struct rtnl_link_ops ipgre_link_ops __read_mostly;
119static int ipgre_tunnel_init(struct net_device *dev);
120
121static int ipgre_net_id __read_mostly;
122static int gre_tap_net_id __read_mostly;
123
124static int ipgre_err(struct sk_buff *skb, u32 info,
125		     const struct tnl_ptk_info *tpi)
126{
127
128	/* All the routers (except for Linux) return only
129	   8 bytes of packet payload. It means that precise relaying of
130	   ICMP in the real Internet is absolutely infeasible.
131
132	   Moreover, Cisco "wise men" put the GRE key in the third word
133	   of the GRE header. It makes it impossible to maintain even soft
134	   state for keyed GRE tunnels with checksums enabled. Tell
135	   them "thank you".
136
137	   Well, I wonder: rfc1812 was written by a Cisco employee, so
138	   why the hell do these idiots break standards established
139	   by themselves???
140	   */
141	struct net *net = dev_net(skb->dev);
142	struct ip_tunnel_net *itn;
143	const struct iphdr *iph;
144	const int type = icmp_hdr(skb)->type;
145	const int code = icmp_hdr(skb)->code;
146	struct ip_tunnel *t;
147
148	switch (type) {
149	default:
150	case ICMP_PARAMETERPROB:
151		return PACKET_RCVD;
152
153	case ICMP_DEST_UNREACH:
154		switch (code) {
155		case ICMP_SR_FAILED:
156		case ICMP_PORT_UNREACH:
157			/* Impossible event. */
158			return PACKET_RCVD;
159		default:
160			/* All others are translated to HOST_UNREACH.
161			   rfc2003 contains "deep thoughts" about NET_UNREACH,
162			   I believe they are just ether pollution. --ANK
163			 */
164			break;
165		}
166		break;
167	case ICMP_TIME_EXCEEDED:
168		if (code != ICMP_EXC_TTL)
169			return PACKET_RCVD;
170		break;
171
172	case ICMP_REDIRECT:
173		break;
174	}
175
176	if (tpi->proto == htons(ETH_P_TEB))
177		itn = net_generic(net, gre_tap_net_id);
178	else
179		itn = net_generic(net, ipgre_net_id);
180
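	/* The IP header quoted in the ICMP payload is the one this host
	 * transmitted, so its daddr/saddr are reversed relative to a
	 * received packet; hence the lookup below uses (daddr, saddr),
	 * the mirror image of the lookup in ipgre_rcv().
	 */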
181	iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
182	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
183			     iph->daddr, iph->saddr, tpi->key);
184
185	if (t == NULL)
186		return PACKET_REJECT;
187
188	if (t->parms.iph.daddr == 0 ||
189	    ipv4_is_multicast(t->parms.iph.daddr))
190		return PACKET_RCVD;
191
192	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
193		return PACKET_RCVD;
194
195	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
196		t->err_count++;
197	else
198		t->err_count = 1;
199	t->err_time = jiffies;
200	return PACKET_RCVD;
201}
202
203static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
204{
205	struct net *net = dev_net(skb->dev);
206	struct ip_tunnel_net *itn;
207	const struct iphdr *iph;
208	struct ip_tunnel *tunnel;
209
210	if (tpi->proto == htons(ETH_P_TEB))
211		itn = net_generic(net, gre_tap_net_id);
212	else
213		itn = net_generic(net, ipgre_net_id);
214
215	iph = ip_hdr(skb);
216	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
217				  iph->saddr, iph->daddr, tpi->key);
218
219	if (tunnel) {
220		skb_pop_mac_header(skb);
221		ip_tunnel_rcv(tunnel, skb, tpi, log_ecn_error);
222		return PACKET_RCVD;
223	}
224	return PACKET_REJECT;
225}
226
227static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
228		       const struct iphdr *tnl_params,
229		       __be16 proto)
230{
231	struct ip_tunnel *tunnel = netdev_priv(dev);
232	struct tnl_ptk_info tpi;
233
234	tpi.flags = tunnel->parms.o_flags;
235	tpi.proto = proto;
236	tpi.key = tunnel->parms.o_key;
237	if (tunnel->parms.o_flags & TUNNEL_SEQ)
238		tunnel->o_seqno++;
239	tpi.seq = htonl(tunnel->o_seqno);
240
241	/* Push GRE header. */
242	gre_build_header(skb, &tpi, tunnel->hlen);
243
244	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
245}
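/* For reference, the header gre_build_header() pushes has the
 * RFC 2784/2890 layout, with the optional words present only when the
 * corresponding TUNNEL_CSUM/TUNNEL_KEY/TUNNEL_SEQ flag is set:
 *
 *	|C| |K|S|  Reserved0  | Ver |     Protocol Type     |
 *	|     Checksum (optional)   |  Reserved1 (optional) |
 *	|                  Key (optional)                   |
 *	|             Sequence Number (optional)            |
 */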
246
247static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
248			      struct net_device *dev)
249{
250	struct ip_tunnel *tunnel = netdev_priv(dev);
251	const struct iphdr *tnl_params;
252
253	skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
254	if (IS_ERR(skb))
255		goto out;
256
257	if (dev->header_ops) {
258		/* Need space for new headers */
259		if (skb_cow_head(skb, dev->needed_headroom -
260				      (tunnel->hlen + sizeof(struct iphdr))))
261			goto free_skb;
262
263		tnl_params = (const struct iphdr *)skb->data;
264
265		/* Pull skb since ip_tunnel_xmit() needs skb->data pointing
266		 * to gre header.
267		 */
268		skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
269	} else {
270		if (skb_cow_head(skb, dev->needed_headroom))
271			goto free_skb;
272
273		tnl_params = &tunnel->parms.iph;
274	}
275
276	__gre_xmit(skb, dev, tnl_params, skb->protocol);
277
278	return NETDEV_TX_OK;
279
280free_skb:
281	kfree_skb(skb);
282out:
283	dev->stats.tx_dropped++;
284	return NETDEV_TX_OK;
285}
286
287static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
288				struct net_device *dev)
289{
290	struct ip_tunnel *tunnel = netdev_priv(dev);
291
292	skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
293	if (IS_ERR(skb))
294		goto out;
295
296	if (skb_cow_head(skb, dev->needed_headroom))
297		goto free_skb;
298
299	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
300
301	return NETDEV_TX_OK;
302
303free_skb:
304	kfree_skb(skb);
305out:
306	dev->stats.tx_dropped++;
307	return NETDEV_TX_OK;
308}
309
310static int ipgre_tunnel_ioctl(struct net_device *dev,
311			      struct ifreq *ifr, int cmd)
312{
313	int err = 0;
314	struct ip_tunnel_parm p;
315
316	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
317		return -EFAULT;
318	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
319		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
320		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
321		    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
322			return -EINVAL;
323	}
324	p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
325	p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
326
327	err = ip_tunnel_ioctl(dev, &p, cmd);
328	if (err)
329		return err;
330
331	p.i_flags = tnl_flags_to_gre_flags(p.i_flags);
332	p.o_flags = tnl_flags_to_gre_flags(p.o_flags);
333
334	if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
335		return -EFAULT;
336	return 0;
337}
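/* Userspace sketch (hypothetical example, not part of this file): the
 * legacy tunnel ioctls handled above carry a struct ip_tunnel_parm
 * through ifr_ifru.ifru_data, e.g. to read back a tunnel's parameters:
 *
 *	struct ip_tunnel_parm p = { };
 *	struct ifreq ifr = { };
 *	strncpy(ifr.ifr_name, "gre1", IFNAMSIZ - 1);
 *	ifr.ifr_ifru.ifru_data = (void *)&p;
 *	int err = ioctl(sockfd, SIOCGETTUNNEL, &ifr);
 *	(on success, p.iph.saddr and p.iph.daddr hold the endpoints)
 */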
338
339/* Nice toy. Unfortunately, useless in real life :-)
340   It allows one to construct a virtual multiprotocol broadcast "LAN"
341   over the Internet, provided multicast routing is tuned.
342
343
344   I have no idea whether this bicycle was invented before me,
345   so I had to set ARPHRD_IPGRE to a random value.
346   I have an impression that Cisco could have made something similar,
347   but this feature is apparently missing in IOS<=11.2(8).
348
349   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
350   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
351
352   ping -t 255 224.66.66.66
353
354   If nobody answers, mbone does not work.
355
356   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
357   ip addr add 10.66.66.<somewhat>/24 dev Universe
358   ifconfig Universe up
359   ifconfig Universe add fe80::<Your_real_addr>/10
360   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
361   ftp 10.66.66.66
362   ...
363   ftp fec0:6666:6666::193.233.7.65
364   ...
365 */
366static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
367			unsigned short type,
368			const void *daddr, const void *saddr, unsigned int len)
369{
370	struct ip_tunnel *t = netdev_priv(dev);
371	struct iphdr *iph;
372	struct gre_base_hdr *greh;
373
374	iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph));
375	greh = (struct gre_base_hdr *)(iph+1);
376	greh->flags = tnl_flags_to_gre_flags(t->parms.o_flags);
377	greh->protocol = htons(type);
378
379	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
380
381	/* Set the source hardware address. */
382	if (saddr)
383		memcpy(&iph->saddr, saddr, 4);
384	if (daddr)
385		memcpy(&iph->daddr, daddr, 4);
386	if (iph->daddr)
387		return t->hlen + sizeof(*iph);
388
389	return -(t->hlen + sizeof(*iph));
390}
391
392static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
393{
394	const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
395	memcpy(haddr, &iph->saddr, 4);
396	return 4;
397}
398
399static const struct header_ops ipgre_header_ops = {
400	.create	= ipgre_header,
401	.parse	= ipgre_header_parse,
402};
403
404#ifdef CONFIG_NET_IPGRE_BROADCAST
405static int ipgre_open(struct net_device *dev)
406{
407	struct ip_tunnel *t = netdev_priv(dev);
408
409	if (ipv4_is_multicast(t->parms.iph.daddr)) {
410		struct flowi4 fl4;
411		struct rtable *rt;
412
413		rt = ip_route_output_gre(dev_net(dev), &fl4,
414					 t->parms.iph.daddr,
415					 t->parms.iph.saddr,
416					 t->parms.o_key,
417					 RT_TOS(t->parms.iph.tos),
418					 t->parms.link);
419		if (IS_ERR(rt))
420			return -EADDRNOTAVAIL;
421		dev = rt->dst.dev;
422		ip_rt_put(rt);
423		if (__in_dev_get_rtnl(dev) == NULL)
424			return -EADDRNOTAVAIL;
425		t->mlink = dev->ifindex;
426		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
427	}
428	return 0;
429}
430
431static int ipgre_close(struct net_device *dev)
432{
433	struct ip_tunnel *t = netdev_priv(dev);
434
435	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
436		struct in_device *in_dev;
437		in_dev = inetdev_by_index(dev_net(dev), t->mlink);
438		if (in_dev)
439			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
440	}
441	return 0;
442}
443#endif
444
445static const struct net_device_ops ipgre_netdev_ops = {
446	.ndo_init		= ipgre_tunnel_init,
447	.ndo_uninit		= ip_tunnel_uninit,
448#ifdef CONFIG_NET_IPGRE_BROADCAST
449	.ndo_open		= ipgre_open,
450	.ndo_stop		= ipgre_close,
451#endif
452	.ndo_start_xmit		= ipgre_xmit,
453	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
454	.ndo_change_mtu		= ip_tunnel_change_mtu,
455	.ndo_get_stats64	= ip_tunnel_get_stats64,
456};
457
458#define GRE_FEATURES (NETIF_F_SG |		\
459		      NETIF_F_FRAGLIST |	\
460		      NETIF_F_HIGHDMA |		\
461		      NETIF_F_HW_CSUM)
462
463static void ipgre_tunnel_setup(struct net_device *dev)
464{
465	dev->netdev_ops		= &ipgre_netdev_ops;
466	dev->type		= ARPHRD_IPGRE;
467	ip_tunnel_setup(dev, ipgre_net_id);
468}
469
470static void __gre_tunnel_init(struct net_device *dev)
471{
472	struct ip_tunnel *tunnel;
473
474	tunnel = netdev_priv(dev);
475	tunnel->hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
476	tunnel->parms.iph.protocol = IPPROTO_GRE;
477
478	dev->needed_headroom	= LL_MAX_HEADER + sizeof(struct iphdr) + 4;
479	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 4;
480
481	dev->features		|= NETIF_F_NETNS_LOCAL | GRE_FEATURES;
482	dev->hw_features	|= GRE_FEATURES;
483
484	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
485		/* TCP offload with GRE SEQ is not supported. */
486		dev->features    |= NETIF_F_GSO_SOFTWARE;
487		dev->hw_features |= NETIF_F_GSO_SOFTWARE;
488		/* Can use a lockless transmit, unless we generate
489		 * output sequences
490		 */
491		dev->features |= NETIF_F_LLTX;
492	}
493}
494
495static int ipgre_tunnel_init(struct net_device *dev)
496{
497	struct ip_tunnel *tunnel = netdev_priv(dev);
498	struct iphdr *iph = &tunnel->parms.iph;
499
500	__gre_tunnel_init(dev);
501
502	memcpy(dev->dev_addr, &iph->saddr, 4);
503	memcpy(dev->broadcast, &iph->daddr, 4);
504
505	dev->flags		= IFF_NOARP;
506	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
507	dev->addr_len		= 4;
508
509	if (iph->daddr) {
510#ifdef CONFIG_NET_IPGRE_BROADCAST
511		if (ipv4_is_multicast(iph->daddr)) {
512			if (!iph->saddr)
513				return -EINVAL;
514			dev->flags = IFF_BROADCAST;
515			dev->header_ops = &ipgre_header_ops;
516		}
517#endif
518	} else
519		dev->header_ops = &ipgre_header_ops;
520
521	return ip_tunnel_init(dev);
522}
523
524static struct gre_cisco_protocol ipgre_protocol = {
525	.handler        = ipgre_rcv,
526	.err_handler    = ipgre_err,
527	.priority       = 0,
528};
529
530static int __net_init ipgre_init_net(struct net *net)
531{
532	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
533}
534
535static void __net_exit ipgre_exit_net(struct net *net)
536{
537	struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);
538	ip_tunnel_delete_net(itn, &ipgre_link_ops);
539}
540
541static struct pernet_operations ipgre_net_ops = {
542	.init = ipgre_init_net,
543	.exit = ipgre_exit_net,
544	.id   = &ipgre_net_id,
545	.size = sizeof(struct ip_tunnel_net),
546};
547
548static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
549{
550	__be16 flags;
551
552	if (!data)
553		return 0;
554
555	flags = 0;
556	if (data[IFLA_GRE_IFLAGS])
557		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
558	if (data[IFLA_GRE_OFLAGS])
559		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
560	if (flags & (GRE_VERSION|GRE_ROUTING))
561		return -EINVAL;
562
563	return 0;
564}
565
566static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
567{
568	__be32 daddr;
569
570	if (tb[IFLA_ADDRESS]) {
571		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
572			return -EINVAL;
573		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
574			return -EADDRNOTAVAIL;
575	}
576
577	if (!data)
578		goto out;
579
580	if (data[IFLA_GRE_REMOTE]) {
581		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
582		if (!daddr)
583			return -EINVAL;
584	}
585
586out:
587	return ipgre_tunnel_validate(tb, data);
588}
589
590static void ipgre_netlink_parms(struct nlattr *data[], struct nlattr *tb[],
591			       struct ip_tunnel_parm *parms)
592{
593	memset(parms, 0, sizeof(*parms));
594
595	parms->iph.protocol = IPPROTO_GRE;
596
597	if (!data)
598		return;
599
600	if (data[IFLA_GRE_LINK])
601		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
602
603	if (data[IFLA_GRE_IFLAGS])
604		parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
605
606	if (data[IFLA_GRE_OFLAGS])
607		parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
608
609	if (data[IFLA_GRE_IKEY])
610		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
611
612	if (data[IFLA_GRE_OKEY])
613		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
614
615	if (data[IFLA_GRE_LOCAL])
616		parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
617
618	if (data[IFLA_GRE_REMOTE])
619		parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
620
621	if (data[IFLA_GRE_TTL])
622		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
623
624	if (data[IFLA_GRE_TOS])
625		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
626
627	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
628		parms->iph.frag_off = htons(IP_DF);
629}
630
631static int gre_tap_init(struct net_device *dev)
632{
633	__gre_tunnel_init(dev);
634
635	return ip_tunnel_init(dev);
636}
637
638static const struct net_device_ops gre_tap_netdev_ops = {
639	.ndo_init		= gre_tap_init,
640	.ndo_uninit		= ip_tunnel_uninit,
641	.ndo_start_xmit		= gre_tap_xmit,
642	.ndo_set_mac_address	= eth_mac_addr,
643	.ndo_validate_addr	= eth_validate_addr,
644	.ndo_change_mtu		= ip_tunnel_change_mtu,
645	.ndo_get_stats64	= ip_tunnel_get_stats64,
646};
647
648static void ipgre_tap_setup(struct net_device *dev)
649{
650	ether_setup(dev);
651	dev->netdev_ops		= &gre_tap_netdev_ops;
652	ip_tunnel_setup(dev, gre_tap_net_id);
653}
654
655static int ipgre_newlink(struct net *src_net, struct net_device *dev,
656			 struct nlattr *tb[], struct nlattr *data[])
657{
658	struct ip_tunnel_parm p;
659
660	ipgre_netlink_parms(data, tb, &p);
661	return ip_tunnel_newlink(dev, tb, &p);
662}
663
664static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
665			    struct nlattr *data[])
666{
667	struct ip_tunnel_parm p;
668
669	ipgre_netlink_parms(data, tb, &p);
670	return ip_tunnel_changelink(dev, tb, &p);
671}
672
673static size_t ipgre_get_size(const struct net_device *dev)
674{
675	return
676		/* IFLA_GRE_LINK */
677		nla_total_size(4) +
678		/* IFLA_GRE_IFLAGS */
679		nla_total_size(2) +
680		/* IFLA_GRE_OFLAGS */
681		nla_total_size(2) +
682		/* IFLA_GRE_IKEY */
683		nla_total_size(4) +
684		/* IFLA_GRE_OKEY */
685		nla_total_size(4) +
686		/* IFLA_GRE_LOCAL */
687		nla_total_size(4) +
688		/* IFLA_GRE_REMOTE */
689		nla_total_size(4) +
690		/* IFLA_GRE_TTL */
691		nla_total_size(1) +
692		/* IFLA_GRE_TOS */
693		nla_total_size(1) +
694		/* IFLA_GRE_PMTUDISC */
695		nla_total_size(1) +
696		0;
697}
698
699static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
700{
701	struct ip_tunnel *t = netdev_priv(dev);
702	struct ip_tunnel_parm *p = &t->parms;
703
704	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
705	    nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) ||
706	    nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) ||
707	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
708	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
709	    nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
710	    nla_put_be32(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
711	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
712	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
713	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
714		       !!(p->iph.frag_off & htons(IP_DF))))
715		goto nla_put_failure;
716	return 0;
717
718nla_put_failure:
719	return -EMSGSIZE;
720}
721
722static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
723	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
724	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
725	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
726	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
727	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
728	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
729	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
730	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
731	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
732	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
733};
734
735static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
736	.kind		= "gre",
737	.maxtype	= IFLA_GRE_MAX,
738	.policy		= ipgre_policy,
739	.priv_size	= sizeof(struct ip_tunnel),
740	.setup		= ipgre_tunnel_setup,
741	.validate	= ipgre_tunnel_validate,
742	.newlink	= ipgre_newlink,
743	.changelink	= ipgre_changelink,
744	.dellink	= ip_tunnel_dellink,
745	.get_size	= ipgre_get_size,
746	.fill_info	= ipgre_fill_info,
747};
748
749static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
750	.kind		= "gretap",
751	.maxtype	= IFLA_GRE_MAX,
752	.policy		= ipgre_policy,
753	.priv_size	= sizeof(struct ip_tunnel),
754	.setup		= ipgre_tap_setup,
755	.validate	= ipgre_tap_validate,
756	.newlink	= ipgre_newlink,
757	.changelink	= ipgre_changelink,
758	.dellink	= ip_tunnel_dellink,
759	.get_size	= ipgre_get_size,
760	.fill_info	= ipgre_fill_info,
761};
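/* The two rtnl_link_ops above are what iproute2 drives; for example
 * (addresses are placeholders, in the spirit of the mbone example
 * earlier in this file):
 *
 *	ip link add gre1 type gre local 198.51.100.2 remote 203.0.113.1 \
 *		ttl 64 key 42
 *	ip link add tap1 type gretap local 198.51.100.2 remote 203.0.113.1
 *	ip link set gre1 up
 */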
762
763static int __net_init ipgre_tap_init_net(struct net *net)
764{
765	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, NULL);
766}
767
768static void __net_exit ipgre_tap_exit_net(struct net *net)
769{
770	struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);
771	ip_tunnel_delete_net(itn, &ipgre_tap_ops);
772}
773
774static struct pernet_operations ipgre_tap_net_ops = {
775	.init = ipgre_tap_init_net,
776	.exit = ipgre_tap_exit_net,
777	.id   = &gre_tap_net_id,
778	.size = sizeof(struct ip_tunnel_net),
779};
780
781static int __init ipgre_init(void)
782{
783	int err;
784
785	pr_info("GRE over IPv4 tunneling driver\n");
786
787	err = register_pernet_device(&ipgre_net_ops);
788	if (err < 0)
789		return err;
790
791	err = register_pernet_device(&ipgre_tap_net_ops);
792	if (err < 0)
793		goto pnet_tap_failed;
794
795	err = gre_cisco_register(&ipgre_protocol);
796	if (err < 0) {
797		pr_info("%s: can't add protocol\n", __func__);
798		goto add_proto_failed;
799	}
800
801	err = rtnl_link_register(&ipgre_link_ops);
802	if (err < 0)
803		goto rtnl_link_failed;
804
805	err = rtnl_link_register(&ipgre_tap_ops);
806	if (err < 0)
807		goto tap_ops_failed;
808
809	return 0;
810
811tap_ops_failed:
812	rtnl_link_unregister(&ipgre_link_ops);
813rtnl_link_failed:
814	gre_cisco_unregister(&ipgre_protocol);
815add_proto_failed:
816	unregister_pernet_device(&ipgre_tap_net_ops);
817pnet_tap_failed:
818	unregister_pernet_device(&ipgre_net_ops);
819	return err;
820}
821
822static void __exit ipgre_fini(void)
823{
824	rtnl_link_unregister(&ipgre_tap_ops);
825	rtnl_link_unregister(&ipgre_link_ops);
826	gre_cisco_unregister(&ipgre_protocol);
827	unregister_pernet_device(&ipgre_tap_net_ops);
828	unregister_pernet_device(&ipgre_net_ops);
829}
830
831module_init(ipgre_init);
832module_exit(ipgre_fini);
833MODULE_LICENSE("GPL");
834MODULE_ALIAS_RTNL_LINK("gre");
835MODULE_ALIAS_RTNL_LINK("gretap");
836MODULE_ALIAS_NETDEV("gre0");
837MODULE_ALIAS_NETDEV("gretap0");
v4.10.11
   1/*
   2 *	Linux NET3:	GRE over IP protocol decoder.
   3 *
   4 *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
   5 *
   6 *	This program is free software; you can redistribute it and/or
   7 *	modify it under the terms of the GNU General Public License
   8 *	as published by the Free Software Foundation; either version
   9 *	2 of the License, or (at your option) any later version.
  10 *
  11 */
  12
  13#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  14
  15#include <linux/capability.h>
  16#include <linux/module.h>
  17#include <linux/types.h>
  18#include <linux/kernel.h>
  19#include <linux/slab.h>
  20#include <linux/uaccess.h>
  21#include <linux/skbuff.h>
  22#include <linux/netdevice.h>
  23#include <linux/in.h>
  24#include <linux/tcp.h>
  25#include <linux/udp.h>
  26#include <linux/if_arp.h>
  27#include <linux/if_vlan.h>
  28#include <linux/init.h>
  29#include <linux/in6.h>
  30#include <linux/inetdevice.h>
  31#include <linux/igmp.h>
  32#include <linux/netfilter_ipv4.h>
  33#include <linux/etherdevice.h>
  34#include <linux/if_ether.h>
  35
  36#include <net/sock.h>
  37#include <net/ip.h>
  38#include <net/icmp.h>
  39#include <net/protocol.h>
  40#include <net/ip_tunnels.h>
  41#include <net/arp.h>
  42#include <net/checksum.h>
  43#include <net/dsfield.h>
  44#include <net/inet_ecn.h>
  45#include <net/xfrm.h>
  46#include <net/net_namespace.h>
  47#include <net/netns/generic.h>
  48#include <net/rtnetlink.h>
  49#include <net/gre.h>
  50#include <net/dst_metadata.h>
  51
  52/*
  53   Problems & solutions
  54   --------------------
  55
  56   1. The most important issue is detecting local dead loops.
  57   They would cause complete host lockup in transmit, which
  58   would be "resolved" by stack overflow or, if queueing is enabled,
  59   with infinite looping in net_bh.
  60
  61   We cannot track such dead loops during route installation,
  62   it is an infeasible task. The most general solution would be
  63   to keep an skb->encapsulation counter (a sort of local ttl)
  64   and silently drop the packet when it expires. It is a good
  65   solution, but it supposes maintaining a new variable in ALL
  66   skbs, even if no tunneling is used.
  67
  68   Current solution: xmit_recursion breaks dead loops. This is a percpu
  69   counter, since cpu migration is forbidden once we enter the first
  70   ndo_xmit(). We force an exit if this counter reaches RECURSION_LIMIT.
  71
  72   2. Networking dead loops would not kill routers, but would really
  73   kill the network. The IP hop limit plays the role of "t->recursion" in
  74   this case, if we copy it from the packet being encapsulated to the
  75   upper header. It is a very good solution, but it introduces two problems:
  76
  77   - Routing protocols using packets with ttl=1 (OSPF, RIP2)
  78     do not work over tunnels.
  79   - traceroute does not work. I planned to relay ICMP from the tunnel,
  80     so that this problem would be solved and traceroute output
  81     would be even more informative. This idea appeared to be wrong:
  82     only Linux complies with rfc1812 now (yes, guys, Linux is the only
  83     true router now :-)); all routers (at least, in my neighbourhood)
  84     return only 8 bytes of payload. It is the end.
  85
  86   Hence, if we want OSPF to work or traceroute to say something reasonable,
  87   we should search for another solution.
  88
  89   One of them is to parse the packet, trying to detect inner encapsulation
  90   made by our node. It is difficult or even impossible, especially
  91   taking fragmentation into account. To be short, ttl is not a solution at all.
  92
  93   Current solution: The solution was UNEXPECTEDLY SIMPLE.
  94   We force the DF flag on tunnels with a preconfigured hop limit,
  95   that is ALL. :-) Well, it does not remove the problem completely,
  96   but exponential growth of network traffic is changed to linear
  97   (branches that exceed the pmtu are pruned) and the tunnel mtu
  98   rapidly degrades to a value <68, where looping stops.
  99   Yes, it is not good if there exists a router in the loop
 100   which does not force DF, even when encapsulated packets have DF set.
 101   But it is not our problem! Nobody could accuse us; we did
 102   all that we could. Even if it is your gated that injected
 103   the fatal route to the network, even if it was you who configured
 104   the fatal static route: you are innocent. :-)
 105
 106   Alexey Kuznetsov.
 107 */
 108
 109static bool log_ecn_error = true;
 110module_param(log_ecn_error, bool, 0644);
 111MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
 112
 113static struct rtnl_link_ops ipgre_link_ops __read_mostly;
 114static int ipgre_tunnel_init(struct net_device *dev);
 115
 116static unsigned int ipgre_net_id __read_mostly;
 117static unsigned int gre_tap_net_id __read_mostly;
 118
 119static void ipgre_err(struct sk_buff *skb, u32 info,
 120		      const struct tnl_ptk_info *tpi)
 121{
 122
 123	/* All the routers (except for Linux) return only
 124	   8 bytes of packet payload. It means that precise relaying of
 125	   ICMP in the real Internet is absolutely infeasible.
 126
 127	   Moreover, Cisco "wise men" put the GRE key in the third word
 128	   of the GRE header. It makes it impossible to maintain even soft
 129	   state for keyed GRE tunnels with checksums enabled. Tell
 130	   them "thank you".
 131
 132	   Well, I wonder: rfc1812 was written by a Cisco employee, so
 133	   why the hell do these idiots break standards established
 134	   by themselves???
 135	   */
 136	struct net *net = dev_net(skb->dev);
 137	struct ip_tunnel_net *itn;
 138	const struct iphdr *iph;
 139	const int type = icmp_hdr(skb)->type;
 140	const int code = icmp_hdr(skb)->code;
 141	unsigned int data_len = 0;
 142	struct ip_tunnel *t;
 143
 144	switch (type) {
 145	default:
 146	case ICMP_PARAMETERPROB:
 147		return;
 148
 149	case ICMP_DEST_UNREACH:
 150		switch (code) {
 151		case ICMP_SR_FAILED:
 152		case ICMP_PORT_UNREACH:
 153			/* Impossible event. */
 154			return;
 155		default:
 156			/* All others are translated to HOST_UNREACH.
 157			   rfc2003 contains "deep thoughts" about NET_UNREACH,
 158			   I believe they are just ether pollution. --ANK
 159			 */
 160			break;
 161		}
 162		break;
 163
 164	case ICMP_TIME_EXCEEDED:
 165		if (code != ICMP_EXC_TTL)
 166			return;
 167		data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
 168		break;
 169
 170	case ICMP_REDIRECT:
 171		break;
 172	}
 173
 174	if (tpi->proto == htons(ETH_P_TEB))
 175		itn = net_generic(net, gre_tap_net_id);
 176	else
 177		itn = net_generic(net, ipgre_net_id);
 178
 179	iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
 180	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
 181			     iph->daddr, iph->saddr, tpi->key);
 182
 183	if (!t)
 184		return;
 185
 186#if IS_ENABLED(CONFIG_IPV6)
 187	if (tpi->proto == htons(ETH_P_IPV6) &&
 188	    !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
 189					type, data_len))
 190		return;
 191#endif
 192
 193	if (t->parms.iph.daddr == 0 ||
 194	    ipv4_is_multicast(t->parms.iph.daddr))
 195		return;
 196
 197	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
 198		return;
 199
 200	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
 201		t->err_count++;
 202	else
 203		t->err_count = 1;
 204	t->err_time = jiffies;
 205}
 206
 207static void gre_err(struct sk_buff *skb, u32 info)
 208{
 209	/* All the routers (except for Linux) return only
 210	 * 8 bytes of packet payload. It means that precise relaying of
 211	 * ICMP in the real Internet is absolutely infeasible.
 212	 *
 213	 * Moreover, Cisco "wise men" put the GRE key in the third word
 214	 * of the GRE header. It makes it impossible to maintain even soft
 215	 * state for keyed
 216	 * GRE tunnels with checksums enabled. Tell them "thank you".
 217	 *
 218	 * Well, I wonder: rfc1812 was written by a Cisco employee, so
 219	 * why the hell do these idiots break standards established
 220	 * by themselves???
 221	 */
 222
 223	const struct iphdr *iph = (struct iphdr *)skb->data;
 224	const int type = icmp_hdr(skb)->type;
 225	const int code = icmp_hdr(skb)->code;
 226	struct tnl_ptk_info tpi;
 227	bool csum_err = false;
 228
 229	if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP),
 230			     iph->ihl * 4) < 0) {
 231		if (!csum_err)		/* ignore csum errors. */
 232			return;
 233	}
 234
 235	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
 236		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
 237				 skb->dev->ifindex, 0, IPPROTO_GRE, 0);
 238		return;
 239	}
 240	if (type == ICMP_REDIRECT) {
 241		ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0,
 242			      IPPROTO_GRE, 0);
 243		return;
 244	}
 245
 246	ipgre_err(skb, info, &tpi);
 247}
 248
 249static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
 250		       struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
 251{
 252	struct metadata_dst *tun_dst = NULL;
 253	const struct iphdr *iph;
 254	struct ip_tunnel *tunnel;
 255
 256	iph = ip_hdr(skb);
 257	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
 258				  iph->saddr, iph->daddr, tpi->key);
 259
 260	if (tunnel) {
 261		if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
 262					   raw_proto, false) < 0)
 263			goto drop;
 264
 265		if (tunnel->dev->type != ARPHRD_NONE)
 266			skb_pop_mac_header(skb);
 267		else
 268			skb_reset_mac_header(skb);
 269		if (tunnel->collect_md) {
 270			__be16 flags;
 271			__be64 tun_id;
 272
 273			flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
 274			tun_id = key32_to_tunnel_id(tpi->key);
 275			tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
 276			if (!tun_dst)
 277				return PACKET_REJECT;
 278		}
 279
 280		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
 281		return PACKET_RCVD;
 282	}
 283	return PACKET_NEXT;
 284
 285drop:
 286	kfree_skb(skb);
 287	return PACKET_RCVD;
 288}
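/* Return contract, as consumed by ipgre_rcv() and gre_rcv() below:
 * PACKET_RCVD means the skb was consumed (delivered, or freed on
 * error); PACKET_NEXT asks the caller to retry the lookup against the
 * other per-net table; anything other than PACKET_RCVD ultimately
 * makes gre_rcv() answer with ICMP port unreachable and free the skb.
 */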
 289
 290static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
 291		     int hdr_len)
 292{
 293	struct net *net = dev_net(skb->dev);
 294	struct ip_tunnel_net *itn;
 295	int res;
 296
 297	if (tpi->proto == htons(ETH_P_TEB))
 298		itn = net_generic(net, gre_tap_net_id);
 299	else
 300		itn = net_generic(net, ipgre_net_id);
 301
 302	res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
 303	if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
 304		/* ipgre tunnels in collect metadata mode should also
 305		 * receive ETH_P_TEB traffic.
 306		 */
 307		itn = net_generic(net, ipgre_net_id);
 308		res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
 309	}
 310	return res;
 311}
 312
 313static int gre_rcv(struct sk_buff *skb)
 314{
 315	struct tnl_ptk_info tpi;
 316	bool csum_err = false;
 317	int hdr_len;
 318
 319#ifdef CONFIG_NET_IPGRE_BROADCAST
 320	if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
 321		/* Looped back packet, drop it! */
 322		if (rt_is_output_route(skb_rtable(skb)))
 323			goto drop;
 324	}
 325#endif
 326
 327	hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
 328	if (hdr_len < 0)
 329		goto drop;
 330
 331	if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
 332		return 0;
 333
 334	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
 335drop:
 336	kfree_skb(skb);
 337	return 0;
 338}
 339
 340static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
 341		       const struct iphdr *tnl_params,
 342		       __be16 proto)
 343{
 344	struct ip_tunnel *tunnel = netdev_priv(dev);
 345
 346	if (tunnel->parms.o_flags & TUNNEL_SEQ)
 347		tunnel->o_seqno++;
 348
 349	/* Push GRE header. */
 350	gre_build_header(skb, tunnel->tun_hlen,
 351			 tunnel->parms.o_flags, proto, tunnel->parms.o_key,
 352			 htonl(tunnel->o_seqno));
 353
 354	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
 355}
 356
 357static int gre_handle_offloads(struct sk_buff *skb, bool csum)
 358{
 359	return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
 360}
 361
 362static struct rtable *gre_get_rt(struct sk_buff *skb,
 363				 struct net_device *dev,
 364				 struct flowi4 *fl,
 365				 const struct ip_tunnel_key *key)
 366{
 367	struct net *net = dev_net(dev);
 368
 369	memset(fl, 0, sizeof(*fl));
 370	fl->daddr = key->u.ipv4.dst;
 371	fl->saddr = key->u.ipv4.src;
 372	fl->flowi4_tos = RT_TOS(key->tos);
 373	fl->flowi4_mark = skb->mark;
 374	fl->flowi4_proto = IPPROTO_GRE;
 375
 376	return ip_route_output_key(net, fl);
 377}
 378
 379static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
 380			__be16 proto)
 381{
 382	struct ip_tunnel_info *tun_info;
 383	const struct ip_tunnel_key *key;
 384	struct rtable *rt = NULL;
 385	struct flowi4 fl;
 386	int min_headroom;
 387	int tunnel_hlen;
 388	__be16 df, flags;
 389	bool use_cache;
 390	int err;
 391
 392	tun_info = skb_tunnel_info(skb);
 393	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
 394		     ip_tunnel_info_af(tun_info) != AF_INET))
 395		goto err_free_skb;
 396
 397	key = &tun_info->key;
 398	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
 399	if (use_cache)
 400		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl.saddr);
 401	if (!rt) {
 402		rt = gre_get_rt(skb, dev, &fl, key);
 403		if (IS_ERR(rt))
 404			goto err_free_skb;
 405		if (use_cache)
 406			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
 407					  fl.saddr);
 408	}
 409
 410	tunnel_hlen = gre_calc_hlen(key->tun_flags);
 411
 412	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
 413			+ tunnel_hlen + sizeof(struct iphdr);
 414	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
 415		int head_delta = SKB_DATA_ALIGN(min_headroom -
 416						skb_headroom(skb) +
 417						16);
 418		err = pskb_expand_head(skb, max_t(int, head_delta, 0),
 419				       0, GFP_ATOMIC);
 420		if (unlikely(err))
 421			goto err_free_rt;
 422	}
 423
 424	/* Push Tunnel header. */
 425	if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
 426		goto err_free_rt;
 427
 428	flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
 429	gre_build_header(skb, tunnel_hlen, flags, proto,
 430			 tunnel_id_to_key32(tun_info->key.tun_id), 0);
 431
 432	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
 433
 434	iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
 435		      key->tos, key->ttl, df, false);
 436	return;
 437
 438err_free_rt:
 439	ip_rt_put(rt);
 440err_free_skb:
 441	kfree_skb(skb);
 442	dev->stats.tx_dropped++;
 443}
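/* gre_fb_xmit() is the transmit path for "collect metadata" (flow
 * based) devices: key, addresses, tos and ttl come from the per-skb
 * tunnel info rather than from the netdev's own parms. Such a device
 * is created with iproute2's "external" keyword, for example:
 *
 *	ip link add gre1 type gretap external
 *
 * with the metadata then attached per packet by, for instance, Open
 * vSwitch or "tc ... action tunnel_key set".
 */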
 444
 445static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
 446{
 447	struct ip_tunnel_info *info = skb_tunnel_info(skb);
 448	struct rtable *rt;
 449	struct flowi4 fl4;
 450
 451	if (ip_tunnel_info_af(info) != AF_INET)
 452		return -EINVAL;
 453
 454	rt = gre_get_rt(skb, dev, &fl4, &info->key);
 455	if (IS_ERR(rt))
 456		return PTR_ERR(rt);
 457
 458	ip_rt_put(rt);
 459	info->key.u.ipv4.src = fl4.saddr;
 460	return 0;
 461}
 462
 463static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
 464			      struct net_device *dev)
 465{
 466	struct ip_tunnel *tunnel = netdev_priv(dev);
 467	const struct iphdr *tnl_params;
 468
 469	if (tunnel->collect_md) {
 470		gre_fb_xmit(skb, dev, skb->protocol);
 471		return NETDEV_TX_OK;
 472	}
 473
 474	if (dev->header_ops) {
 475		/* Need space for new headers */
 476		if (skb_cow_head(skb, dev->needed_headroom -
 477				      (tunnel->hlen + sizeof(struct iphdr))))
 478			goto free_skb;
 479
 480		tnl_params = (const struct iphdr *)skb->data;
 481
 482		/* Pull skb since ip_tunnel_xmit() needs skb->data pointing
 483		 * to gre header.
 484		 */
 485		skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
 486		skb_reset_mac_header(skb);
 487	} else {
 488		if (skb_cow_head(skb, dev->needed_headroom))
 489			goto free_skb;
 490
 491		tnl_params = &tunnel->parms.iph;
 492	}
 493
 494	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
 495		goto free_skb;
 496
 497	__gre_xmit(skb, dev, tnl_params, skb->protocol);
 498	return NETDEV_TX_OK;
 499
 500free_skb:
 501	kfree_skb(skb);
 502	dev->stats.tx_dropped++;
 503	return NETDEV_TX_OK;
 504}
 505
 506static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
 507				struct net_device *dev)
 508{
 509	struct ip_tunnel *tunnel = netdev_priv(dev);
 510
 511	if (tunnel->collect_md) {
 512		gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
 513		return NETDEV_TX_OK;
 514	}
 515
 516	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
 517		goto free_skb;
 518
 519	if (skb_cow_head(skb, dev->needed_headroom))
 520		goto free_skb;
 521
 522	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
 523	return NETDEV_TX_OK;
 524
 525free_skb:
 526	kfree_skb(skb);
 527	dev->stats.tx_dropped++;
 528	return NETDEV_TX_OK;
 529}
 530
 531static int ipgre_tunnel_ioctl(struct net_device *dev,
 532			      struct ifreq *ifr, int cmd)
 533{
 534	int err;
 535	struct ip_tunnel_parm p;
 536
 537	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 538		return -EFAULT;
 539	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
 540		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
 541		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
 542		    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
 543			return -EINVAL;
 544	}
 545	p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
 546	p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
 547
 548	err = ip_tunnel_ioctl(dev, &p, cmd);
 549	if (err)
 550		return err;
 551
 552	p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags);
 553	p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags);
 554
 555	if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 556		return -EFAULT;
 557	return 0;
 558}
 559
 560/* Nice toy. Unfortunately, useless in real life :-)
 561   It allows one to construct a virtual multiprotocol broadcast "LAN"
 562   over the Internet, provided multicast routing is tuned.
 563
 564
 565   I have no idea whether this bicycle was invented before me,
 566   so I had to set ARPHRD_IPGRE to a random value.
 567   I have an impression that Cisco could have made something similar,
 568   but this feature is apparently missing in IOS<=11.2(8).
 569
 570   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
 571   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
 572
 573   ping -t 255 224.66.66.66
 574
 575   If nobody answers, mbone does not work.
 576
 577   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
 578   ip addr add 10.66.66.<somewhat>/24 dev Universe
 579   ifconfig Universe up
 580   ifconfig Universe add fe80::<Your_real_addr>/10
 581   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
 582   ftp 10.66.66.66
 583   ...
 584   ftp fec0:6666:6666::193.233.7.65
 585   ...
 586 */
 587static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
 588			unsigned short type,
 589			const void *daddr, const void *saddr, unsigned int len)
 590{
 591	struct ip_tunnel *t = netdev_priv(dev);
 592	struct iphdr *iph;
 593	struct gre_base_hdr *greh;
 594
 595	iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph));
 596	greh = (struct gre_base_hdr *)(iph+1);
 597	greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
 598	greh->protocol = htons(type);
 599
 600	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
 601
 602	/* Set the source hardware address. */
 603	if (saddr)
 604		memcpy(&iph->saddr, saddr, 4);
 605	if (daddr)
 606		memcpy(&iph->daddr, daddr, 4);
 607	if (iph->daddr)
 608		return t->hlen + sizeof(*iph);
 609
 610	return -(t->hlen + sizeof(*iph));
 611}
 612
 613static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
 614{
 615	const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
 616	memcpy(haddr, &iph->saddr, 4);
 617	return 4;
 618}
 619
 620static const struct header_ops ipgre_header_ops = {
 621	.create	= ipgre_header,
 622	.parse	= ipgre_header_parse,
 623};
 624
 625#ifdef CONFIG_NET_IPGRE_BROADCAST
 626static int ipgre_open(struct net_device *dev)
 627{
 628	struct ip_tunnel *t = netdev_priv(dev);
 629
 630	if (ipv4_is_multicast(t->parms.iph.daddr)) {
 631		struct flowi4 fl4;
 632		struct rtable *rt;
 633
 634		rt = ip_route_output_gre(t->net, &fl4,
 635					 t->parms.iph.daddr,
 636					 t->parms.iph.saddr,
 637					 t->parms.o_key,
 638					 RT_TOS(t->parms.iph.tos),
 639					 t->parms.link);
 640		if (IS_ERR(rt))
 641			return -EADDRNOTAVAIL;
 642		dev = rt->dst.dev;
 643		ip_rt_put(rt);
 644		if (!__in_dev_get_rtnl(dev))
 645			return -EADDRNOTAVAIL;
 646		t->mlink = dev->ifindex;
 647		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
 648	}
 649	return 0;
 650}
 651
 652static int ipgre_close(struct net_device *dev)
 653{
 654	struct ip_tunnel *t = netdev_priv(dev);
 655
 656	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
 657		struct in_device *in_dev;
 658		in_dev = inetdev_by_index(t->net, t->mlink);
 659		if (in_dev)
 660			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
 661	}
 662	return 0;
 663}
 664#endif
 665
 666static const struct net_device_ops ipgre_netdev_ops = {
 667	.ndo_init		= ipgre_tunnel_init,
 668	.ndo_uninit		= ip_tunnel_uninit,
 669#ifdef CONFIG_NET_IPGRE_BROADCAST
 670	.ndo_open		= ipgre_open,
 671	.ndo_stop		= ipgre_close,
 672#endif
 673	.ndo_start_xmit		= ipgre_xmit,
 674	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
 675	.ndo_change_mtu		= ip_tunnel_change_mtu,
 676	.ndo_get_stats64	= ip_tunnel_get_stats64,
 677	.ndo_get_iflink		= ip_tunnel_get_iflink,
 678};
 679
 680#define GRE_FEATURES (NETIF_F_SG |		\
 681		      NETIF_F_FRAGLIST |	\
 682		      NETIF_F_HIGHDMA |		\
 683		      NETIF_F_HW_CSUM)
 684
 685static void ipgre_tunnel_setup(struct net_device *dev)
 686{
 687	dev->netdev_ops		= &ipgre_netdev_ops;
 688	dev->type		= ARPHRD_IPGRE;
 689	ip_tunnel_setup(dev, ipgre_net_id);
 690}
 691
 692static void __gre_tunnel_init(struct net_device *dev)
 693{
 694	struct ip_tunnel *tunnel;
 695	int t_hlen;
 696
 697	tunnel = netdev_priv(dev);
 698	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
 699	tunnel->parms.iph.protocol = IPPROTO_GRE;
 700
 701	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
 702
 703	t_hlen = tunnel->hlen + sizeof(struct iphdr);
 704
 705	dev->needed_headroom	= LL_MAX_HEADER + t_hlen + 4;
 706	dev->mtu		= ETH_DATA_LEN - t_hlen - 4;
 707
 708	dev->features		|= GRE_FEATURES;
 709	dev->hw_features	|= GRE_FEATURES;
 710
 711	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
 712		/* TCP offload with GRE SEQ is not supported, nor
 713		 * can we support 2 levels of outer headers requiring
 714		 * an update.
 715		 */
 716		if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
 717		    (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
 718			dev->features    |= NETIF_F_GSO_SOFTWARE;
 719			dev->hw_features |= NETIF_F_GSO_SOFTWARE;
 720		}
 721
 722		/* Can use a lockless transmit, unless we generate
 723		 * output sequences
 724		 */
 725		dev->features |= NETIF_F_LLTX;
 726	}
 727}
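/* Illustrative sizing for the defaults above: with o_flags =
 * TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ, gre_calc_hlen() returns
 * 4 + 4 + 4 + 4 = 16, so with no extra encapsulation
 * t_hlen = 16 + 20 = 36 and dev->mtu = 1500 - 36 - 4 = 1460 (the
 * trailing 4 bytes appear to be historical slack kept by this driver).
 */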
 728
 729static int ipgre_tunnel_init(struct net_device *dev)
 730{
 731	struct ip_tunnel *tunnel = netdev_priv(dev);
 732	struct iphdr *iph = &tunnel->parms.iph;
 733
 734	__gre_tunnel_init(dev);
 735
 736	memcpy(dev->dev_addr, &iph->saddr, 4);
 737	memcpy(dev->broadcast, &iph->daddr, 4);
 738
 739	dev->flags		= IFF_NOARP;
 740	netif_keep_dst(dev);
 741	dev->addr_len		= 4;
 742
 743	if (iph->daddr && !tunnel->collect_md) {
 744#ifdef CONFIG_NET_IPGRE_BROADCAST
 745		if (ipv4_is_multicast(iph->daddr)) {
 746			if (!iph->saddr)
 747				return -EINVAL;
 748			dev->flags = IFF_BROADCAST;
 749			dev->header_ops = &ipgre_header_ops;
 750		}
 751#endif
 752	} else if (!tunnel->collect_md) {
 753		dev->header_ops = &ipgre_header_ops;
 754	}
 755
 756	return ip_tunnel_init(dev);
 757}
 758
 759static const struct gre_protocol ipgre_protocol = {
 760	.handler     = gre_rcv,
 761	.err_handler = gre_err,
 762};
 763
 764static int __net_init ipgre_init_net(struct net *net)
 765{
 766	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
 767}
 768
 769static void __net_exit ipgre_exit_net(struct net *net)
 770{
 771	struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);
 772	ip_tunnel_delete_net(itn, &ipgre_link_ops);
 773}
 774
 775static struct pernet_operations ipgre_net_ops = {
 776	.init = ipgre_init_net,
 777	.exit = ipgre_exit_net,
 778	.id   = &ipgre_net_id,
 779	.size = sizeof(struct ip_tunnel_net),
 780};
 781
 782static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
 783{
 784	__be16 flags;
 785
 786	if (!data)
 787		return 0;
 788
 789	flags = 0;
 790	if (data[IFLA_GRE_IFLAGS])
 791		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
 792	if (data[IFLA_GRE_OFLAGS])
 793		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
 794	if (flags & (GRE_VERSION|GRE_ROUTING))
 795		return -EINVAL;
 796
 797	if (data[IFLA_GRE_COLLECT_METADATA] &&
 798	    data[IFLA_GRE_ENCAP_TYPE] &&
 799	    nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
 800		return -EINVAL;
 801
 802	return 0;
 803}
 804
 805static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
 806{
 807	__be32 daddr;
 808
 809	if (tb[IFLA_ADDRESS]) {
 810		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
 811			return -EINVAL;
 812		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
 813			return -EADDRNOTAVAIL;
 814	}
 815
 816	if (!data)
 817		goto out;
 818
 819	if (data[IFLA_GRE_REMOTE]) {
 820		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
 821		if (!daddr)
 822			return -EINVAL;
 823	}
 824
 825out:
 826	return ipgre_tunnel_validate(tb, data);
 827}
 828
 829static int ipgre_netlink_parms(struct net_device *dev,
 830				struct nlattr *data[],
 831				struct nlattr *tb[],
 832				struct ip_tunnel_parm *parms)
 833{
 834	struct ip_tunnel *t = netdev_priv(dev);
 835
 836	memset(parms, 0, sizeof(*parms));
 837
 838	parms->iph.protocol = IPPROTO_GRE;
 839
 840	if (!data)
 841		return 0;
 842
 843	if (data[IFLA_GRE_LINK])
 844		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
 845
 846	if (data[IFLA_GRE_IFLAGS])
 847		parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
 848
 849	if (data[IFLA_GRE_OFLAGS])
 850		parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
 851
 852	if (data[IFLA_GRE_IKEY])
 853		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
 854
 855	if (data[IFLA_GRE_OKEY])
 856		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
 857
 858	if (data[IFLA_GRE_LOCAL])
 859		parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);
 860
 861	if (data[IFLA_GRE_REMOTE])
 862		parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);
 863
 864	if (data[IFLA_GRE_TTL])
 865		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
 866
 867	if (data[IFLA_GRE_TOS])
 868		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
 869
 870	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
 871		if (t->ignore_df)
 872			return -EINVAL;
 873		parms->iph.frag_off = htons(IP_DF);
 874	}
 875
 876	if (data[IFLA_GRE_COLLECT_METADATA]) {
 877		t->collect_md = true;
 878		if (dev->type == ARPHRD_IPGRE)
 879			dev->type = ARPHRD_NONE;
 880	}
 881
 882	if (data[IFLA_GRE_IGNORE_DF]) {
 883		if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
 884		  && (parms->iph.frag_off & htons(IP_DF)))
 885			return -EINVAL;
 886		t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
 887	}
 888
 889	return 0;
 890}
 891
 892/* This function returns true when ENCAP attributes are present in the nl msg */
 893static bool ipgre_netlink_encap_parms(struct nlattr *data[],
 894				      struct ip_tunnel_encap *ipencap)
 895{
 896	bool ret = false;
 897
 898	memset(ipencap, 0, sizeof(*ipencap));
 899
 900	if (!data)
 901		return ret;
 902
 903	if (data[IFLA_GRE_ENCAP_TYPE]) {
 904		ret = true;
 905		ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
 906	}
 907
 908	if (data[IFLA_GRE_ENCAP_FLAGS]) {
 909		ret = true;
 910		ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
 911	}
 912
 913	if (data[IFLA_GRE_ENCAP_SPORT]) {
 914		ret = true;
 915		ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
 916	}
 917
 918	if (data[IFLA_GRE_ENCAP_DPORT]) {
 919		ret = true;
 920		ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
 921	}
 922
 923	return ret;
 924}
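/* The encap attributes gathered above correspond to iproute2's
 * FOU/GUE options; for example (the port number is a placeholder):
 *
 *	ip fou add port 5555 ipproto 47
 *	ip link add gre1 type gre remote 203.0.113.1 local 198.51.100.2 \
 *		encap fou encap-sport auto encap-dport 5555
 */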
 925
 926static int gre_tap_init(struct net_device *dev)
 927{
 928	__gre_tunnel_init(dev);
 929	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
 930
 931	return ip_tunnel_init(dev);
 932}
 933
 934static const struct net_device_ops gre_tap_netdev_ops = {
 935	.ndo_init		= gre_tap_init,
 936	.ndo_uninit		= ip_tunnel_uninit,
 937	.ndo_start_xmit		= gre_tap_xmit,
 938	.ndo_set_mac_address	= eth_mac_addr,
 939	.ndo_validate_addr	= eth_validate_addr,
 940	.ndo_change_mtu		= ip_tunnel_change_mtu,
 941	.ndo_get_stats64	= ip_tunnel_get_stats64,
 942	.ndo_get_iflink		= ip_tunnel_get_iflink,
 943	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
 944};
 945
 946static void ipgre_tap_setup(struct net_device *dev)
 947{
 948	ether_setup(dev);
 949	dev->netdev_ops	= &gre_tap_netdev_ops;
 950	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 951	dev->priv_flags	|= IFF_LIVE_ADDR_CHANGE;
 952	ip_tunnel_setup(dev, gre_tap_net_id);
 953}
 954
 955static int ipgre_newlink(struct net *src_net, struct net_device *dev,
 956			 struct nlattr *tb[], struct nlattr *data[])
 957{
 958	struct ip_tunnel_parm p;
 959	struct ip_tunnel_encap ipencap;
 960	int err;
 961
 962	if (ipgre_netlink_encap_parms(data, &ipencap)) {
 963		struct ip_tunnel *t = netdev_priv(dev);
 964		err = ip_tunnel_encap_setup(t, &ipencap);
 965
 966		if (err < 0)
 967			return err;
 968	}
 969
 970	err = ipgre_netlink_parms(dev, data, tb, &p);
 971	if (err < 0)
 972		return err;
 973	return ip_tunnel_newlink(dev, tb, &p);
 974}
 975
 976static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
 977			    struct nlattr *data[])
 978{
 979	struct ip_tunnel_parm p;
 980	struct ip_tunnel_encap ipencap;
 981	int err;
 982
 983	if (ipgre_netlink_encap_parms(data, &ipencap)) {
 984		struct ip_tunnel *t = netdev_priv(dev);
 985		err = ip_tunnel_encap_setup(t, &ipencap);
 986
 987		if (err < 0)
 988			return err;
 989	}
 990
 991	err = ipgre_netlink_parms(dev, data, tb, &p);
 992	if (err < 0)
 993		return err;
 994	return ip_tunnel_changelink(dev, tb, &p);
 995}
 996
 997static size_t ipgre_get_size(const struct net_device *dev)
 998{
 999	return
1000		/* IFLA_GRE_LINK */
1001		nla_total_size(4) +
1002		/* IFLA_GRE_IFLAGS */
1003		nla_total_size(2) +
1004		/* IFLA_GRE_OFLAGS */
1005		nla_total_size(2) +
1006		/* IFLA_GRE_IKEY */
1007		nla_total_size(4) +
1008		/* IFLA_GRE_OKEY */
1009		nla_total_size(4) +
1010		/* IFLA_GRE_LOCAL */
1011		nla_total_size(4) +
1012		/* IFLA_GRE_REMOTE */
1013		nla_total_size(4) +
1014		/* IFLA_GRE_TTL */
1015		nla_total_size(1) +
1016		/* IFLA_GRE_TOS */
1017		nla_total_size(1) +
1018		/* IFLA_GRE_PMTUDISC */
1019		nla_total_size(1) +
1020		/* IFLA_GRE_ENCAP_TYPE */
1021		nla_total_size(2) +
1022		/* IFLA_GRE_ENCAP_FLAGS */
1023		nla_total_size(2) +
1024		/* IFLA_GRE_ENCAP_SPORT */
1025		nla_total_size(2) +
1026		/* IFLA_GRE_ENCAP_DPORT */
1027		nla_total_size(2) +
1028		/* IFLA_GRE_COLLECT_METADATA */
1029		nla_total_size(0) +
1030		/* IFLA_GRE_IGNORE_DF */
1031		nla_total_size(1) +
1032		0;
1033}
1034
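/* Dump the current tunnel configuration to userspace (e.g. for
 * "ip -d link show"). Every attribute emitted here must be accounted
 * for in ipgre_get_size(), otherwise the message may overrun the
 * space reserved for it and fail with -EMSGSIZE.
 */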
1035static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1036{
1037	struct ip_tunnel *t = netdev_priv(dev);
1038	struct ip_tunnel_parm *p = &t->parms;
1039
1040	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1041	    nla_put_be16(skb, IFLA_GRE_IFLAGS,
1042			 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
1043	    nla_put_be16(skb, IFLA_GRE_OFLAGS,
1044			 gre_tnl_flags_to_gre_flags(p->o_flags)) ||
1045	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1046	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1047	    nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1048	    nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
1049	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1050	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1051	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1052		       !!(p->iph.frag_off & htons(IP_DF))))
1053		goto nla_put_failure;
1054
1055	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
1056			t->encap.type) ||
1057	    nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
1058			 t->encap.sport) ||
1059	    nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
1060			 t->encap.dport) ||
1061	    nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
1062			t->encap.flags))
1063		goto nla_put_failure;
1064
1065	if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
1066		goto nla_put_failure;
1067
1068	if (t->collect_md) {
1069		if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
1070			goto nla_put_failure;
1071	}
1072
1073	return 0;
1074
1075nla_put_failure:
1076	return -EMSGSIZE;
1077}
1078
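/* Expected type/length of each IFLA_GRE_* attribute; malformed
 * attributes are rejected during netlink parsing, before the
 * newlink/changelink handlers run.
 */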
1079static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1080	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
1081	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
1082	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
1083	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
1084	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
1085	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1086	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
1087	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
1088	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
1089	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
1090	[IFLA_GRE_ENCAP_TYPE]	= { .type = NLA_U16 },
1091	[IFLA_GRE_ENCAP_FLAGS]	= { .type = NLA_U16 },
1092	[IFLA_GRE_ENCAP_SPORT]	= { .type = NLA_U16 },
1093	[IFLA_GRE_ENCAP_DPORT]	= { .type = NLA_U16 },
1094	[IFLA_GRE_COLLECT_METADATA]	= { .type = NLA_FLAG },
1095	[IFLA_GRE_IGNORE_DF]	= { .type = NLA_U8 },
1096};
1097
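/* Link operations for layer-3 GRE devices ("ip link ... type gre"). */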
1098static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1099	.kind		= "gre",
1100	.maxtype	= IFLA_GRE_MAX,
1101	.policy		= ipgre_policy,
1102	.priv_size	= sizeof(struct ip_tunnel),
1103	.setup		= ipgre_tunnel_setup,
1104	.validate	= ipgre_tunnel_validate,
1105	.newlink	= ipgre_newlink,
1106	.changelink	= ipgre_changelink,
1107	.dellink	= ip_tunnel_dellink,
1108	.get_size	= ipgre_get_size,
1109	.fill_info	= ipgre_fill_info,
1110	.get_link_net	= ip_tunnel_get_link_net,
1111};
1112
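/* Link operations for Ethernet-over-GRE devices ("ip link ... type gretap"). */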
1113static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1114	.kind		= "gretap",
1115	.maxtype	= IFLA_GRE_MAX,
1116	.policy		= ipgre_policy,
1117	.priv_size	= sizeof(struct ip_tunnel),
1118	.setup		= ipgre_tap_setup,
1119	.validate	= ipgre_tap_validate,
1120	.newlink	= ipgre_newlink,
1121	.changelink	= ipgre_changelink,
1122	.dellink	= ip_tunnel_dellink,
1123	.get_size	= ipgre_get_size,
1124	.fill_info	= ipgre_fill_info,
1125	.get_link_net	= ip_tunnel_get_link_net,
1126};
1127
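/* Create a flow-based (collect_md) gretap device from within the
 * kernel, bypassing the usual netlink request path; openvswitch uses
 * this for its GRE vports. Callers are expected to hold the RTNL
 * lock, as the rtnl_create_link()/rtnl_configure_link() calls below
 * assume it.
 */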
1128struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
1129					u8 name_assign_type)
1130{
1131	struct nlattr *tb[IFLA_MAX + 1];
1132	struct net_device *dev;
1133	LIST_HEAD(list_kill);
1134	struct ip_tunnel *t;
1135	int err;
1136
1137	memset(&tb, 0, sizeof(tb));
1138
1139	dev = rtnl_create_link(net, name, name_assign_type,
1140			       &ipgre_tap_ops, tb);
1141	if (IS_ERR(dev))
1142		return dev;
1143
 1144	/* Configure flow-based GRE device. */
1145	t = netdev_priv(dev);
1146	t->collect_md = true;
1147
1148	err = ipgre_newlink(net, dev, tb, NULL);
1149	if (err < 0) {
1150		free_netdev(dev);
1151		return ERR_PTR(err);
1152	}
1153
1154	/* openvswitch users expect packet sizes to be unrestricted,
1155	 * so set the largest MTU we can.
1156	 */
1157	err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
1158	if (err)
1159		goto out;
1160
1161	err = rtnl_configure_link(dev, NULL);
1162	if (err < 0)
1163		goto out;
1164
1165	return dev;
1166out:
1167	ip_tunnel_dellink(dev, &list_kill);
1168	unregister_netdevice_many(&list_kill);
1169	return ERR_PTR(err);
1170}
1171EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
1172
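/* Per-network-namespace constructor/destructor for the gretap tunnel
 * table; "gretap0" is the name of the fallback device created in each
 * namespace.
 */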
1173static int __net_init ipgre_tap_init_net(struct net *net)
1174{
1175	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
1176}
1177
1178static void __net_exit ipgre_tap_exit_net(struct net *net)
1179{
1180	struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);
1181	ip_tunnel_delete_net(itn, &ipgre_tap_ops);
1182}
1183
1184static struct pernet_operations ipgre_tap_net_ops = {
1185	.init = ipgre_tap_init_net,
1186	.exit = ipgre_tap_exit_net,
1187	.id   = &gre_tap_net_id,
1188	.size = sizeof(struct ip_tunnel_net),
1189};
1190
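/* Module entry point: register the per-net tunnel tables for "gre"
 * and "gretap", hook into the GRE protocol demultiplexer
 * (GREPROTO_CISCO) and register both rtnl_link_ops, unwinding in
 * reverse order if any step fails.
 */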
1191static int __init ipgre_init(void)
1192{
1193	int err;
1194
1195	pr_info("GRE over IPv4 tunneling driver\n");
1196
1197	err = register_pernet_device(&ipgre_net_ops);
1198	if (err < 0)
1199		return err;
1200
1201	err = register_pernet_device(&ipgre_tap_net_ops);
1202	if (err < 0)
 1203		goto pnet_tap_failed;
1204
1205	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
1206	if (err < 0) {
1207		pr_info("%s: can't add protocol\n", __func__);
1208		goto add_proto_failed;
1209	}
1210
1211	err = rtnl_link_register(&ipgre_link_ops);
1212	if (err < 0)
1213		goto rtnl_link_failed;
1214
1215	err = rtnl_link_register(&ipgre_tap_ops);
1216	if (err < 0)
1217		goto tap_ops_failed;
1218
1219	return 0;
1220
1221tap_ops_failed:
1222	rtnl_link_unregister(&ipgre_link_ops);
1223rtnl_link_failed:
1224	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1225add_proto_failed:
1226	unregister_pernet_device(&ipgre_tap_net_ops);
 1227pnet_tap_failed:
1228	unregister_pernet_device(&ipgre_net_ops);
1229	return err;
1230}
1231
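/* Module exit: undo ipgre_init() in reverse order. */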
1232static void __exit ipgre_fini(void)
1233{
1234	rtnl_link_unregister(&ipgre_tap_ops);
1235	rtnl_link_unregister(&ipgre_link_ops);
1236	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1237	unregister_pernet_device(&ipgre_tap_net_ops);
1238	unregister_pernet_device(&ipgre_net_ops);
1239}
1240
1241module_init(ipgre_init);
1242module_exit(ipgre_fini);
1243MODULE_LICENSE("GPL");
1244MODULE_ALIAS_RTNL_LINK("gre");
1245MODULE_ALIAS_RTNL_LINK("gretap");
1246MODULE_ALIAS_NETDEV("gre0");
1247MODULE_ALIAS_NETDEV("gretap0");