Linux Audio

Check our new training course

Loading...
Note: File does not exist in v3.5.6.
  1/*
  2 * Copyright (c) 2013 Nicira, Inc.
  3 *
  4 * This program is free software; you can redistribute it and/or
  5 * modify it under the terms of version 2 of the GNU General Public
  6 * License as published by the Free Software Foundation.
  7 *
  8 * This program is distributed in the hope that it will be useful, but
  9 * WITHOUT ANY WARRANTY; without even the implied warranty of
 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 11 * General Public License for more details.
 12 *
 13 * You should have received a copy of the GNU General Public License
 14 * along with this program; if not, write to the Free Software
 15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 16 * 02110-1301, USA
 17 */
 18
 19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 20
 21#include <linux/types.h>
 22#include <linux/kernel.h>
 23#include <linux/skbuff.h>
 24#include <linux/netdevice.h>
 25#include <linux/in.h>
 26#include <linux/if_arp.h>
 27#include <linux/init.h>
 28#include <linux/in6.h>
 29#include <linux/inetdevice.h>
 30#include <linux/netfilter_ipv4.h>
 31#include <linux/etherdevice.h>
 32#include <linux/if_ether.h>
 33#include <linux/if_vlan.h>
 34#include <linux/static_key.h>
 35
 36#include <net/ip.h>
 37#include <net/icmp.h>
 38#include <net/protocol.h>
 39#include <net/ip_tunnels.h>
 40#include <net/arp.h>
 41#include <net/checksum.h>
 42#include <net/dsfield.h>
 43#include <net/inet_ecn.h>
 44#include <net/xfrm.h>
 45#include <net/net_namespace.h>
 46#include <net/netns/generic.h>
 47#include <net/rtnetlink.h>
 48#include <net/dst_metadata.h>
 49
 50void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
 51		   __be32 src, __be32 dst, __u8 proto,
 52		   __u8 tos, __u8 ttl, __be16 df, bool xnet)
 53{
 54	int pkt_len = skb->len - skb_inner_network_offset(skb);
 55	struct net *net = dev_net(rt->dst.dev);
 56	struct net_device *dev = skb->dev;
 57	struct iphdr *iph;
 58	int err;
 59
 60	skb_scrub_packet(skb, xnet);
 61
 62	skb_clear_hash(skb);
 63	skb_dst_set(skb, &rt->dst);
 64	memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
 65
 66	/* Push down and install the IP header. */
 67	skb_push(skb, sizeof(struct iphdr));
 68	skb_reset_network_header(skb);
 69
 70	iph = ip_hdr(skb);
 71
 72	iph->version	=	4;
 73	iph->ihl	=	sizeof(struct iphdr) >> 2;
 74	iph->frag_off	=	df;
 75	iph->protocol	=	proto;
 76	iph->tos	=	tos;
 77	iph->daddr	=	dst;
 78	iph->saddr	=	src;
 79	iph->ttl	=	ttl;
 80	__ip_select_ident(net, iph, skb_shinfo(skb)->gso_segs ?: 1);
 81
 82	err = ip_local_out(net, sk, skb);
 83	if (unlikely(net_xmit_eval(err)))
 84		pkt_len = 0;
 85	iptunnel_xmit_stats(dev, pkt_len);
 86}
 87EXPORT_SYMBOL_GPL(iptunnel_xmit);
 88
 89int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto,
 90			 bool xnet)
 91{
 92	if (unlikely(!pskb_may_pull(skb, hdr_len)))
 93		return -ENOMEM;
 94
 95	skb_pull_rcsum(skb, hdr_len);
 96
 97	if (inner_proto == htons(ETH_P_TEB)) {
 98		struct ethhdr *eh;
 99
100		if (unlikely(!pskb_may_pull(skb, ETH_HLEN)))
101			return -ENOMEM;
102
103		eh = (struct ethhdr *)skb->data;
104		if (likely(eth_proto_is_802_3(eh->h_proto)))
105			skb->protocol = eh->h_proto;
106		else
107			skb->protocol = htons(ETH_P_802_2);
108
109	} else {
110		skb->protocol = inner_proto;
111	}
112
113	skb_clear_hash_if_not_l4(skb);
114	skb->vlan_tci = 0;
115	skb_set_queue_mapping(skb, 0);
116	skb_scrub_packet(skb, xnet);
117
118	return iptunnel_pull_offloads(skb);
119}
120EXPORT_SYMBOL_GPL(iptunnel_pull_header);
121
122struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
123					     gfp_t flags)
124{
125	struct metadata_dst *res;
126	struct ip_tunnel_info *dst, *src;
127
128	if (!md || md->u.tun_info.mode & IP_TUNNEL_INFO_TX)
129		return NULL;
130
131	res = metadata_dst_alloc(0, flags);
132	if (!res)
133		return NULL;
134
135	dst = &res->u.tun_info;
136	src = &md->u.tun_info;
137	dst->key.tun_id = src->key.tun_id;
138	if (src->mode & IP_TUNNEL_INFO_IPV6)
139		memcpy(&dst->key.u.ipv6.dst, &src->key.u.ipv6.src,
140		       sizeof(struct in6_addr));
141	else
142		dst->key.u.ipv4.dst = src->key.u.ipv4.src;
143	dst->mode = src->mode | IP_TUNNEL_INFO_TX;
144
145	return res;
146}
147EXPORT_SYMBOL_GPL(iptunnel_metadata_reply);
148
149struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb,
150					 int gso_type_mask)
151{
152	int err;
153
154	if (likely(!skb->encapsulation)) {
155		skb_reset_inner_headers(skb);
156		skb->encapsulation = 1;
157	}
158
159	if (skb_is_gso(skb)) {
160		err = skb_unclone(skb, GFP_ATOMIC);
161		if (unlikely(err))
162			goto error;
163		skb_shinfo(skb)->gso_type |= gso_type_mask;
164		return skb;
165	}
166
167	if (skb->ip_summed != CHECKSUM_PARTIAL) {
168		skb->ip_summed = CHECKSUM_NONE;
169		/* We clear encapsulation here to prevent badly-written
170		 * drivers potentially deciding to offload an inner checksum
171		 * if we set CHECKSUM_PARTIAL on the outer header.
172		 * This should go away when the drivers are all fixed.
173		 */
174		skb->encapsulation = 0;
175	}
176
177	return skb;
178error:
179	kfree_skb(skb);
180	return ERR_PTR(err);
181}
182EXPORT_SYMBOL_GPL(iptunnel_handle_offloads);
183
184/* Often modified stats are per cpu, other are shared (netdev->stats) */
185struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
186						struct rtnl_link_stats64 *tot)
187{
188	int i;
189
190	netdev_stats_to_stats64(tot, &dev->stats);
191
192	for_each_possible_cpu(i) {
193		const struct pcpu_sw_netstats *tstats =
194						   per_cpu_ptr(dev->tstats, i);
195		u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
196		unsigned int start;
197
198		do {
199			start = u64_stats_fetch_begin_irq(&tstats->syncp);
200			rx_packets = tstats->rx_packets;
201			tx_packets = tstats->tx_packets;
202			rx_bytes = tstats->rx_bytes;
203			tx_bytes = tstats->tx_bytes;
204		} while (u64_stats_fetch_retry_irq(&tstats->syncp, start));
205
206		tot->rx_packets += rx_packets;
207		tot->tx_packets += tx_packets;
208		tot->rx_bytes   += rx_bytes;
209		tot->tx_bytes   += tx_bytes;
210	}
211
212	return tot;
213}
214EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
215
216static const struct nla_policy ip_tun_policy[LWTUNNEL_IP_MAX + 1] = {
217	[LWTUNNEL_IP_ID]	= { .type = NLA_U64 },
218	[LWTUNNEL_IP_DST]	= { .type = NLA_U32 },
219	[LWTUNNEL_IP_SRC]	= { .type = NLA_U32 },
220	[LWTUNNEL_IP_TTL]	= { .type = NLA_U8 },
221	[LWTUNNEL_IP_TOS]	= { .type = NLA_U8 },
222	[LWTUNNEL_IP_FLAGS]	= { .type = NLA_U16 },
223};
224
225static int ip_tun_build_state(struct net_device *dev, struct nlattr *attr,
226			      unsigned int family, const void *cfg,
227			      struct lwtunnel_state **ts)
228{
229	struct ip_tunnel_info *tun_info;
230	struct lwtunnel_state *new_state;
231	struct nlattr *tb[LWTUNNEL_IP_MAX + 1];
232	int err;
233
234	err = nla_parse_nested(tb, LWTUNNEL_IP_MAX, attr, ip_tun_policy);
235	if (err < 0)
236		return err;
237
238	new_state = lwtunnel_state_alloc(sizeof(*tun_info));
239	if (!new_state)
240		return -ENOMEM;
241
242	new_state->type = LWTUNNEL_ENCAP_IP;
243
244	tun_info = lwt_tun_info(new_state);
245
246	if (tb[LWTUNNEL_IP_ID])
247		tun_info->key.tun_id = nla_get_be64(tb[LWTUNNEL_IP_ID]);
248
249	if (tb[LWTUNNEL_IP_DST])
250		tun_info->key.u.ipv4.dst = nla_get_be32(tb[LWTUNNEL_IP_DST]);
251
252	if (tb[LWTUNNEL_IP_SRC])
253		tun_info->key.u.ipv4.src = nla_get_be32(tb[LWTUNNEL_IP_SRC]);
254
255	if (tb[LWTUNNEL_IP_TTL])
256		tun_info->key.ttl = nla_get_u8(tb[LWTUNNEL_IP_TTL]);
257
258	if (tb[LWTUNNEL_IP_TOS])
259		tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP_TOS]);
260
261	if (tb[LWTUNNEL_IP_FLAGS])
262		tun_info->key.tun_flags = nla_get_be16(tb[LWTUNNEL_IP_FLAGS]);
263
264	tun_info->mode = IP_TUNNEL_INFO_TX;
265	tun_info->options_len = 0;
266
267	*ts = new_state;
268
269	return 0;
270}
271
272static int ip_tun_fill_encap_info(struct sk_buff *skb,
273				  struct lwtunnel_state *lwtstate)
274{
275	struct ip_tunnel_info *tun_info = lwt_tun_info(lwtstate);
276
277	if (nla_put_be64(skb, LWTUNNEL_IP_ID, tun_info->key.tun_id) ||
278	    nla_put_be32(skb, LWTUNNEL_IP_DST, tun_info->key.u.ipv4.dst) ||
279	    nla_put_be32(skb, LWTUNNEL_IP_SRC, tun_info->key.u.ipv4.src) ||
280	    nla_put_u8(skb, LWTUNNEL_IP_TOS, tun_info->key.tos) ||
281	    nla_put_u8(skb, LWTUNNEL_IP_TTL, tun_info->key.ttl) ||
282	    nla_put_be16(skb, LWTUNNEL_IP_FLAGS, tun_info->key.tun_flags))
283		return -ENOMEM;
284
285	return 0;
286}
287
/* Netlink space needed for the attributes emitted by
 * ip_tun_fill_encap_info(). @lwtstate is not consulted; the size is fixed.
 */
static int ip_tun_encap_nlsize(struct lwtunnel_state *lwtstate)
{
	int size = 0;

	size += nla_total_size(8);	/* LWTUNNEL_IP_ID */
	size += nla_total_size(4);	/* LWTUNNEL_IP_DST */
	size += nla_total_size(4);	/* LWTUNNEL_IP_SRC */
	size += nla_total_size(1);	/* LWTUNNEL_IP_TOS */
	size += nla_total_size(1);	/* LWTUNNEL_IP_TTL */
	size += nla_total_size(2);	/* LWTUNNEL_IP_FLAGS */

	return size;
}
297
298static int ip_tun_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b)
299{
300	return memcmp(lwt_tun_info(a), lwt_tun_info(b),
301		      sizeof(struct ip_tunnel_info));
302}
303
304static const struct lwtunnel_encap_ops ip_tun_lwt_ops = {
305	.build_state = ip_tun_build_state,
306	.fill_encap = ip_tun_fill_encap_info,
307	.get_encap_size = ip_tun_encap_nlsize,
308	.cmp_encap = ip_tun_cmp_encap,
309};
310
311static const struct nla_policy ip6_tun_policy[LWTUNNEL_IP6_MAX + 1] = {
312	[LWTUNNEL_IP6_ID]		= { .type = NLA_U64 },
313	[LWTUNNEL_IP6_DST]		= { .len = sizeof(struct in6_addr) },
314	[LWTUNNEL_IP6_SRC]		= { .len = sizeof(struct in6_addr) },
315	[LWTUNNEL_IP6_HOPLIMIT]		= { .type = NLA_U8 },
316	[LWTUNNEL_IP6_TC]		= { .type = NLA_U8 },
317	[LWTUNNEL_IP6_FLAGS]		= { .type = NLA_U16 },
318};
319
320static int ip6_tun_build_state(struct net_device *dev, struct nlattr *attr,
321			       unsigned int family, const void *cfg,
322			       struct lwtunnel_state **ts)
323{
324	struct ip_tunnel_info *tun_info;
325	struct lwtunnel_state *new_state;
326	struct nlattr *tb[LWTUNNEL_IP6_MAX + 1];
327	int err;
328
329	err = nla_parse_nested(tb, LWTUNNEL_IP6_MAX, attr, ip6_tun_policy);
330	if (err < 0)
331		return err;
332
333	new_state = lwtunnel_state_alloc(sizeof(*tun_info));
334	if (!new_state)
335		return -ENOMEM;
336
337	new_state->type = LWTUNNEL_ENCAP_IP6;
338
339	tun_info = lwt_tun_info(new_state);
340
341	if (tb[LWTUNNEL_IP6_ID])
342		tun_info->key.tun_id = nla_get_be64(tb[LWTUNNEL_IP6_ID]);
343
344	if (tb[LWTUNNEL_IP6_DST])
345		tun_info->key.u.ipv6.dst = nla_get_in6_addr(tb[LWTUNNEL_IP6_DST]);
346
347	if (tb[LWTUNNEL_IP6_SRC])
348		tun_info->key.u.ipv6.src = nla_get_in6_addr(tb[LWTUNNEL_IP6_SRC]);
349
350	if (tb[LWTUNNEL_IP6_HOPLIMIT])
351		tun_info->key.ttl = nla_get_u8(tb[LWTUNNEL_IP6_HOPLIMIT]);
352
353	if (tb[LWTUNNEL_IP6_TC])
354		tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP6_TC]);
355
356	if (tb[LWTUNNEL_IP6_FLAGS])
357		tun_info->key.tun_flags = nla_get_be16(tb[LWTUNNEL_IP6_FLAGS]);
358
359	tun_info->mode = IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_IPV6;
360	tun_info->options_len = 0;
361
362	*ts = new_state;
363
364	return 0;
365}
366
367static int ip6_tun_fill_encap_info(struct sk_buff *skb,
368				   struct lwtunnel_state *lwtstate)
369{
370	struct ip_tunnel_info *tun_info = lwt_tun_info(lwtstate);
371
372	if (nla_put_be64(skb, LWTUNNEL_IP6_ID, tun_info->key.tun_id) ||
373	    nla_put_in6_addr(skb, LWTUNNEL_IP6_DST, &tun_info->key.u.ipv6.dst) ||
374	    nla_put_in6_addr(skb, LWTUNNEL_IP6_SRC, &tun_info->key.u.ipv6.src) ||
375	    nla_put_u8(skb, LWTUNNEL_IP6_TC, tun_info->key.tos) ||
376	    nla_put_u8(skb, LWTUNNEL_IP6_HOPLIMIT, tun_info->key.ttl) ||
377	    nla_put_be16(skb, LWTUNNEL_IP6_FLAGS, tun_info->key.tun_flags))
378		return -ENOMEM;
379
380	return 0;
381}
382
/* Netlink space needed for the attributes emitted by
 * ip6_tun_fill_encap_info(). @lwtstate is not consulted; the size is fixed.
 */
static int ip6_tun_encap_nlsize(struct lwtunnel_state *lwtstate)
{
	int size = 0;

	size += nla_total_size(8);	/* LWTUNNEL_IP6_ID */
	size += nla_total_size(16);	/* LWTUNNEL_IP6_DST */
	size += nla_total_size(16);	/* LWTUNNEL_IP6_SRC */
	size += nla_total_size(1);	/* LWTUNNEL_IP6_HOPLIMIT */
	size += nla_total_size(1);	/* LWTUNNEL_IP6_TC */
	size += nla_total_size(2);	/* LWTUNNEL_IP6_FLAGS */

	return size;
}
392
393static const struct lwtunnel_encap_ops ip6_tun_lwt_ops = {
394	.build_state = ip6_tun_build_state,
395	.fill_encap = ip6_tun_fill_encap_info,
396	.get_encap_size = ip6_tun_encap_nlsize,
397	.cmp_encap = ip_tun_cmp_encap,
398};
399
400void __init ip_tunnel_core_init(void)
401{
402	/* If you land here, make sure whether increasing ip_tunnel_info's
403	 * options_len is a reasonable choice with its usage in front ends
404	 * (f.e., it's part of flow keys, etc).
405	 */
406	BUILD_BUG_ON(IP_TUNNEL_OPTS_MAX != 255);
407
408	lwtunnel_encap_add_ops(&ip_tun_lwt_ops, LWTUNNEL_ENCAP_IP);
409	lwtunnel_encap_add_ops(&ip6_tun_lwt_ops, LWTUNNEL_ENCAP_IP6);
410}
411
412struct static_key ip_tunnel_metadata_cnt = STATIC_KEY_INIT_FALSE;
413EXPORT_SYMBOL(ip_tunnel_metadata_cnt);
414
415void ip_tunnel_need_metadata(void)
416{
417	static_key_slow_inc(&ip_tunnel_metadata_cnt);
418}
419EXPORT_SYMBOL_GPL(ip_tunnel_need_metadata);
420
421void ip_tunnel_unneed_metadata(void)
422{
423	static_key_slow_dec(&ip_tunnel_metadata_cnt);
424}
425EXPORT_SYMBOL_GPL(ip_tunnel_unneed_metadata);