Linux Audio

Check our new training course

Loading...
v6.13.7
  1// SPDX-License-Identifier: GPL-2.0
  2
  3/* In-place tunneling */
  4
  5#include <stdbool.h>
  6#include <string.h>
  7
  8#include <linux/stddef.h>
  9#include <linux/bpf.h>
 10#include <linux/if_ether.h>
 11#include <linux/in.h>
 12#include <linux/ip.h>
 13#include <linux/ipv6.h>
 14#include <linux/mpls.h>
 15#include <linux/tcp.h>
 16#include <linux/udp.h>
 17#include <linux/pkt_cls.h>
 18#include <linux/types.h>
 19
 20#include <bpf/bpf_endian.h>
 21#include <bpf/bpf_helpers.h>
 22#include "bpf_compiler.h"
 23
 24#pragma GCC diagnostic ignored "-Waddress-of-packed-member"
 25
 26static const int cfg_port = 8000;
 27
 28static const int cfg_udp_src = 20000;
 29
 30#define	L2_PAD_SZ	(sizeof(struct vxlanhdr) + ETH_HLEN)
 31
 32#define	UDP_PORT		5555
 33#define	MPLS_OVER_UDP_PORT	6635
 34#define	ETH_OVER_UDP_PORT	7777
 35#define	VXLAN_UDP_PORT		8472
 36
 37#define	EXTPROTO_VXLAN	0x1
 38
 39#define	VXLAN_N_VID     (1u << 24)
 40#define	VXLAN_VNI_MASK	bpf_htonl((VXLAN_N_VID - 1) << 8)
 41#define	VXLAN_FLAGS     0x8
 42#define	VXLAN_VNI       1
 43
 44#ifndef NEXTHDR_DEST
 45#define NEXTHDR_DEST	60
 46#endif
 47
 48/* MPLS label 1000 with S bit (last label) set and ttl of 255. */
 49static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
 50						     MPLS_LS_S_MASK | 0xff);
 51
 52struct vxlanhdr {
 53	__be32 vx_flags;
 54	__be32 vx_vni;
 55} __attribute__((packed));
 56
 57struct gre_hdr {
 58	__be16 flags;
 59	__be16 protocol;
 60} __attribute__((packed));
 61
 62union l4hdr {
 63	struct udphdr udp;
 64	struct gre_hdr gre;
 65};
 66
 67struct v4hdr {
 68	struct iphdr ip;
 69	union l4hdr l4hdr;
 70	__u8 pad[L2_PAD_SZ];		/* space for L2 header / vxlan header ... */
 71} __attribute__((packed));
 72
 73struct v6hdr {
 74	struct ipv6hdr ip;
 75	union l4hdr l4hdr;
 76	__u8 pad[L2_PAD_SZ];		/* space for L2 header / vxlan header ... */
 77} __attribute__((packed));
 78
 79static __always_inline void set_ipv4_csum(struct iphdr *iph)
 80{
 81	__u16 *iph16 = (__u16 *)iph;
 82	__u32 csum;
 83	int i;
 84
 85	iph->check = 0;
 86
 87	__pragma_loop_unroll_full
 88	for (i = 0, csum = 0; i < sizeof(*iph) >> 1; i++)
 89		csum += *iph16++;
 90
 91	iph->check = ~((csum & 0xffff) + (csum >> 16));
 92}
 93
 94static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
 95					__u16 l2_proto, __u16 ext_proto)
 96{
 97	__u16 udp_dst = UDP_PORT;
 98	struct iphdr iph_inner;
 99	struct v4hdr h_outer;
100	struct tcphdr tcph;
101	int olen, l2_len;
102	__u8 *l2_hdr = NULL;
103	int tcp_off;
104	__u64 flags;
105
106	/* Most tests encapsulate a packet into a tunnel with the same
107	 * network protocol, and derive the outer header fields from
108	 * the inner header.
109	 *
110	 * The 6in4 case tests different inner and outer protocols. As
111	 * the inner is ipv6, but the outer expects an ipv4 header as
112	 * input, manually build a struct iphdr based on the ipv6hdr.
113	 */
114	if (encap_proto == IPPROTO_IPV6) {
115		const __u32 saddr = (192 << 24) | (168 << 16) | (1 << 8) | 1;
116		const __u32 daddr = (192 << 24) | (168 << 16) | (1 << 8) | 2;
117		struct ipv6hdr iph6_inner;
118
119		/* Read the IPv6 header */
120		if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph6_inner,
121				       sizeof(iph6_inner)) < 0)
122			return TC_ACT_OK;
123
124		/* Derive the IPv4 header fields from the IPv6 header */
125		memset(&iph_inner, 0, sizeof(iph_inner));
126		iph_inner.version = 4;
127		iph_inner.ihl = 5;
128		iph_inner.tot_len = bpf_htons(sizeof(iph6_inner) +
129				    bpf_ntohs(iph6_inner.payload_len));
130		iph_inner.ttl = iph6_inner.hop_limit - 1;
131		iph_inner.protocol = iph6_inner.nexthdr;
132		iph_inner.saddr = __bpf_constant_htonl(saddr);
133		iph_inner.daddr = __bpf_constant_htonl(daddr);
134
135		tcp_off = sizeof(iph6_inner);
136	} else {
137		if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
138				       sizeof(iph_inner)) < 0)
139			return TC_ACT_OK;
140
141		tcp_off = sizeof(iph_inner);
142	}
143
144	/* filter only packets we want */
145	if (iph_inner.ihl != 5 || iph_inner.protocol != IPPROTO_TCP)
146		return TC_ACT_OK;
147
148	if (bpf_skb_load_bytes(skb, ETH_HLEN + tcp_off,
149			       &tcph, sizeof(tcph)) < 0)
150		return TC_ACT_OK;
151
152	if (tcph.dest != __bpf_constant_htons(cfg_port))
153		return TC_ACT_OK;
154
155	olen = sizeof(h_outer.ip);
156	l2_len = 0;
157
158	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV4;
159
160	switch (l2_proto) {
161	case ETH_P_MPLS_UC:
162		l2_len = sizeof(mpls_label);
163		udp_dst = MPLS_OVER_UDP_PORT;
164		break;
165	case ETH_P_TEB:
166		l2_len = ETH_HLEN;
167		if (ext_proto & EXTPROTO_VXLAN) {
168			udp_dst = VXLAN_UDP_PORT;
169			l2_len += sizeof(struct vxlanhdr);
170		} else
171			udp_dst = ETH_OVER_UDP_PORT;
172		break;
173	}
174	flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
175
176	switch (encap_proto) {
177	case IPPROTO_GRE:
178		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
179		olen += sizeof(h_outer.l4hdr.gre);
180		h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
181		h_outer.l4hdr.gre.flags = 0;
182		break;
183	case IPPROTO_UDP:
184		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
185		olen += sizeof(h_outer.l4hdr.udp);
186		h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
187		h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
188		h_outer.l4hdr.udp.check = 0;
189		h_outer.l4hdr.udp.len = bpf_htons(bpf_ntohs(iph_inner.tot_len) +
190						  sizeof(h_outer.l4hdr.udp) +
191						  l2_len);
192		break;
193	case IPPROTO_IPIP:
194	case IPPROTO_IPV6:
195		break;
196	default:
197		return TC_ACT_OK;
198	}
199
200	/* add L2 encap (if specified) */
201	l2_hdr = (__u8 *)&h_outer + olen;
202	switch (l2_proto) {
203	case ETH_P_MPLS_UC:
204		*(__u32 *)l2_hdr = mpls_label;
205		break;
206	case ETH_P_TEB:
207		flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
208
209		if (ext_proto & EXTPROTO_VXLAN) {
210			struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
211
212			vxlan_hdr->vx_flags = VXLAN_FLAGS;
213			vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);
214
215			l2_hdr += sizeof(struct vxlanhdr);
216		}
217
218		if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
219			return TC_ACT_SHOT;
220
221		break;
222	}
223	olen += l2_len;
224
225	/* add room between mac and network header */
226	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
227		return TC_ACT_SHOT;
228
229	/* prepare new outer network header */
230	h_outer.ip = iph_inner;
231	h_outer.ip.tot_len = bpf_htons(olen +
232				       bpf_ntohs(h_outer.ip.tot_len));
233	h_outer.ip.protocol = encap_proto;
234
235	set_ipv4_csum((void *)&h_outer.ip);
236
237	/* store new outer network header */
238	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
239				BPF_F_INVALIDATE_HASH) < 0)
240		return TC_ACT_SHOT;
241
242	/* if changing outer proto type, update eth->h_proto */
243	if (encap_proto == IPPROTO_IPV6) {
244		struct ethhdr eth;
245
246		if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)) < 0)
247			return TC_ACT_SHOT;
248		eth.h_proto = bpf_htons(ETH_P_IP);
249		if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0) < 0)
250			return TC_ACT_SHOT;
251	}
252
253	return TC_ACT_OK;
254}
255
256static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
257				      __u16 l2_proto)
258{
259	return __encap_ipv4(skb, encap_proto, l2_proto, 0);
260}
261
262static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
263					__u16 l2_proto, __u16 ext_proto)
264{
265	__u16 udp_dst = UDP_PORT;
266	struct ipv6hdr iph_inner;
267	struct v6hdr h_outer;
268	struct tcphdr tcph;
269	int olen, l2_len;
270	__u8 *l2_hdr = NULL;
271	__u16 tot_len;
272	__u64 flags;
273
274	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
275			       sizeof(iph_inner)) < 0)
276		return TC_ACT_OK;
277
278	/* filter only packets we want */
279	if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner),
280			       &tcph, sizeof(tcph)) < 0)
281		return TC_ACT_OK;
282
283	if (tcph.dest != __bpf_constant_htons(cfg_port))
284		return TC_ACT_OK;
285
286	olen = sizeof(h_outer.ip);
287	l2_len = 0;
288
289	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;
290
291	switch (l2_proto) {
292	case ETH_P_MPLS_UC:
293		l2_len = sizeof(mpls_label);
294		udp_dst = MPLS_OVER_UDP_PORT;
295		break;
296	case ETH_P_TEB:
297		l2_len = ETH_HLEN;
298		if (ext_proto & EXTPROTO_VXLAN) {
299			udp_dst = VXLAN_UDP_PORT;
300			l2_len += sizeof(struct vxlanhdr);
301		} else
302			udp_dst = ETH_OVER_UDP_PORT;
303		break;
304	}
305	flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
306
307	switch (encap_proto) {
308	case IPPROTO_GRE:
309		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
310		olen += sizeof(h_outer.l4hdr.gre);
311		h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
312		h_outer.l4hdr.gre.flags = 0;
313		break;
314	case IPPROTO_UDP:
315		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
316		olen += sizeof(h_outer.l4hdr.udp);
317		h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
318		h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
319		tot_len = bpf_ntohs(iph_inner.payload_len) + sizeof(iph_inner) +
320			  sizeof(h_outer.l4hdr.udp) + l2_len;
321		h_outer.l4hdr.udp.check = 0;
322		h_outer.l4hdr.udp.len = bpf_htons(tot_len);
323		break;
324	case IPPROTO_IPV6:
325		break;
326	default:
327		return TC_ACT_OK;
328	}
329
330	/* add L2 encap (if specified) */
331	l2_hdr = (__u8 *)&h_outer + olen;
332	switch (l2_proto) {
333	case ETH_P_MPLS_UC:
334		*(__u32 *)l2_hdr = mpls_label;
335		break;
336	case ETH_P_TEB:
337		flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
338
339		if (ext_proto & EXTPROTO_VXLAN) {
340			struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
341
342			vxlan_hdr->vx_flags = VXLAN_FLAGS;
343			vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);
344
345			l2_hdr += sizeof(struct vxlanhdr);
346		}
347
348		if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
349			return TC_ACT_SHOT;
350		break;
351	}
352	olen += l2_len;
353
354	/* add room between mac and network header */
355	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
356		return TC_ACT_SHOT;
357
358	/* prepare new outer network header */
359	h_outer.ip = iph_inner;
360	h_outer.ip.payload_len = bpf_htons(olen +
361					   bpf_ntohs(h_outer.ip.payload_len));
362
363	h_outer.ip.nexthdr = encap_proto;
364
365	/* store new outer network header */
366	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
367				BPF_F_INVALIDATE_HASH) < 0)
368		return TC_ACT_SHOT;
369
370	return TC_ACT_OK;
371}
372
373static int encap_ipv6_ipip6(struct __sk_buff *skb)
374{
375	struct iphdr iph_inner;
376	struct v6hdr h_outer;
377	struct tcphdr tcph;
378	struct ethhdr eth;
379	__u64 flags;
380	int olen;
381
382	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
383			       sizeof(iph_inner)) < 0)
384		return TC_ACT_OK;
385
386	/* filter only packets we want */
387	if (bpf_skb_load_bytes(skb, ETH_HLEN + (iph_inner.ihl << 2),
388			       &tcph, sizeof(tcph)) < 0)
389		return TC_ACT_OK;
390
391	if (tcph.dest != __bpf_constant_htons(cfg_port))
392		return TC_ACT_OK;
393
394	olen = sizeof(h_outer.ip);
395
396	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;
397
398	/* add room between mac and network header */
399	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
400		return TC_ACT_SHOT;
401
402	/* prepare new outer network header */
403	memset(&h_outer.ip, 0, sizeof(h_outer.ip));
404	h_outer.ip.version = 6;
405	h_outer.ip.hop_limit = iph_inner.ttl;
406	h_outer.ip.saddr.s6_addr[1] = 0xfd;
407	h_outer.ip.saddr.s6_addr[15] = 1;
408	h_outer.ip.daddr.s6_addr[1] = 0xfd;
409	h_outer.ip.daddr.s6_addr[15] = 2;
410	h_outer.ip.payload_len = iph_inner.tot_len;
411	h_outer.ip.nexthdr = IPPROTO_IPIP;
412
413	/* store new outer network header */
414	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
415				BPF_F_INVALIDATE_HASH) < 0)
416		return TC_ACT_SHOT;
417
418	/* update eth->h_proto */
419	if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)) < 0)
420		return TC_ACT_SHOT;
421	eth.h_proto = bpf_htons(ETH_P_IPV6);
422	if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0) < 0)
423		return TC_ACT_SHOT;
424
425	return TC_ACT_OK;
426}
427
428static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
429				      __u16 l2_proto)
430{
431	return __encap_ipv6(skb, encap_proto, l2_proto, 0);
432}
433
434SEC("encap_ipip_none")
435int __encap_ipip_none(struct __sk_buff *skb)
436{
437	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
438		return encap_ipv4(skb, IPPROTO_IPIP, ETH_P_IP);
439	else
440		return TC_ACT_OK;
441}
442
443SEC("encap_gre_none")
444int __encap_gre_none(struct __sk_buff *skb)
445{
446	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
447		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_IP);
448	else
449		return TC_ACT_OK;
450}
451
452SEC("encap_gre_mpls")
453int __encap_gre_mpls(struct __sk_buff *skb)
454{
455	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
456		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
457	else
458		return TC_ACT_OK;
459}
460
461SEC("encap_gre_eth")
462int __encap_gre_eth(struct __sk_buff *skb)
463{
464	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
465		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_TEB);
466	else
467		return TC_ACT_OK;
468}
469
470SEC("encap_udp_none")
471int __encap_udp_none(struct __sk_buff *skb)
472{
473	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
474		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_IP);
475	else
476		return TC_ACT_OK;
477}
478
479SEC("encap_udp_mpls")
480int __encap_udp_mpls(struct __sk_buff *skb)
481{
482	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
483		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
484	else
485		return TC_ACT_OK;
486}
487
488SEC("encap_udp_eth")
489int __encap_udp_eth(struct __sk_buff *skb)
490{
491	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
492		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_TEB);
493	else
494		return TC_ACT_OK;
495}
496
497SEC("encap_vxlan_eth")
498int __encap_vxlan_eth(struct __sk_buff *skb)
499{
500	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
501		return __encap_ipv4(skb, IPPROTO_UDP,
502				    ETH_P_TEB,
503				    EXTPROTO_VXLAN);
504	else
505		return TC_ACT_OK;
506}
507
508SEC("encap_sit_none")
509int __encap_sit_none(struct __sk_buff *skb)
510{
511	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
512		return encap_ipv4(skb, IPPROTO_IPV6, ETH_P_IP);
513	else
514		return TC_ACT_OK;
515}
516
517SEC("encap_ip6tnl_none")
518int __encap_ip6tnl_none(struct __sk_buff *skb)
519{
520	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
521		return encap_ipv6(skb, IPPROTO_IPV6, ETH_P_IPV6);
522	else
523		return TC_ACT_OK;
524}
525
526SEC("encap_ipip6_none")
527int __encap_ipip6_none(struct __sk_buff *skb)
528{
529	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
530		return encap_ipv6_ipip6(skb);
531	else
532		return TC_ACT_OK;
533}
534
535SEC("encap_ip6gre_none")
536int __encap_ip6gre_none(struct __sk_buff *skb)
537{
538	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
539		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_IPV6);
540	else
541		return TC_ACT_OK;
542}
543
544SEC("encap_ip6gre_mpls")
545int __encap_ip6gre_mpls(struct __sk_buff *skb)
546{
547	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
548		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
549	else
550		return TC_ACT_OK;
551}
552
553SEC("encap_ip6gre_eth")
554int __encap_ip6gre_eth(struct __sk_buff *skb)
555{
556	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
557		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_TEB);
558	else
559		return TC_ACT_OK;
560}
561
562SEC("encap_ip6udp_none")
563int __encap_ip6udp_none(struct __sk_buff *skb)
564{
565	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
566		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_IPV6);
567	else
568		return TC_ACT_OK;
569}
570
571SEC("encap_ip6udp_mpls")
572int __encap_ip6udp_mpls(struct __sk_buff *skb)
573{
574	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
575		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
576	else
577		return TC_ACT_OK;
578}
579
580SEC("encap_ip6udp_eth")
581int __encap_ip6udp_eth(struct __sk_buff *skb)
582{
583	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
584		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_TEB);
585	else
586		return TC_ACT_OK;
587}
588
589SEC("encap_ip6vxlan_eth")
590int __encap_ip6vxlan_eth(struct __sk_buff *skb)
591{
592	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
593		return __encap_ipv6(skb, IPPROTO_UDP,
594				    ETH_P_TEB,
595				    EXTPROTO_VXLAN);
596	else
597		return TC_ACT_OK;
598}
599
600static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
601{
602	__u64 flags = BPF_F_ADJ_ROOM_FIXED_GSO;
603	struct ipv6_opt_hdr ip6_opt_hdr;
604	struct gre_hdr greh;
605	struct udphdr udph;
606	int olen = len;
607
608	switch (proto) {
609	case IPPROTO_IPIP:
610		flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
611		break;
612	case IPPROTO_IPV6:
613		flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
614		break;
615	case NEXTHDR_DEST:
616		if (bpf_skb_load_bytes(skb, off + len, &ip6_opt_hdr,
617				       sizeof(ip6_opt_hdr)) < 0)
618			return TC_ACT_OK;
619		switch (ip6_opt_hdr.nexthdr) {
620		case IPPROTO_IPIP:
621			flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
622			break;
623		case IPPROTO_IPV6:
624			flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
625			break;
626		default:
627			return TC_ACT_OK;
628		}
629		break;
630	case IPPROTO_GRE:
631		olen += sizeof(struct gre_hdr);
632		if (bpf_skb_load_bytes(skb, off + len, &greh, sizeof(greh)) < 0)
633			return TC_ACT_OK;
634		switch (bpf_ntohs(greh.protocol)) {
635		case ETH_P_MPLS_UC:
636			olen += sizeof(mpls_label);
637			break;
638		case ETH_P_TEB:
639			olen += ETH_HLEN;
640			break;
641		}
642		break;
643	case IPPROTO_UDP:
644		olen += sizeof(struct udphdr);
645		if (bpf_skb_load_bytes(skb, off + len, &udph, sizeof(udph)) < 0)
646			return TC_ACT_OK;
647		switch (bpf_ntohs(udph.dest)) {
648		case MPLS_OVER_UDP_PORT:
649			olen += sizeof(mpls_label);
650			break;
651		case ETH_OVER_UDP_PORT:
652			olen += ETH_HLEN;
653			break;
654		case VXLAN_UDP_PORT:
655			olen += ETH_HLEN + sizeof(struct vxlanhdr);
656			break;
657		}
658		break;
659	default:
660		return TC_ACT_OK;
661	}
662
663	if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC, flags))
664		return TC_ACT_SHOT;
665
666	return TC_ACT_OK;
667}
668
669static int decap_ipv4(struct __sk_buff *skb)
670{
671	struct iphdr iph_outer;
672
673	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
674			       sizeof(iph_outer)) < 0)
675		return TC_ACT_OK;
676
677	if (iph_outer.ihl != 5)
678		return TC_ACT_OK;
679
680	return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
681			      iph_outer.protocol);
682}
683
684static int decap_ipv6(struct __sk_buff *skb)
685{
686	struct ipv6hdr iph_outer;
687
688	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
689			       sizeof(iph_outer)) < 0)
690		return TC_ACT_OK;
691
692	return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
693			      iph_outer.nexthdr);
694}
695
696SEC("decap")
697int decap_f(struct __sk_buff *skb)
698{
699	switch (skb->protocol) {
700	case __bpf_constant_htons(ETH_P_IP):
701		return decap_ipv4(skb);
702	case __bpf_constant_htons(ETH_P_IPV6):
703		return decap_ipv6(skb);
704	default:
705		/* does not match, ignore */
706		return TC_ACT_OK;
707	}
708}
709
710char __license[] SEC("license") = "GPL";
v6.8
  1// SPDX-License-Identifier: GPL-2.0
  2
  3/* In-place tunneling */
  4
  5#include <stdbool.h>
  6#include <string.h>
  7
  8#include <linux/stddef.h>
  9#include <linux/bpf.h>
 10#include <linux/if_ether.h>
 11#include <linux/in.h>
 12#include <linux/ip.h>
 13#include <linux/ipv6.h>
 14#include <linux/mpls.h>
 15#include <linux/tcp.h>
 16#include <linux/udp.h>
 17#include <linux/pkt_cls.h>
 18#include <linux/types.h>
 19
 20#include <bpf/bpf_endian.h>
 21#include <bpf/bpf_helpers.h>
 
 
 
 22
 23static const int cfg_port = 8000;
 24
 25static const int cfg_udp_src = 20000;
 26
 27#define	L2_PAD_SZ	(sizeof(struct vxlanhdr) + ETH_HLEN)
 28
 29#define	UDP_PORT		5555
 30#define	MPLS_OVER_UDP_PORT	6635
 31#define	ETH_OVER_UDP_PORT	7777
 32#define	VXLAN_UDP_PORT		8472
 33
 34#define	EXTPROTO_VXLAN	0x1
 35
 36#define	VXLAN_N_VID     (1u << 24)
 37#define	VXLAN_VNI_MASK	bpf_htonl((VXLAN_N_VID - 1) << 8)
 38#define	VXLAN_FLAGS     0x8
 39#define	VXLAN_VNI       1
 40
 41#ifndef NEXTHDR_DEST
 42#define NEXTHDR_DEST	60
 43#endif
 44
 45/* MPLS label 1000 with S bit (last label) set and ttl of 255. */
 46static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
 47						     MPLS_LS_S_MASK | 0xff);
 48
 49struct vxlanhdr {
 50	__be32 vx_flags;
 51	__be32 vx_vni;
 52} __attribute__((packed));
 53
 54struct gre_hdr {
 55	__be16 flags;
 56	__be16 protocol;
 57} __attribute__((packed));
 58
 59union l4hdr {
 60	struct udphdr udp;
 61	struct gre_hdr gre;
 62};
 63
 64struct v4hdr {
 65	struct iphdr ip;
 66	union l4hdr l4hdr;
 67	__u8 pad[L2_PAD_SZ];		/* space for L2 header / vxlan header ... */
 68} __attribute__((packed));
 69
 70struct v6hdr {
 71	struct ipv6hdr ip;
 72	union l4hdr l4hdr;
 73	__u8 pad[L2_PAD_SZ];		/* space for L2 header / vxlan header ... */
 74} __attribute__((packed));
 75
 76static __always_inline void set_ipv4_csum(struct iphdr *iph)
 77{
 78	__u16 *iph16 = (__u16 *)iph;
 79	__u32 csum;
 80	int i;
 81
 82	iph->check = 0;
 83
 84#pragma clang loop unroll(full)
 85	for (i = 0, csum = 0; i < sizeof(*iph) >> 1; i++)
 86		csum += *iph16++;
 87
 88	iph->check = ~((csum & 0xffff) + (csum >> 16));
 89}
 90
 91static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
 92					__u16 l2_proto, __u16 ext_proto)
 93{
 94	__u16 udp_dst = UDP_PORT;
 95	struct iphdr iph_inner;
 96	struct v4hdr h_outer;
 97	struct tcphdr tcph;
 98	int olen, l2_len;
 99	__u8 *l2_hdr = NULL;
100	int tcp_off;
101	__u64 flags;
102
103	/* Most tests encapsulate a packet into a tunnel with the same
104	 * network protocol, and derive the outer header fields from
105	 * the inner header.
106	 *
107	 * The 6in4 case tests different inner and outer protocols. As
108	 * the inner is ipv6, but the outer expects an ipv4 header as
109	 * input, manually build a struct iphdr based on the ipv6hdr.
110	 */
111	if (encap_proto == IPPROTO_IPV6) {
112		const __u32 saddr = (192 << 24) | (168 << 16) | (1 << 8) | 1;
113		const __u32 daddr = (192 << 24) | (168 << 16) | (1 << 8) | 2;
114		struct ipv6hdr iph6_inner;
115
116		/* Read the IPv6 header */
117		if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph6_inner,
118				       sizeof(iph6_inner)) < 0)
119			return TC_ACT_OK;
120
121		/* Derive the IPv4 header fields from the IPv6 header */
122		memset(&iph_inner, 0, sizeof(iph_inner));
123		iph_inner.version = 4;
124		iph_inner.ihl = 5;
125		iph_inner.tot_len = bpf_htons(sizeof(iph6_inner) +
126				    bpf_ntohs(iph6_inner.payload_len));
127		iph_inner.ttl = iph6_inner.hop_limit - 1;
128		iph_inner.protocol = iph6_inner.nexthdr;
129		iph_inner.saddr = __bpf_constant_htonl(saddr);
130		iph_inner.daddr = __bpf_constant_htonl(daddr);
131
132		tcp_off = sizeof(iph6_inner);
133	} else {
134		if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
135				       sizeof(iph_inner)) < 0)
136			return TC_ACT_OK;
137
138		tcp_off = sizeof(iph_inner);
139	}
140
141	/* filter only packets we want */
142	if (iph_inner.ihl != 5 || iph_inner.protocol != IPPROTO_TCP)
143		return TC_ACT_OK;
144
145	if (bpf_skb_load_bytes(skb, ETH_HLEN + tcp_off,
146			       &tcph, sizeof(tcph)) < 0)
147		return TC_ACT_OK;
148
149	if (tcph.dest != __bpf_constant_htons(cfg_port))
150		return TC_ACT_OK;
151
152	olen = sizeof(h_outer.ip);
153	l2_len = 0;
154
155	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV4;
156
157	switch (l2_proto) {
158	case ETH_P_MPLS_UC:
159		l2_len = sizeof(mpls_label);
160		udp_dst = MPLS_OVER_UDP_PORT;
161		break;
162	case ETH_P_TEB:
163		l2_len = ETH_HLEN;
164		if (ext_proto & EXTPROTO_VXLAN) {
165			udp_dst = VXLAN_UDP_PORT;
166			l2_len += sizeof(struct vxlanhdr);
167		} else
168			udp_dst = ETH_OVER_UDP_PORT;
169		break;
170	}
171	flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
172
173	switch (encap_proto) {
174	case IPPROTO_GRE:
175		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
176		olen += sizeof(h_outer.l4hdr.gre);
177		h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
178		h_outer.l4hdr.gre.flags = 0;
179		break;
180	case IPPROTO_UDP:
181		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
182		olen += sizeof(h_outer.l4hdr.udp);
183		h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
184		h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
185		h_outer.l4hdr.udp.check = 0;
186		h_outer.l4hdr.udp.len = bpf_htons(bpf_ntohs(iph_inner.tot_len) +
187						  sizeof(h_outer.l4hdr.udp) +
188						  l2_len);
189		break;
190	case IPPROTO_IPIP:
191	case IPPROTO_IPV6:
192		break;
193	default:
194		return TC_ACT_OK;
195	}
196
197	/* add L2 encap (if specified) */
198	l2_hdr = (__u8 *)&h_outer + olen;
199	switch (l2_proto) {
200	case ETH_P_MPLS_UC:
201		*(__u32 *)l2_hdr = mpls_label;
202		break;
203	case ETH_P_TEB:
204		flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
205
206		if (ext_proto & EXTPROTO_VXLAN) {
207			struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
208
209			vxlan_hdr->vx_flags = VXLAN_FLAGS;
210			vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);
211
212			l2_hdr += sizeof(struct vxlanhdr);
213		}
214
215		if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
216			return TC_ACT_SHOT;
217
218		break;
219	}
220	olen += l2_len;
221
222	/* add room between mac and network header */
223	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
224		return TC_ACT_SHOT;
225
226	/* prepare new outer network header */
227	h_outer.ip = iph_inner;
228	h_outer.ip.tot_len = bpf_htons(olen +
229				       bpf_ntohs(h_outer.ip.tot_len));
230	h_outer.ip.protocol = encap_proto;
231
232	set_ipv4_csum((void *)&h_outer.ip);
233
234	/* store new outer network header */
235	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
236				BPF_F_INVALIDATE_HASH) < 0)
237		return TC_ACT_SHOT;
238
239	/* if changing outer proto type, update eth->h_proto */
240	if (encap_proto == IPPROTO_IPV6) {
241		struct ethhdr eth;
242
243		if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)) < 0)
244			return TC_ACT_SHOT;
245		eth.h_proto = bpf_htons(ETH_P_IP);
246		if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0) < 0)
247			return TC_ACT_SHOT;
248	}
249
250	return TC_ACT_OK;
251}
252
253static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
254				      __u16 l2_proto)
255{
256	return __encap_ipv4(skb, encap_proto, l2_proto, 0);
257}
258
259static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
260					__u16 l2_proto, __u16 ext_proto)
261{
262	__u16 udp_dst = UDP_PORT;
263	struct ipv6hdr iph_inner;
264	struct v6hdr h_outer;
265	struct tcphdr tcph;
266	int olen, l2_len;
267	__u8 *l2_hdr = NULL;
268	__u16 tot_len;
269	__u64 flags;
270
271	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
272			       sizeof(iph_inner)) < 0)
273		return TC_ACT_OK;
274
275	/* filter only packets we want */
276	if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner),
277			       &tcph, sizeof(tcph)) < 0)
278		return TC_ACT_OK;
279
280	if (tcph.dest != __bpf_constant_htons(cfg_port))
281		return TC_ACT_OK;
282
283	olen = sizeof(h_outer.ip);
284	l2_len = 0;
285
286	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;
287
288	switch (l2_proto) {
289	case ETH_P_MPLS_UC:
290		l2_len = sizeof(mpls_label);
291		udp_dst = MPLS_OVER_UDP_PORT;
292		break;
293	case ETH_P_TEB:
294		l2_len = ETH_HLEN;
295		if (ext_proto & EXTPROTO_VXLAN) {
296			udp_dst = VXLAN_UDP_PORT;
297			l2_len += sizeof(struct vxlanhdr);
298		} else
299			udp_dst = ETH_OVER_UDP_PORT;
300		break;
301	}
302	flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
303
304	switch (encap_proto) {
305	case IPPROTO_GRE:
306		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
307		olen += sizeof(h_outer.l4hdr.gre);
308		h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
309		h_outer.l4hdr.gre.flags = 0;
310		break;
311	case IPPROTO_UDP:
312		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
313		olen += sizeof(h_outer.l4hdr.udp);
314		h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
315		h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
316		tot_len = bpf_ntohs(iph_inner.payload_len) + sizeof(iph_inner) +
317			  sizeof(h_outer.l4hdr.udp) + l2_len;
318		h_outer.l4hdr.udp.check = 0;
319		h_outer.l4hdr.udp.len = bpf_htons(tot_len);
320		break;
321	case IPPROTO_IPV6:
322		break;
323	default:
324		return TC_ACT_OK;
325	}
326
327	/* add L2 encap (if specified) */
328	l2_hdr = (__u8 *)&h_outer + olen;
329	switch (l2_proto) {
330	case ETH_P_MPLS_UC:
331		*(__u32 *)l2_hdr = mpls_label;
332		break;
333	case ETH_P_TEB:
334		flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
335
336		if (ext_proto & EXTPROTO_VXLAN) {
337			struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
338
339			vxlan_hdr->vx_flags = VXLAN_FLAGS;
340			vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);
341
342			l2_hdr += sizeof(struct vxlanhdr);
343		}
344
345		if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
346			return TC_ACT_SHOT;
347		break;
348	}
349	olen += l2_len;
350
351	/* add room between mac and network header */
352	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
353		return TC_ACT_SHOT;
354
355	/* prepare new outer network header */
356	h_outer.ip = iph_inner;
357	h_outer.ip.payload_len = bpf_htons(olen +
358					   bpf_ntohs(h_outer.ip.payload_len));
359
360	h_outer.ip.nexthdr = encap_proto;
361
362	/* store new outer network header */
363	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
364				BPF_F_INVALIDATE_HASH) < 0)
365		return TC_ACT_SHOT;
366
367	return TC_ACT_OK;
368}
369
370static int encap_ipv6_ipip6(struct __sk_buff *skb)
371{
372	struct iphdr iph_inner;
373	struct v6hdr h_outer;
374	struct tcphdr tcph;
375	struct ethhdr eth;
376	__u64 flags;
377	int olen;
378
379	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
380			       sizeof(iph_inner)) < 0)
381		return TC_ACT_OK;
382
383	/* filter only packets we want */
384	if (bpf_skb_load_bytes(skb, ETH_HLEN + (iph_inner.ihl << 2),
385			       &tcph, sizeof(tcph)) < 0)
386		return TC_ACT_OK;
387
388	if (tcph.dest != __bpf_constant_htons(cfg_port))
389		return TC_ACT_OK;
390
391	olen = sizeof(h_outer.ip);
392
393	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;
394
395	/* add room between mac and network header */
396	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
397		return TC_ACT_SHOT;
398
399	/* prepare new outer network header */
400	memset(&h_outer.ip, 0, sizeof(h_outer.ip));
401	h_outer.ip.version = 6;
402	h_outer.ip.hop_limit = iph_inner.ttl;
403	h_outer.ip.saddr.s6_addr[1] = 0xfd;
404	h_outer.ip.saddr.s6_addr[15] = 1;
405	h_outer.ip.daddr.s6_addr[1] = 0xfd;
406	h_outer.ip.daddr.s6_addr[15] = 2;
407	h_outer.ip.payload_len = iph_inner.tot_len;
408	h_outer.ip.nexthdr = IPPROTO_IPIP;
409
410	/* store new outer network header */
411	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
412				BPF_F_INVALIDATE_HASH) < 0)
413		return TC_ACT_SHOT;
414
415	/* update eth->h_proto */
416	if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)) < 0)
417		return TC_ACT_SHOT;
418	eth.h_proto = bpf_htons(ETH_P_IPV6);
419	if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0) < 0)
420		return TC_ACT_SHOT;
421
422	return TC_ACT_OK;
423}
424
425static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
426				      __u16 l2_proto)
427{
428	return __encap_ipv6(skb, encap_proto, l2_proto, 0);
429}
430
431SEC("encap_ipip_none")
432int __encap_ipip_none(struct __sk_buff *skb)
433{
434	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
435		return encap_ipv4(skb, IPPROTO_IPIP, ETH_P_IP);
436	else
437		return TC_ACT_OK;
438}
439
440SEC("encap_gre_none")
441int __encap_gre_none(struct __sk_buff *skb)
442{
443	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
444		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_IP);
445	else
446		return TC_ACT_OK;
447}
448
449SEC("encap_gre_mpls")
450int __encap_gre_mpls(struct __sk_buff *skb)
451{
452	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
453		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
454	else
455		return TC_ACT_OK;
456}
457
458SEC("encap_gre_eth")
459int __encap_gre_eth(struct __sk_buff *skb)
460{
461	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
462		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_TEB);
463	else
464		return TC_ACT_OK;
465}
466
467SEC("encap_udp_none")
468int __encap_udp_none(struct __sk_buff *skb)
469{
470	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
471		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_IP);
472	else
473		return TC_ACT_OK;
474}
475
476SEC("encap_udp_mpls")
477int __encap_udp_mpls(struct __sk_buff *skb)
478{
479	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
480		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
481	else
482		return TC_ACT_OK;
483}
484
485SEC("encap_udp_eth")
486int __encap_udp_eth(struct __sk_buff *skb)
487{
488	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
489		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_TEB);
490	else
491		return TC_ACT_OK;
492}
493
494SEC("encap_vxlan_eth")
495int __encap_vxlan_eth(struct __sk_buff *skb)
496{
497	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
498		return __encap_ipv4(skb, IPPROTO_UDP,
499				    ETH_P_TEB,
500				    EXTPROTO_VXLAN);
501	else
502		return TC_ACT_OK;
503}
504
505SEC("encap_sit_none")
506int __encap_sit_none(struct __sk_buff *skb)
507{
508	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
509		return encap_ipv4(skb, IPPROTO_IPV6, ETH_P_IP);
510	else
511		return TC_ACT_OK;
512}
513
514SEC("encap_ip6tnl_none")
515int __encap_ip6tnl_none(struct __sk_buff *skb)
516{
517	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
518		return encap_ipv6(skb, IPPROTO_IPV6, ETH_P_IPV6);
519	else
520		return TC_ACT_OK;
521}
522
523SEC("encap_ipip6_none")
524int __encap_ipip6_none(struct __sk_buff *skb)
525{
526	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
527		return encap_ipv6_ipip6(skb);
528	else
529		return TC_ACT_OK;
530}
531
532SEC("encap_ip6gre_none")
533int __encap_ip6gre_none(struct __sk_buff *skb)
534{
535	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
536		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_IPV6);
537	else
538		return TC_ACT_OK;
539}
540
541SEC("encap_ip6gre_mpls")
542int __encap_ip6gre_mpls(struct __sk_buff *skb)
543{
544	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
545		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
546	else
547		return TC_ACT_OK;
548}
549
550SEC("encap_ip6gre_eth")
551int __encap_ip6gre_eth(struct __sk_buff *skb)
552{
553	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
554		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_TEB);
555	else
556		return TC_ACT_OK;
557}
558
559SEC("encap_ip6udp_none")
560int __encap_ip6udp_none(struct __sk_buff *skb)
561{
562	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
563		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_IPV6);
564	else
565		return TC_ACT_OK;
566}
567
568SEC("encap_ip6udp_mpls")
569int __encap_ip6udp_mpls(struct __sk_buff *skb)
570{
571	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
572		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
573	else
574		return TC_ACT_OK;
575}
576
577SEC("encap_ip6udp_eth")
578int __encap_ip6udp_eth(struct __sk_buff *skb)
579{
580	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
581		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_TEB);
582	else
583		return TC_ACT_OK;
584}
585
586SEC("encap_ip6vxlan_eth")
587int __encap_ip6vxlan_eth(struct __sk_buff *skb)
588{
589	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
590		return __encap_ipv6(skb, IPPROTO_UDP,
591				    ETH_P_TEB,
592				    EXTPROTO_VXLAN);
593	else
594		return TC_ACT_OK;
595}
596
597static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
598{
599	__u64 flags = BPF_F_ADJ_ROOM_FIXED_GSO;
600	struct ipv6_opt_hdr ip6_opt_hdr;
601	struct gre_hdr greh;
602	struct udphdr udph;
603	int olen = len;
604
605	switch (proto) {
606	case IPPROTO_IPIP:
607		flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
608		break;
609	case IPPROTO_IPV6:
610		flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
611		break;
612	case NEXTHDR_DEST:
613		if (bpf_skb_load_bytes(skb, off + len, &ip6_opt_hdr,
614				       sizeof(ip6_opt_hdr)) < 0)
615			return TC_ACT_OK;
616		switch (ip6_opt_hdr.nexthdr) {
617		case IPPROTO_IPIP:
618			flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
619			break;
620		case IPPROTO_IPV6:
621			flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
622			break;
623		default:
624			return TC_ACT_OK;
625		}
626		break;
627	case IPPROTO_GRE:
628		olen += sizeof(struct gre_hdr);
629		if (bpf_skb_load_bytes(skb, off + len, &greh, sizeof(greh)) < 0)
630			return TC_ACT_OK;
631		switch (bpf_ntohs(greh.protocol)) {
632		case ETH_P_MPLS_UC:
633			olen += sizeof(mpls_label);
634			break;
635		case ETH_P_TEB:
636			olen += ETH_HLEN;
637			break;
638		}
639		break;
640	case IPPROTO_UDP:
641		olen += sizeof(struct udphdr);
642		if (bpf_skb_load_bytes(skb, off + len, &udph, sizeof(udph)) < 0)
643			return TC_ACT_OK;
644		switch (bpf_ntohs(udph.dest)) {
645		case MPLS_OVER_UDP_PORT:
646			olen += sizeof(mpls_label);
647			break;
648		case ETH_OVER_UDP_PORT:
649			olen += ETH_HLEN;
650			break;
651		case VXLAN_UDP_PORT:
652			olen += ETH_HLEN + sizeof(struct vxlanhdr);
653			break;
654		}
655		break;
656	default:
657		return TC_ACT_OK;
658	}
659
660	if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC, flags))
661		return TC_ACT_SHOT;
662
663	return TC_ACT_OK;
664}
665
666static int decap_ipv4(struct __sk_buff *skb)
667{
668	struct iphdr iph_outer;
669
670	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
671			       sizeof(iph_outer)) < 0)
672		return TC_ACT_OK;
673
674	if (iph_outer.ihl != 5)
675		return TC_ACT_OK;
676
677	return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
678			      iph_outer.protocol);
679}
680
681static int decap_ipv6(struct __sk_buff *skb)
682{
683	struct ipv6hdr iph_outer;
684
685	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
686			       sizeof(iph_outer)) < 0)
687		return TC_ACT_OK;
688
689	return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
690			      iph_outer.nexthdr);
691}
692
693SEC("decap")
694int decap_f(struct __sk_buff *skb)
695{
696	switch (skb->protocol) {
697	case __bpf_constant_htons(ETH_P_IP):
698		return decap_ipv4(skb);
699	case __bpf_constant_htons(ETH_P_IPV6):
700		return decap_ipv6(skb);
701	default:
702		/* does not match, ignore */
703		return TC_ACT_OK;
704	}
705}
706
707char __license[] SEC("license") = "GPL";