Linux Audio

Check our new training course

Loading...
v5.9
  1// SPDX-License-Identifier: GPL-2.0
  2
  3/* In-place tunneling */
  4
  5#include <stdbool.h>
  6#include <string.h>
  7
  8#include <linux/stddef.h>
  9#include <linux/bpf.h>
 10#include <linux/if_ether.h>
 11#include <linux/in.h>
 12#include <linux/ip.h>
 13#include <linux/ipv6.h>
 14#include <linux/mpls.h>
 15#include <linux/tcp.h>
 16#include <linux/udp.h>
 17#include <linux/pkt_cls.h>
 18#include <linux/types.h>
 19
 20#include <bpf/bpf_endian.h>
 21#include <bpf/bpf_helpers.h>
 22
 23static const int cfg_port = 8000;
 24
 25static const int cfg_udp_src = 20000;
 26
 
 
 27#define	UDP_PORT		5555
 28#define	MPLS_OVER_UDP_PORT	6635
 29#define	ETH_OVER_UDP_PORT	7777
 
 
 
 
 
 
 
 
 
 
 
 
 30
/* MPLS label 1000 with S bit (last label) set and ttl of 255. */
static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
						     MPLS_LS_S_MASK | 0xff);

/* Minimal GRE header: no checksum, key or sequence-number fields. */
struct gre_hdr {
	__be16 flags;
	__be16 protocol;
} __attribute__((packed));

/* The tunnel L4 header is either UDP or GRE, never both at once. */
union l4hdr {
	struct udphdr udp;
	struct gre_hdr gre;
};

/* Scratch template for the outer IPv4 tunnel headers built by encap_ipv4(). */
struct v4hdr {
	struct iphdr ip;
	union l4hdr l4hdr;
	__u8 pad[16];			/* enough space for L2 header */
} __attribute__((packed));

/* Scratch template for the outer IPv6 tunnel headers built by encap_ipv6(). */
struct v6hdr {
	struct ipv6hdr ip;
	union l4hdr l4hdr;
	__u8 pad[16];			/* enough space for L2 header */
} __attribute__((packed));
 56
 57static __always_inline void set_ipv4_csum(struct iphdr *iph)
 58{
 59	__u16 *iph16 = (__u16 *)iph;
 60	__u32 csum;
 61	int i;
 62
 63	iph->check = 0;
 64
 65#pragma clang loop unroll(full)
 66	for (i = 0, csum = 0; i < sizeof(*iph) >> 1; i++)
 67		csum += *iph16++;
 68
 69	iph->check = ~((csum & 0xffff) + (csum >> 16));
 70}
 71
/* Encapsulate the inner packet in an outer IPv4 tunnel header, in place.
 *
 * Only TCP packets destined to cfg_port are rewritten; everything else
 * passes through untouched.
 *
 * @skb:	 packet to rewrite
 * @encap_proto: outer tunnel protocol (IPPROTO_GRE, IPPROTO_UDP,
 *		 IPPROTO_IPIP or IPPROTO_IPV6 for 6in4)
 * @l2_proto:	 optional L2 header inserted between tunnel and inner
 *		 packet: ETH_P_MPLS_UC (MPLS label) or ETH_P_TEB (copy of
 *		 the inner Ethernet header)
 *
 * Returns TC_ACT_OK when the packet is ignored or rewritten successfully,
 * TC_ACT_SHOT when a rewrite step fails.
 */
static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
				      __u16 l2_proto)
{
	__u16 udp_dst = UDP_PORT;
	struct iphdr iph_inner;
	struct v4hdr h_outer;
	struct tcphdr tcph;
	int olen, l2_len;
	int tcp_off;
	__u64 flags;

	/* Most tests encapsulate a packet into a tunnel with the same
	 * network protocol, and derive the outer header fields from
	 * the inner header.
	 *
	 * The 6in4 case tests different inner and outer protocols. As
	 * the inner is ipv6, but the outer expects an ipv4 header as
	 * input, manually build a struct iphdr based on the ipv6hdr.
	 */
	if (encap_proto == IPPROTO_IPV6) {
		const __u32 saddr = (192 << 24) | (168 << 16) | (1 << 8) | 1;
		const __u32 daddr = (192 << 24) | (168 << 16) | (1 << 8) | 2;
		struct ipv6hdr iph6_inner;

		/* Read the IPv6 header */
		if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph6_inner,
				       sizeof(iph6_inner)) < 0)
			return TC_ACT_OK;

		/* Derive the IPv4 header fields from the IPv6 header */
		memset(&iph_inner, 0, sizeof(iph_inner));
		iph_inner.version = 4;
		iph_inner.ihl = 5;
		iph_inner.tot_len = bpf_htons(sizeof(iph6_inner) +
				    bpf_ntohs(iph6_inner.payload_len));
		iph_inner.ttl = iph6_inner.hop_limit - 1;
		iph_inner.protocol = iph6_inner.nexthdr;
		iph_inner.saddr = __bpf_constant_htonl(saddr);
		iph_inner.daddr = __bpf_constant_htonl(daddr);

		tcp_off = sizeof(iph6_inner);
	} else {
		if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
				       sizeof(iph_inner)) < 0)
			return TC_ACT_OK;

		tcp_off = sizeof(iph_inner);
	}

	/* filter only packets we want */
	if (iph_inner.ihl != 5 || iph_inner.protocol != IPPROTO_TCP)
		return TC_ACT_OK;

	if (bpf_skb_load_bytes(skb, ETH_HLEN + tcp_off,
			       &tcph, sizeof(tcph)) < 0)
		return TC_ACT_OK;

	if (tcph.dest != __bpf_constant_htons(cfg_port))
		return TC_ACT_OK;

	/* olen accumulates the total size of all headers to insert */
	olen = sizeof(h_outer.ip);
	l2_len = 0;

	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV4;

	/* choose the L2 encapsulation size and matching UDP dst port */
	switch (l2_proto) {
	case ETH_P_MPLS_UC:
		l2_len = sizeof(mpls_label);
		udp_dst = MPLS_OVER_UDP_PORT;
		break;
	case ETH_P_TEB:
		l2_len = ETH_HLEN;
		udp_dst = ETH_OVER_UDP_PORT;
		break;
	}
	flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);

	/* build the L4 tunnel header (if any) in h_outer */
	switch (encap_proto) {
	case IPPROTO_GRE:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
		olen += sizeof(h_outer.l4hdr.gre);
		h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
		h_outer.l4hdr.gre.flags = 0;
		break;
	case IPPROTO_UDP:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
		olen += sizeof(h_outer.l4hdr.udp);
		h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
		h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
		h_outer.l4hdr.udp.check = 0;
		/* UDP length covers the inner packet plus the UDP header
		 * and any L2 header carried inside the datagram.
		 */
		h_outer.l4hdr.udp.len = bpf_htons(bpf_ntohs(iph_inner.tot_len) +
						  sizeof(h_outer.l4hdr.udp) +
						  l2_len);
		break;
	case IPPROTO_IPIP:
	case IPPROTO_IPV6:
		break;
	default:
		return TC_ACT_OK;
	}

	/* add L2 encap (if specified) */
	switch (l2_proto) {
	case ETH_P_MPLS_UC:
		*((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label;
		break;
	case ETH_P_TEB:
		/* reuse the packet's own Ethernet header as inner L2 */
		if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen,
				       ETH_HLEN))
			return TC_ACT_SHOT;
		break;
	}
	olen += l2_len;

	/* add room between mac and network header */
	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
		return TC_ACT_SHOT;

	/* prepare new outer network header */
	h_outer.ip = iph_inner;
	h_outer.ip.tot_len = bpf_htons(olen +
				       bpf_ntohs(h_outer.ip.tot_len));
	h_outer.ip.protocol = encap_proto;

	set_ipv4_csum((void *)&h_outer.ip);

	/* store new outer network header */
	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
				BPF_F_INVALIDATE_HASH) < 0)
		return TC_ACT_SHOT;

	/* if changing outer proto type, update eth->h_proto */
	if (encap_proto == IPPROTO_IPV6) {
		struct ethhdr eth;

		if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)) < 0)
			return TC_ACT_SHOT;
		eth.h_proto = bpf_htons(ETH_P_IP);
		if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0) < 0)
			return TC_ACT_SHOT;
	}

	return TC_ACT_OK;
}
216
217static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
218				      __u16 l2_proto)
219{
 
 
 
 
 
 
220	__u16 udp_dst = UDP_PORT;
221	struct ipv6hdr iph_inner;
222	struct v6hdr h_outer;
223	struct tcphdr tcph;
224	int olen, l2_len;
 
225	__u16 tot_len;
226	__u64 flags;
227
228	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
229			       sizeof(iph_inner)) < 0)
230		return TC_ACT_OK;
231
232	/* filter only packets we want */
233	if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner),
234			       &tcph, sizeof(tcph)) < 0)
235		return TC_ACT_OK;
236
237	if (tcph.dest != __bpf_constant_htons(cfg_port))
238		return TC_ACT_OK;
239
240	olen = sizeof(h_outer.ip);
241	l2_len = 0;
242
243	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;
244
245	switch (l2_proto) {
246	case ETH_P_MPLS_UC:
247		l2_len = sizeof(mpls_label);
248		udp_dst = MPLS_OVER_UDP_PORT;
249		break;
250	case ETH_P_TEB:
251		l2_len = ETH_HLEN;
252		udp_dst = ETH_OVER_UDP_PORT;
 
 
 
 
253		break;
254	}
255	flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
256
257	switch (encap_proto) {
258	case IPPROTO_GRE:
259		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
260		olen += sizeof(h_outer.l4hdr.gre);
261		h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
262		h_outer.l4hdr.gre.flags = 0;
263		break;
264	case IPPROTO_UDP:
265		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
266		olen += sizeof(h_outer.l4hdr.udp);
267		h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
268		h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
269		tot_len = bpf_ntohs(iph_inner.payload_len) + sizeof(iph_inner) +
270			  sizeof(h_outer.l4hdr.udp);
271		h_outer.l4hdr.udp.check = 0;
272		h_outer.l4hdr.udp.len = bpf_htons(tot_len);
273		break;
274	case IPPROTO_IPV6:
275		break;
276	default:
277		return TC_ACT_OK;
278	}
279
280	/* add L2 encap (if specified) */
 
281	switch (l2_proto) {
282	case ETH_P_MPLS_UC:
283		*((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label;
284		break;
285	case ETH_P_TEB:
286		if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen,
287				       ETH_HLEN))
 
 
 
 
 
 
 
 
 
 
288			return TC_ACT_SHOT;
289		break;
290	}
291	olen += l2_len;
292
293	/* add room between mac and network header */
294	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
295		return TC_ACT_SHOT;
296
297	/* prepare new outer network header */
298	h_outer.ip = iph_inner;
299	h_outer.ip.payload_len = bpf_htons(olen +
300					   bpf_ntohs(h_outer.ip.payload_len));
301
302	h_outer.ip.nexthdr = encap_proto;
303
304	/* store new outer network header */
305	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
306				BPF_F_INVALIDATE_HASH) < 0)
307		return TC_ACT_SHOT;
308
309	return TC_ACT_OK;
310}
311
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
312SEC("encap_ipip_none")
313int __encap_ipip_none(struct __sk_buff *skb)
314{
315	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
316		return encap_ipv4(skb, IPPROTO_IPIP, ETH_P_IP);
317	else
318		return TC_ACT_OK;
319}
320
321SEC("encap_gre_none")
322int __encap_gre_none(struct __sk_buff *skb)
323{
324	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
325		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_IP);
326	else
327		return TC_ACT_OK;
328}
329
330SEC("encap_gre_mpls")
331int __encap_gre_mpls(struct __sk_buff *skb)
332{
333	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
334		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
335	else
336		return TC_ACT_OK;
337}
338
339SEC("encap_gre_eth")
340int __encap_gre_eth(struct __sk_buff *skb)
341{
342	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
343		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_TEB);
344	else
345		return TC_ACT_OK;
346}
347
348SEC("encap_udp_none")
349int __encap_udp_none(struct __sk_buff *skb)
350{
351	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
352		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_IP);
353	else
354		return TC_ACT_OK;
355}
356
357SEC("encap_udp_mpls")
358int __encap_udp_mpls(struct __sk_buff *skb)
359{
360	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
361		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
362	else
363		return TC_ACT_OK;
364}
365
366SEC("encap_udp_eth")
367int __encap_udp_eth(struct __sk_buff *skb)
368{
369	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
370		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_TEB);
371	else
372		return TC_ACT_OK;
373}
374
 
 
 
 
 
 
 
 
 
 
 
375SEC("encap_sit_none")
376int __encap_sit_none(struct __sk_buff *skb)
377{
378	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
379		return encap_ipv4(skb, IPPROTO_IPV6, ETH_P_IP);
380	else
381		return TC_ACT_OK;
382}
383
384SEC("encap_ip6tnl_none")
385int __encap_ip6tnl_none(struct __sk_buff *skb)
386{
387	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
388		return encap_ipv6(skb, IPPROTO_IPV6, ETH_P_IPV6);
389	else
390		return TC_ACT_OK;
391}
392
 
 
 
 
 
 
 
 
 
393SEC("encap_ip6gre_none")
394int __encap_ip6gre_none(struct __sk_buff *skb)
395{
396	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
397		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_IPV6);
398	else
399		return TC_ACT_OK;
400}
401
402SEC("encap_ip6gre_mpls")
403int __encap_ip6gre_mpls(struct __sk_buff *skb)
404{
405	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
406		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
407	else
408		return TC_ACT_OK;
409}
410
411SEC("encap_ip6gre_eth")
412int __encap_ip6gre_eth(struct __sk_buff *skb)
413{
414	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
415		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_TEB);
416	else
417		return TC_ACT_OK;
418}
419
420SEC("encap_ip6udp_none")
421int __encap_ip6udp_none(struct __sk_buff *skb)
422{
423	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
424		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_IPV6);
425	else
426		return TC_ACT_OK;
427}
428
429SEC("encap_ip6udp_mpls")
430int __encap_ip6udp_mpls(struct __sk_buff *skb)
431{
432	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
433		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
434	else
435		return TC_ACT_OK;
436}
437
438SEC("encap_ip6udp_eth")
439int __encap_ip6udp_eth(struct __sk_buff *skb)
440{
441	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
442		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_TEB);
443	else
444		return TC_ACT_OK;
445}
446
 
 
 
 
 
 
 
 
 
 
 
447static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
448{
449	char buf[sizeof(struct v6hdr)];
 
450	struct gre_hdr greh;
451	struct udphdr udph;
452	int olen = len;
453
454	switch (proto) {
455	case IPPROTO_IPIP:
 
 
456	case IPPROTO_IPV6:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
457		break;
458	case IPPROTO_GRE:
459		olen += sizeof(struct gre_hdr);
460		if (bpf_skb_load_bytes(skb, off + len, &greh, sizeof(greh)) < 0)
461			return TC_ACT_OK;
462		switch (bpf_ntohs(greh.protocol)) {
463		case ETH_P_MPLS_UC:
464			olen += sizeof(mpls_label);
465			break;
466		case ETH_P_TEB:
467			olen += ETH_HLEN;
468			break;
469		}
470		break;
471	case IPPROTO_UDP:
472		olen += sizeof(struct udphdr);
473		if (bpf_skb_load_bytes(skb, off + len, &udph, sizeof(udph)) < 0)
474			return TC_ACT_OK;
475		switch (bpf_ntohs(udph.dest)) {
476		case MPLS_OVER_UDP_PORT:
477			olen += sizeof(mpls_label);
478			break;
479		case ETH_OVER_UDP_PORT:
480			olen += ETH_HLEN;
481			break;
 
 
 
482		}
483		break;
484	default:
485		return TC_ACT_OK;
486	}
487
488	if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC,
489				BPF_F_ADJ_ROOM_FIXED_GSO))
490		return TC_ACT_SHOT;
491
492	return TC_ACT_OK;
493}
494
495static int decap_ipv4(struct __sk_buff *skb)
496{
497	struct iphdr iph_outer;
498
499	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
500			       sizeof(iph_outer)) < 0)
501		return TC_ACT_OK;
502
503	if (iph_outer.ihl != 5)
504		return TC_ACT_OK;
505
506	return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
507			      iph_outer.protocol);
508}
509
510static int decap_ipv6(struct __sk_buff *skb)
511{
512	struct ipv6hdr iph_outer;
513
514	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
515			       sizeof(iph_outer)) < 0)
516		return TC_ACT_OK;
517
518	return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
519			      iph_outer.nexthdr);
520}
521
522SEC("decap")
523int decap_f(struct __sk_buff *skb)
524{
525	switch (skb->protocol) {
526	case __bpf_constant_htons(ETH_P_IP):
527		return decap_ipv4(skb);
528	case __bpf_constant_htons(ETH_P_IPV6):
529		return decap_ipv6(skb);
530	default:
531		/* does not match, ignore */
532		return TC_ACT_OK;
533	}
534}
535
536char __license[] SEC("license") = "GPL";
v6.8
  1// SPDX-License-Identifier: GPL-2.0
  2
  3/* In-place tunneling */
  4
  5#include <stdbool.h>
  6#include <string.h>
  7
  8#include <linux/stddef.h>
  9#include <linux/bpf.h>
 10#include <linux/if_ether.h>
 11#include <linux/in.h>
 12#include <linux/ip.h>
 13#include <linux/ipv6.h>
 14#include <linux/mpls.h>
 15#include <linux/tcp.h>
 16#include <linux/udp.h>
 17#include <linux/pkt_cls.h>
 18#include <linux/types.h>
 19
 20#include <bpf/bpf_endian.h>
 21#include <bpf/bpf_helpers.h>
 22
 23static const int cfg_port = 8000;
 24
 25static const int cfg_udp_src = 20000;
 26
 27#define	L2_PAD_SZ	(sizeof(struct vxlanhdr) + ETH_HLEN)
 28
 29#define	UDP_PORT		5555
 30#define	MPLS_OVER_UDP_PORT	6635
 31#define	ETH_OVER_UDP_PORT	7777
 32#define	VXLAN_UDP_PORT		8472
 33
 34#define	EXTPROTO_VXLAN	0x1
 35
 36#define	VXLAN_N_VID     (1u << 24)
 37#define	VXLAN_VNI_MASK	bpf_htonl((VXLAN_N_VID - 1) << 8)
 38#define	VXLAN_FLAGS     0x8
 39#define	VXLAN_VNI       1
 40
 41#ifndef NEXTHDR_DEST
 42#define NEXTHDR_DEST	60
 43#endif
 44
/* MPLS label 1000 with S bit (last label) set and ttl of 255. */
static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
						     MPLS_LS_S_MASK | 0xff);

/* VXLAN header: flags word plus 24-bit VNI (both big-endian). */
struct vxlanhdr {
	__be32 vx_flags;
	__be32 vx_vni;
} __attribute__((packed));

/* Minimal GRE header: no checksum, key or sequence-number fields. */
struct gre_hdr {
	__be16 flags;
	__be16 protocol;
} __attribute__((packed));

/* The tunnel L4 header is either UDP or GRE, never both at once. */
union l4hdr {
	struct udphdr udp;
	struct gre_hdr gre;
};

/* Scratch template for the outer IPv4 tunnel headers built by __encap_ipv4(). */
struct v4hdr {
	struct iphdr ip;
	union l4hdr l4hdr;
	__u8 pad[L2_PAD_SZ];		/* space for L2 header / vxlan header ... */
} __attribute__((packed));

/* Scratch template for the outer IPv6 tunnel headers built by __encap_ipv6(). */
struct v6hdr {
	struct ipv6hdr ip;
	union l4hdr l4hdr;
	__u8 pad[L2_PAD_SZ];		/* space for L2 header / vxlan header ... */
} __attribute__((packed));
 75
 76static __always_inline void set_ipv4_csum(struct iphdr *iph)
 77{
 78	__u16 *iph16 = (__u16 *)iph;
 79	__u32 csum;
 80	int i;
 81
 82	iph->check = 0;
 83
 84#pragma clang loop unroll(full)
 85	for (i = 0, csum = 0; i < sizeof(*iph) >> 1; i++)
 86		csum += *iph16++;
 87
 88	iph->check = ~((csum & 0xffff) + (csum >> 16));
 89}
 90
/* Encapsulate the inner packet in an outer IPv4 tunnel header, in place.
 *
 * Only TCP packets destined to cfg_port are rewritten; everything else
 * passes through untouched.
 *
 * @skb:	 packet to rewrite
 * @encap_proto: outer tunnel protocol (IPPROTO_GRE, IPPROTO_UDP,
 *		 IPPROTO_IPIP or IPPROTO_IPV6 for 6in4)
 * @l2_proto:	 optional L2 header inserted between tunnel and inner
 *		 packet: ETH_P_MPLS_UC (MPLS label) or ETH_P_TEB (copy of
 *		 the inner Ethernet header)
 * @ext_proto:	 extension flags; EXTPROTO_VXLAN additionally inserts a
 *		 VXLAN header before the inner Ethernet header
 *
 * Returns TC_ACT_OK when the packet is ignored or rewritten successfully,
 * TC_ACT_SHOT when a rewrite step fails.
 */
static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
					__u16 l2_proto, __u16 ext_proto)
{
	__u16 udp_dst = UDP_PORT;
	struct iphdr iph_inner;
	struct v4hdr h_outer;
	struct tcphdr tcph;
	int olen, l2_len;
	__u8 *l2_hdr = NULL;
	int tcp_off;
	__u64 flags;

	/* Most tests encapsulate a packet into a tunnel with the same
	 * network protocol, and derive the outer header fields from
	 * the inner header.
	 *
	 * The 6in4 case tests different inner and outer protocols. As
	 * the inner is ipv6, but the outer expects an ipv4 header as
	 * input, manually build a struct iphdr based on the ipv6hdr.
	 */
	if (encap_proto == IPPROTO_IPV6) {
		const __u32 saddr = (192 << 24) | (168 << 16) | (1 << 8) | 1;
		const __u32 daddr = (192 << 24) | (168 << 16) | (1 << 8) | 2;
		struct ipv6hdr iph6_inner;

		/* Read the IPv6 header */
		if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph6_inner,
				       sizeof(iph6_inner)) < 0)
			return TC_ACT_OK;

		/* Derive the IPv4 header fields from the IPv6 header */
		memset(&iph_inner, 0, sizeof(iph_inner));
		iph_inner.version = 4;
		iph_inner.ihl = 5;
		iph_inner.tot_len = bpf_htons(sizeof(iph6_inner) +
				    bpf_ntohs(iph6_inner.payload_len));
		iph_inner.ttl = iph6_inner.hop_limit - 1;
		iph_inner.protocol = iph6_inner.nexthdr;
		iph_inner.saddr = __bpf_constant_htonl(saddr);
		iph_inner.daddr = __bpf_constant_htonl(daddr);

		tcp_off = sizeof(iph6_inner);
	} else {
		if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
				       sizeof(iph_inner)) < 0)
			return TC_ACT_OK;

		tcp_off = sizeof(iph_inner);
	}

	/* filter only packets we want */
	if (iph_inner.ihl != 5 || iph_inner.protocol != IPPROTO_TCP)
		return TC_ACT_OK;

	if (bpf_skb_load_bytes(skb, ETH_HLEN + tcp_off,
			       &tcph, sizeof(tcph)) < 0)
		return TC_ACT_OK;

	if (tcph.dest != __bpf_constant_htons(cfg_port))
		return TC_ACT_OK;

	/* olen accumulates the total size of all headers to insert */
	olen = sizeof(h_outer.ip);
	l2_len = 0;

	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV4;

	/* choose the L2 encapsulation size and matching UDP dst port */
	switch (l2_proto) {
	case ETH_P_MPLS_UC:
		l2_len = sizeof(mpls_label);
		udp_dst = MPLS_OVER_UDP_PORT;
		break;
	case ETH_P_TEB:
		l2_len = ETH_HLEN;
		if (ext_proto & EXTPROTO_VXLAN) {
			udp_dst = VXLAN_UDP_PORT;
			l2_len += sizeof(struct vxlanhdr);
		} else
			udp_dst = ETH_OVER_UDP_PORT;
		break;
	}
	flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);

	/* build the L4 tunnel header (if any) in h_outer */
	switch (encap_proto) {
	case IPPROTO_GRE:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
		olen += sizeof(h_outer.l4hdr.gre);
		h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
		h_outer.l4hdr.gre.flags = 0;
		break;
	case IPPROTO_UDP:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
		olen += sizeof(h_outer.l4hdr.udp);
		h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
		h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
		h_outer.l4hdr.udp.check = 0;
		/* UDP length covers the inner packet plus the UDP header
		 * and any L2/VXLAN header carried inside the datagram.
		 */
		h_outer.l4hdr.udp.len = bpf_htons(bpf_ntohs(iph_inner.tot_len) +
						  sizeof(h_outer.l4hdr.udp) +
						  l2_len);
		break;
	case IPPROTO_IPIP:
	case IPPROTO_IPV6:
		break;
	default:
		return TC_ACT_OK;
	}

	/* add L2 encap (if specified) */
	l2_hdr = (__u8 *)&h_outer + olen;
	switch (l2_proto) {
	case ETH_P_MPLS_UC:
		*(__u32 *)l2_hdr = mpls_label;
		break;
	case ETH_P_TEB:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;

		if (ext_proto & EXTPROTO_VXLAN) {
			struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;

			vxlan_hdr->vx_flags = VXLAN_FLAGS;
			vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);

			l2_hdr += sizeof(struct vxlanhdr);
		}

		/* reuse the packet's own Ethernet header as inner L2 */
		if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
			return TC_ACT_SHOT;

		break;
	}
	olen += l2_len;

	/* add room between mac and network header */
	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
		return TC_ACT_SHOT;

	/* prepare new outer network header */
	h_outer.ip = iph_inner;
	h_outer.ip.tot_len = bpf_htons(olen +
				       bpf_ntohs(h_outer.ip.tot_len));
	h_outer.ip.protocol = encap_proto;

	set_ipv4_csum((void *)&h_outer.ip);

	/* store new outer network header */
	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
				BPF_F_INVALIDATE_HASH) < 0)
		return TC_ACT_SHOT;

	/* if changing outer proto type, update eth->h_proto */
	if (encap_proto == IPPROTO_IPV6) {
		struct ethhdr eth;

		if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)) < 0)
			return TC_ACT_SHOT;
		eth.h_proto = bpf_htons(ETH_P_IP);
		if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0) < 0)
			return TC_ACT_SHOT;
	}

	return TC_ACT_OK;
}
252
/* Convenience wrapper: IPv4 encapsulation with no extension protocol. */
static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
				      __u16 l2_proto)
{
	return __encap_ipv4(skb, encap_proto, l2_proto, 0);
}
258
/* Encapsulate the inner IPv6 packet in an outer IPv6 tunnel header,
 * in place.  Only TCP packets destined to cfg_port are rewritten.
 *
 * @skb:	 packet to rewrite
 * @encap_proto: outer tunnel protocol (IPPROTO_GRE, IPPROTO_UDP or
 *		 IPPROTO_IPV6 for plain ip6-in-ip6)
 * @l2_proto:	 optional L2 header inserted between tunnel and inner
 *		 packet: ETH_P_MPLS_UC (MPLS label) or ETH_P_TEB (copy of
 *		 the inner Ethernet header)
 * @ext_proto:	 extension flags; EXTPROTO_VXLAN additionally inserts a
 *		 VXLAN header before the inner Ethernet header
 *
 * Returns TC_ACT_OK when the packet is ignored or rewritten successfully,
 * TC_ACT_SHOT when a rewrite step fails.
 */
static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
					__u16 l2_proto, __u16 ext_proto)
{
	__u16 udp_dst = UDP_PORT;
	struct ipv6hdr iph_inner;
	struct v6hdr h_outer;
	struct tcphdr tcph;
	int olen, l2_len;
	__u8 *l2_hdr = NULL;
	__u16 tot_len;
	__u64 flags;

	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
			       sizeof(iph_inner)) < 0)
		return TC_ACT_OK;

	/* filter only packets we want */
	if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner),
			       &tcph, sizeof(tcph)) < 0)
		return TC_ACT_OK;

	if (tcph.dest != __bpf_constant_htons(cfg_port))
		return TC_ACT_OK;

	/* olen accumulates the total size of all headers to insert */
	olen = sizeof(h_outer.ip);
	l2_len = 0;

	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;

	/* choose the L2 encapsulation size and matching UDP dst port */
	switch (l2_proto) {
	case ETH_P_MPLS_UC:
		l2_len = sizeof(mpls_label);
		udp_dst = MPLS_OVER_UDP_PORT;
		break;
	case ETH_P_TEB:
		l2_len = ETH_HLEN;
		if (ext_proto & EXTPROTO_VXLAN) {
			udp_dst = VXLAN_UDP_PORT;
			l2_len += sizeof(struct vxlanhdr);
		} else
			udp_dst = ETH_OVER_UDP_PORT;
		break;
	}
	flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);

	/* build the L4 tunnel header (if any) in h_outer */
	switch (encap_proto) {
	case IPPROTO_GRE:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
		olen += sizeof(h_outer.l4hdr.gre);
		h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
		h_outer.l4hdr.gre.flags = 0;
		break;
	case IPPROTO_UDP:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
		olen += sizeof(h_outer.l4hdr.udp);
		h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
		h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
		/* UDP length covers the inner packet plus the UDP header
		 * and any L2/VXLAN header carried inside the datagram.
		 */
		tot_len = bpf_ntohs(iph_inner.payload_len) + sizeof(iph_inner) +
			  sizeof(h_outer.l4hdr.udp) + l2_len;
		h_outer.l4hdr.udp.check = 0;
		h_outer.l4hdr.udp.len = bpf_htons(tot_len);
		break;
	case IPPROTO_IPV6:
		break;
	default:
		return TC_ACT_OK;
	}

	/* add L2 encap (if specified) */
	l2_hdr = (__u8 *)&h_outer + olen;
	switch (l2_proto) {
	case ETH_P_MPLS_UC:
		*(__u32 *)l2_hdr = mpls_label;
		break;
	case ETH_P_TEB:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;

		if (ext_proto & EXTPROTO_VXLAN) {
			struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;

			vxlan_hdr->vx_flags = VXLAN_FLAGS;
			vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);

			l2_hdr += sizeof(struct vxlanhdr);
		}

		/* reuse the packet's own Ethernet header as inner L2 */
		if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
			return TC_ACT_SHOT;
		break;
	}
	olen += l2_len;

	/* add room between mac and network header */
	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
		return TC_ACT_SHOT;

	/* prepare new outer network header */
	h_outer.ip = iph_inner;
	h_outer.ip.payload_len = bpf_htons(olen +
					   bpf_ntohs(h_outer.ip.payload_len));

	h_outer.ip.nexthdr = encap_proto;

	/* store new outer network header */
	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
				BPF_F_INVALIDATE_HASH) < 0)
		return TC_ACT_SHOT;

	return TC_ACT_OK;
}
369
/* Encapsulate an inner IPv4 packet (TCP to cfg_port only) in a freshly
 * built outer IPv6 header (4in6).  Unlike the other encap paths the
 * outer header is synthesized from scratch (fixed fd00::1 -> fd00::2
 * addresses) rather than derived from the inner header, and the
 * Ethernet protocol is switched to ETH_P_IPV6.
 *
 * Returns TC_ACT_OK when the packet is ignored or rewritten successfully,
 * TC_ACT_SHOT when a rewrite step fails.
 */
static int encap_ipv6_ipip6(struct __sk_buff *skb)
{
	struct iphdr iph_inner;
	struct v6hdr h_outer;
	struct tcphdr tcph;
	struct ethhdr eth;
	__u64 flags;
	int olen;

	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
			       sizeof(iph_inner)) < 0)
		return TC_ACT_OK;

	/* filter only packets we want */
	if (bpf_skb_load_bytes(skb, ETH_HLEN + (iph_inner.ihl << 2),
			       &tcph, sizeof(tcph)) < 0)
		return TC_ACT_OK;

	if (tcph.dest != __bpf_constant_htons(cfg_port))
		return TC_ACT_OK;

	olen = sizeof(h_outer.ip);

	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;

	/* add room between mac and network header */
	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
		return TC_ACT_SHOT;

	/* prepare new outer network header */
	memset(&h_outer.ip, 0, sizeof(h_outer.ip));
	h_outer.ip.version = 6;
	h_outer.ip.hop_limit = iph_inner.ttl;
	/* src fd00::1, dst fd00::2 (remaining bytes stay zero) */
	h_outer.ip.saddr.s6_addr[1] = 0xfd;
	h_outer.ip.saddr.s6_addr[15] = 1;
	h_outer.ip.daddr.s6_addr[1] = 0xfd;
	h_outer.ip.daddr.s6_addr[15] = 2;
	/* IPv6 payload is the entire inner IPv4 packet */
	h_outer.ip.payload_len = iph_inner.tot_len;
	h_outer.ip.nexthdr = IPPROTO_IPIP;

	/* store new outer network header */
	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
				BPF_F_INVALIDATE_HASH) < 0)
		return TC_ACT_SHOT;

	/* update eth->h_proto */
	if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)) < 0)
		return TC_ACT_SHOT;
	eth.h_proto = bpf_htons(ETH_P_IPV6);
	if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0) < 0)
		return TC_ACT_SHOT;

	return TC_ACT_OK;
}

/* Convenience wrapper: IPv6 encapsulation with no extension protocol. */
static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
				      __u16 l2_proto)
{
	return __encap_ipv6(skb, encap_proto, l2_proto, 0);
}
430
431SEC("encap_ipip_none")
432int __encap_ipip_none(struct __sk_buff *skb)
433{
434	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
435		return encap_ipv4(skb, IPPROTO_IPIP, ETH_P_IP);
436	else
437		return TC_ACT_OK;
438}
439
440SEC("encap_gre_none")
441int __encap_gre_none(struct __sk_buff *skb)
442{
443	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
444		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_IP);
445	else
446		return TC_ACT_OK;
447}
448
449SEC("encap_gre_mpls")
450int __encap_gre_mpls(struct __sk_buff *skb)
451{
452	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
453		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
454	else
455		return TC_ACT_OK;
456}
457
458SEC("encap_gre_eth")
459int __encap_gre_eth(struct __sk_buff *skb)
460{
461	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
462		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_TEB);
463	else
464		return TC_ACT_OK;
465}
466
467SEC("encap_udp_none")
468int __encap_udp_none(struct __sk_buff *skb)
469{
470	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
471		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_IP);
472	else
473		return TC_ACT_OK;
474}
475
476SEC("encap_udp_mpls")
477int __encap_udp_mpls(struct __sk_buff *skb)
478{
479	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
480		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
481	else
482		return TC_ACT_OK;
483}
484
485SEC("encap_udp_eth")
486int __encap_udp_eth(struct __sk_buff *skb)
487{
488	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
489		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_TEB);
490	else
491		return TC_ACT_OK;
492}
493
494SEC("encap_vxlan_eth")
495int __encap_vxlan_eth(struct __sk_buff *skb)
496{
497	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
498		return __encap_ipv4(skb, IPPROTO_UDP,
499				    ETH_P_TEB,
500				    EXTPROTO_VXLAN);
501	else
502		return TC_ACT_OK;
503}
504
505SEC("encap_sit_none")
506int __encap_sit_none(struct __sk_buff *skb)
507{
508	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
509		return encap_ipv4(skb, IPPROTO_IPV6, ETH_P_IP);
510	else
511		return TC_ACT_OK;
512}
513
514SEC("encap_ip6tnl_none")
515int __encap_ip6tnl_none(struct __sk_buff *skb)
516{
517	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
518		return encap_ipv6(skb, IPPROTO_IPV6, ETH_P_IPV6);
519	else
520		return TC_ACT_OK;
521}
522
523SEC("encap_ipip6_none")
524int __encap_ipip6_none(struct __sk_buff *skb)
525{
526	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
527		return encap_ipv6_ipip6(skb);
528	else
529		return TC_ACT_OK;
530}
531
532SEC("encap_ip6gre_none")
533int __encap_ip6gre_none(struct __sk_buff *skb)
534{
535	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
536		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_IPV6);
537	else
538		return TC_ACT_OK;
539}
540
541SEC("encap_ip6gre_mpls")
542int __encap_ip6gre_mpls(struct __sk_buff *skb)
543{
544	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
545		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
546	else
547		return TC_ACT_OK;
548}
549
550SEC("encap_ip6gre_eth")
551int __encap_ip6gre_eth(struct __sk_buff *skb)
552{
553	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
554		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_TEB);
555	else
556		return TC_ACT_OK;
557}
558
559SEC("encap_ip6udp_none")
560int __encap_ip6udp_none(struct __sk_buff *skb)
561{
562	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
563		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_IPV6);
564	else
565		return TC_ACT_OK;
566}
567
568SEC("encap_ip6udp_mpls")
569int __encap_ip6udp_mpls(struct __sk_buff *skb)
570{
571	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
572		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
573	else
574		return TC_ACT_OK;
575}
576
577SEC("encap_ip6udp_eth")
578int __encap_ip6udp_eth(struct __sk_buff *skb)
579{
580	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
581		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_TEB);
582	else
583		return TC_ACT_OK;
584}
585
586SEC("encap_ip6vxlan_eth")
587int __encap_ip6vxlan_eth(struct __sk_buff *skb)
588{
589	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
590		return __encap_ipv6(skb, IPPROTO_UDP,
591				    ETH_P_TEB,
592				    EXTPROTO_VXLAN);
593	else
594		return TC_ACT_OK;
595}
596
/* Compute the size of the tunnel headers (outer network header plus any
 * L4 and L2/VXLAN encapsulation) and strip them from the packet, telling
 * the kernel which inner network protocol is uncovered.
 *
 * @skb:   packet to shrink
 * @off:   offset of the outer network header (ETH_HLEN)
 * @len:   length of the outer network header
 * @proto: protocol number following the outer network header
 *
 * Returns TC_ACT_OK when the packet is not one of ours or was decapped
 * successfully, TC_ACT_SHOT when shrinking the packet fails.
 */
static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
{
	__u64 flags = BPF_F_ADJ_ROOM_FIXED_GSO;
	struct ipv6_opt_hdr ip6_opt_hdr;
	struct gre_hdr greh;
	struct udphdr udph;
	int olen = len;

	switch (proto) {
	case IPPROTO_IPIP:
		flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
		break;
	case IPPROTO_IPV6:
		flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
		break;
	case NEXTHDR_DEST:
		/* destination-options extension header: peek at its
		 * nexthdr to learn the real inner protocol
		 */
		if (bpf_skb_load_bytes(skb, off + len, &ip6_opt_hdr,
				       sizeof(ip6_opt_hdr)) < 0)
			return TC_ACT_OK;
		switch (ip6_opt_hdr.nexthdr) {
		case IPPROTO_IPIP:
			flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
			break;
		case IPPROTO_IPV6:
			flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
			break;
		default:
			return TC_ACT_OK;
		}
		break;
	case IPPROTO_GRE:
		olen += sizeof(struct gre_hdr);
		if (bpf_skb_load_bytes(skb, off + len, &greh, sizeof(greh)) < 0)
			return TC_ACT_OK;
		/* GRE protocol field tells whether an L2 header follows */
		switch (bpf_ntohs(greh.protocol)) {
		case ETH_P_MPLS_UC:
			olen += sizeof(mpls_label);
			break;
		case ETH_P_TEB:
			olen += ETH_HLEN;
			break;
		}
		break;
	case IPPROTO_UDP:
		olen += sizeof(struct udphdr);
		if (bpf_skb_load_bytes(skb, off + len, &udph, sizeof(udph)) < 0)
			return TC_ACT_OK;
		/* destination port encodes the L2 encapsulation used */
		switch (bpf_ntohs(udph.dest)) {
		case MPLS_OVER_UDP_PORT:
			olen += sizeof(mpls_label);
			break;
		case ETH_OVER_UDP_PORT:
			olen += ETH_HLEN;
			break;
		case VXLAN_UDP_PORT:
			olen += ETH_HLEN + sizeof(struct vxlanhdr);
			break;
		}
		break;
	default:
		return TC_ACT_OK;
	}

	if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC, flags))
		return TC_ACT_SHOT;

	return TC_ACT_OK;
}
665
666static int decap_ipv4(struct __sk_buff *skb)
667{
668	struct iphdr iph_outer;
669
670	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
671			       sizeof(iph_outer)) < 0)
672		return TC_ACT_OK;
673
674	if (iph_outer.ihl != 5)
675		return TC_ACT_OK;
676
677	return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
678			      iph_outer.protocol);
679}
680
681static int decap_ipv6(struct __sk_buff *skb)
682{
683	struct ipv6hdr iph_outer;
684
685	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
686			       sizeof(iph_outer)) < 0)
687		return TC_ACT_OK;
688
689	return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
690			      iph_outer.nexthdr);
691}
692
693SEC("decap")
694int decap_f(struct __sk_buff *skb)
695{
696	switch (skb->protocol) {
697	case __bpf_constant_htons(ETH_P_IP):
698		return decap_ipv4(skb);
699	case __bpf_constant_htons(ETH_P_IPV6):
700		return decap_ipv6(skb);
701	default:
702		/* does not match, ignore */
703		return TC_ACT_OK;
704	}
705}
706
707char __license[] SEC("license") = "GPL";