Linux Audio

Check our new training course

Loading...
v6.13.7
  1// SPDX-License-Identifier: GPL-2.0
  2
  3/* In-place tunneling */
  4
  5#include <stdbool.h>
  6#include <string.h>
  7
  8#include <linux/stddef.h>
  9#include <linux/bpf.h>
 10#include <linux/if_ether.h>
 11#include <linux/in.h>
 12#include <linux/ip.h>
 13#include <linux/ipv6.h>
 14#include <linux/mpls.h>
 15#include <linux/tcp.h>
 16#include <linux/udp.h>
 17#include <linux/pkt_cls.h>
 18#include <linux/types.h>
 19
 20#include <bpf/bpf_endian.h>
 21#include <bpf/bpf_helpers.h>
 22#include "bpf_compiler.h"
 23
 24#pragma GCC diagnostic ignored "-Waddress-of-packed-member"
 25
 26static const int cfg_port = 8000;
 27
 28static const int cfg_udp_src = 20000;
 29
 30#define	L2_PAD_SZ	(sizeof(struct vxlanhdr) + ETH_HLEN)
 31
 32#define	UDP_PORT		5555
 33#define	MPLS_OVER_UDP_PORT	6635
 34#define	ETH_OVER_UDP_PORT	7777
 35#define	VXLAN_UDP_PORT		8472
 36
 37#define	EXTPROTO_VXLAN	0x1
 38
 39#define	VXLAN_N_VID     (1u << 24)
 40#define	VXLAN_VNI_MASK	bpf_htonl((VXLAN_N_VID - 1) << 8)
 41#define	VXLAN_FLAGS     0x8
 42#define	VXLAN_VNI       1
 43
 44#ifndef NEXTHDR_DEST
 45#define NEXTHDR_DEST	60
 46#endif
 47
 48/* MPLS label 1000 with S bit (last label) set and ttl of 255. */
 49static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
 50						     MPLS_LS_S_MASK | 0xff);
 51
 52struct vxlanhdr {
 53	__be32 vx_flags;
 54	__be32 vx_vni;
 55} __attribute__((packed));
 56
 57struct gre_hdr {
 58	__be16 flags;
 59	__be16 protocol;
 60} __attribute__((packed));
 61
 62union l4hdr {
 63	struct udphdr udp;
 64	struct gre_hdr gre;
 65};
 66
 67struct v4hdr {
 68	struct iphdr ip;
 69	union l4hdr l4hdr;
 70	__u8 pad[L2_PAD_SZ];		/* space for L2 header / vxlan header ... */
 71} __attribute__((packed));
 72
 73struct v6hdr {
 74	struct ipv6hdr ip;
 75	union l4hdr l4hdr;
 76	__u8 pad[L2_PAD_SZ];		/* space for L2 header / vxlan header ... */
 77} __attribute__((packed));
 78
 79static __always_inline void set_ipv4_csum(struct iphdr *iph)
 80{
 81	__u16 *iph16 = (__u16 *)iph;
 82	__u32 csum;
 83	int i;
 84
 85	iph->check = 0;
 86
 87	__pragma_loop_unroll_full
 88	for (i = 0, csum = 0; i < sizeof(*iph) >> 1; i++)
 89		csum += *iph16++;
 90
 91	iph->check = ~((csum & 0xffff) + (csum >> 16));
 92}
 93
 94static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
 95					__u16 l2_proto, __u16 ext_proto)
 96{
 97	__u16 udp_dst = UDP_PORT;
 98	struct iphdr iph_inner;
 99	struct v4hdr h_outer;
100	struct tcphdr tcph;
101	int olen, l2_len;
102	__u8 *l2_hdr = NULL;
103	int tcp_off;
104	__u64 flags;
105
106	/* Most tests encapsulate a packet into a tunnel with the same
107	 * network protocol, and derive the outer header fields from
108	 * the inner header.
109	 *
110	 * The 6in4 case tests different inner and outer protocols. As
111	 * the inner is ipv6, but the outer expects an ipv4 header as
112	 * input, manually build a struct iphdr based on the ipv6hdr.
113	 */
114	if (encap_proto == IPPROTO_IPV6) {
115		const __u32 saddr = (192 << 24) | (168 << 16) | (1 << 8) | 1;
116		const __u32 daddr = (192 << 24) | (168 << 16) | (1 << 8) | 2;
117		struct ipv6hdr iph6_inner;
118
119		/* Read the IPv6 header */
120		if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph6_inner,
121				       sizeof(iph6_inner)) < 0)
122			return TC_ACT_OK;
123
124		/* Derive the IPv4 header fields from the IPv6 header */
125		memset(&iph_inner, 0, sizeof(iph_inner));
126		iph_inner.version = 4;
127		iph_inner.ihl = 5;
128		iph_inner.tot_len = bpf_htons(sizeof(iph6_inner) +
129				    bpf_ntohs(iph6_inner.payload_len));
130		iph_inner.ttl = iph6_inner.hop_limit - 1;
131		iph_inner.protocol = iph6_inner.nexthdr;
132		iph_inner.saddr = __bpf_constant_htonl(saddr);
133		iph_inner.daddr = __bpf_constant_htonl(daddr);
134
135		tcp_off = sizeof(iph6_inner);
136	} else {
137		if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
138				       sizeof(iph_inner)) < 0)
139			return TC_ACT_OK;
140
141		tcp_off = sizeof(iph_inner);
142	}
143
144	/* filter only packets we want */
145	if (iph_inner.ihl != 5 || iph_inner.protocol != IPPROTO_TCP)
146		return TC_ACT_OK;
147
148	if (bpf_skb_load_bytes(skb, ETH_HLEN + tcp_off,
149			       &tcph, sizeof(tcph)) < 0)
150		return TC_ACT_OK;
151
152	if (tcph.dest != __bpf_constant_htons(cfg_port))
153		return TC_ACT_OK;
154
155	olen = sizeof(h_outer.ip);
156	l2_len = 0;
157
158	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV4;
159
160	switch (l2_proto) {
161	case ETH_P_MPLS_UC:
162		l2_len = sizeof(mpls_label);
163		udp_dst = MPLS_OVER_UDP_PORT;
164		break;
165	case ETH_P_TEB:
166		l2_len = ETH_HLEN;
167		if (ext_proto & EXTPROTO_VXLAN) {
168			udp_dst = VXLAN_UDP_PORT;
169			l2_len += sizeof(struct vxlanhdr);
170		} else
171			udp_dst = ETH_OVER_UDP_PORT;
172		break;
173	}
174	flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
175
176	switch (encap_proto) {
177	case IPPROTO_GRE:
178		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
179		olen += sizeof(h_outer.l4hdr.gre);
180		h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
181		h_outer.l4hdr.gre.flags = 0;
182		break;
183	case IPPROTO_UDP:
184		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
185		olen += sizeof(h_outer.l4hdr.udp);
186		h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
187		h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
188		h_outer.l4hdr.udp.check = 0;
189		h_outer.l4hdr.udp.len = bpf_htons(bpf_ntohs(iph_inner.tot_len) +
190						  sizeof(h_outer.l4hdr.udp) +
191						  l2_len);
192		break;
193	case IPPROTO_IPIP:
194	case IPPROTO_IPV6:
195		break;
196	default:
197		return TC_ACT_OK;
198	}
199
200	/* add L2 encap (if specified) */
201	l2_hdr = (__u8 *)&h_outer + olen;
202	switch (l2_proto) {
203	case ETH_P_MPLS_UC:
204		*(__u32 *)l2_hdr = mpls_label;
205		break;
206	case ETH_P_TEB:
207		flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
208
209		if (ext_proto & EXTPROTO_VXLAN) {
210			struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
211
212			vxlan_hdr->vx_flags = VXLAN_FLAGS;
213			vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);
214
215			l2_hdr += sizeof(struct vxlanhdr);
216		}
217
218		if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
219			return TC_ACT_SHOT;
220
221		break;
222	}
223	olen += l2_len;
224
225	/* add room between mac and network header */
226	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
227		return TC_ACT_SHOT;
228
229	/* prepare new outer network header */
230	h_outer.ip = iph_inner;
231	h_outer.ip.tot_len = bpf_htons(olen +
232				       bpf_ntohs(h_outer.ip.tot_len));
233	h_outer.ip.protocol = encap_proto;
234
235	set_ipv4_csum((void *)&h_outer.ip);
236
237	/* store new outer network header */
238	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
239				BPF_F_INVALIDATE_HASH) < 0)
240		return TC_ACT_SHOT;
241
242	/* if changing outer proto type, update eth->h_proto */
243	if (encap_proto == IPPROTO_IPV6) {
244		struct ethhdr eth;
245
246		if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)) < 0)
247			return TC_ACT_SHOT;
248		eth.h_proto = bpf_htons(ETH_P_IP);
249		if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0) < 0)
250			return TC_ACT_SHOT;
251	}
252
253	return TC_ACT_OK;
254}
255
256static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
257				      __u16 l2_proto)
258{
259	return __encap_ipv4(skb, encap_proto, l2_proto, 0);
260}
261
262static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
263					__u16 l2_proto, __u16 ext_proto)
264{
265	__u16 udp_dst = UDP_PORT;
266	struct ipv6hdr iph_inner;
267	struct v6hdr h_outer;
268	struct tcphdr tcph;
269	int olen, l2_len;
270	__u8 *l2_hdr = NULL;
271	__u16 tot_len;
272	__u64 flags;
273
274	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
275			       sizeof(iph_inner)) < 0)
276		return TC_ACT_OK;
277
278	/* filter only packets we want */
279	if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner),
280			       &tcph, sizeof(tcph)) < 0)
281		return TC_ACT_OK;
282
283	if (tcph.dest != __bpf_constant_htons(cfg_port))
284		return TC_ACT_OK;
285
286	olen = sizeof(h_outer.ip);
287	l2_len = 0;
288
289	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;
290
291	switch (l2_proto) {
292	case ETH_P_MPLS_UC:
293		l2_len = sizeof(mpls_label);
294		udp_dst = MPLS_OVER_UDP_PORT;
295		break;
296	case ETH_P_TEB:
297		l2_len = ETH_HLEN;
298		if (ext_proto & EXTPROTO_VXLAN) {
299			udp_dst = VXLAN_UDP_PORT;
300			l2_len += sizeof(struct vxlanhdr);
301		} else
302			udp_dst = ETH_OVER_UDP_PORT;
303		break;
304	}
305	flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
306
307	switch (encap_proto) {
308	case IPPROTO_GRE:
309		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
310		olen += sizeof(h_outer.l4hdr.gre);
311		h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
312		h_outer.l4hdr.gre.flags = 0;
313		break;
314	case IPPROTO_UDP:
315		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
316		olen += sizeof(h_outer.l4hdr.udp);
317		h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
318		h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
319		tot_len = bpf_ntohs(iph_inner.payload_len) + sizeof(iph_inner) +
320			  sizeof(h_outer.l4hdr.udp) + l2_len;
321		h_outer.l4hdr.udp.check = 0;
322		h_outer.l4hdr.udp.len = bpf_htons(tot_len);
323		break;
324	case IPPROTO_IPV6:
325		break;
326	default:
327		return TC_ACT_OK;
328	}
329
330	/* add L2 encap (if specified) */
331	l2_hdr = (__u8 *)&h_outer + olen;
332	switch (l2_proto) {
333	case ETH_P_MPLS_UC:
334		*(__u32 *)l2_hdr = mpls_label;
335		break;
336	case ETH_P_TEB:
337		flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
338
339		if (ext_proto & EXTPROTO_VXLAN) {
340			struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
341
342			vxlan_hdr->vx_flags = VXLAN_FLAGS;
343			vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);
344
345			l2_hdr += sizeof(struct vxlanhdr);
346		}
347
348		if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
349			return TC_ACT_SHOT;
350		break;
351	}
352	olen += l2_len;
353
354	/* add room between mac and network header */
355	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
356		return TC_ACT_SHOT;
357
358	/* prepare new outer network header */
359	h_outer.ip = iph_inner;
360	h_outer.ip.payload_len = bpf_htons(olen +
361					   bpf_ntohs(h_outer.ip.payload_len));
362
363	h_outer.ip.nexthdr = encap_proto;
364
365	/* store new outer network header */
366	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
367				BPF_F_INVALIDATE_HASH) < 0)
368		return TC_ACT_SHOT;
369
370	return TC_ACT_OK;
371}
372
373static int encap_ipv6_ipip6(struct __sk_buff *skb)
374{
375	struct iphdr iph_inner;
376	struct v6hdr h_outer;
377	struct tcphdr tcph;
378	struct ethhdr eth;
379	__u64 flags;
380	int olen;
381
382	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
383			       sizeof(iph_inner)) < 0)
384		return TC_ACT_OK;
385
386	/* filter only packets we want */
387	if (bpf_skb_load_bytes(skb, ETH_HLEN + (iph_inner.ihl << 2),
388			       &tcph, sizeof(tcph)) < 0)
389		return TC_ACT_OK;
390
391	if (tcph.dest != __bpf_constant_htons(cfg_port))
392		return TC_ACT_OK;
393
394	olen = sizeof(h_outer.ip);
395
396	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;
397
398	/* add room between mac and network header */
399	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
400		return TC_ACT_SHOT;
401
402	/* prepare new outer network header */
403	memset(&h_outer.ip, 0, sizeof(h_outer.ip));
404	h_outer.ip.version = 6;
405	h_outer.ip.hop_limit = iph_inner.ttl;
406	h_outer.ip.saddr.s6_addr[1] = 0xfd;
407	h_outer.ip.saddr.s6_addr[15] = 1;
408	h_outer.ip.daddr.s6_addr[1] = 0xfd;
409	h_outer.ip.daddr.s6_addr[15] = 2;
410	h_outer.ip.payload_len = iph_inner.tot_len;
411	h_outer.ip.nexthdr = IPPROTO_IPIP;
412
413	/* store new outer network header */
414	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
415				BPF_F_INVALIDATE_HASH) < 0)
416		return TC_ACT_SHOT;
417
418	/* update eth->h_proto */
419	if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)) < 0)
420		return TC_ACT_SHOT;
421	eth.h_proto = bpf_htons(ETH_P_IPV6);
422	if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0) < 0)
423		return TC_ACT_SHOT;
424
425	return TC_ACT_OK;
426}
427
428static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
429				      __u16 l2_proto)
430{
431	return __encap_ipv6(skb, encap_proto, l2_proto, 0);
432}
433
434SEC("encap_ipip_none")
435int __encap_ipip_none(struct __sk_buff *skb)
436{
437	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
438		return encap_ipv4(skb, IPPROTO_IPIP, ETH_P_IP);
439	else
440		return TC_ACT_OK;
441}
442
443SEC("encap_gre_none")
444int __encap_gre_none(struct __sk_buff *skb)
445{
446	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
447		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_IP);
448	else
449		return TC_ACT_OK;
450}
451
452SEC("encap_gre_mpls")
453int __encap_gre_mpls(struct __sk_buff *skb)
454{
455	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
456		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
457	else
458		return TC_ACT_OK;
459}
460
461SEC("encap_gre_eth")
462int __encap_gre_eth(struct __sk_buff *skb)
463{
464	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
465		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_TEB);
466	else
467		return TC_ACT_OK;
468}
469
470SEC("encap_udp_none")
471int __encap_udp_none(struct __sk_buff *skb)
472{
473	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
474		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_IP);
475	else
476		return TC_ACT_OK;
477}
478
479SEC("encap_udp_mpls")
480int __encap_udp_mpls(struct __sk_buff *skb)
481{
482	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
483		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
484	else
485		return TC_ACT_OK;
486}
487
488SEC("encap_udp_eth")
489int __encap_udp_eth(struct __sk_buff *skb)
490{
491	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
492		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_TEB);
493	else
494		return TC_ACT_OK;
495}
496
497SEC("encap_vxlan_eth")
498int __encap_vxlan_eth(struct __sk_buff *skb)
499{
500	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
501		return __encap_ipv4(skb, IPPROTO_UDP,
502				    ETH_P_TEB,
503				    EXTPROTO_VXLAN);
504	else
505		return TC_ACT_OK;
506}
507
508SEC("encap_sit_none")
509int __encap_sit_none(struct __sk_buff *skb)
510{
511	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
512		return encap_ipv4(skb, IPPROTO_IPV6, ETH_P_IP);
513	else
514		return TC_ACT_OK;
515}
516
517SEC("encap_ip6tnl_none")
518int __encap_ip6tnl_none(struct __sk_buff *skb)
519{
520	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
521		return encap_ipv6(skb, IPPROTO_IPV6, ETH_P_IPV6);
522	else
523		return TC_ACT_OK;
524}
525
526SEC("encap_ipip6_none")
527int __encap_ipip6_none(struct __sk_buff *skb)
528{
529	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
530		return encap_ipv6_ipip6(skb);
531	else
532		return TC_ACT_OK;
533}
534
535SEC("encap_ip6gre_none")
536int __encap_ip6gre_none(struct __sk_buff *skb)
537{
538	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
539		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_IPV6);
540	else
541		return TC_ACT_OK;
542}
543
544SEC("encap_ip6gre_mpls")
545int __encap_ip6gre_mpls(struct __sk_buff *skb)
546{
547	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
548		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
549	else
550		return TC_ACT_OK;
551}
552
553SEC("encap_ip6gre_eth")
554int __encap_ip6gre_eth(struct __sk_buff *skb)
555{
556	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
557		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_TEB);
558	else
559		return TC_ACT_OK;
560}
561
562SEC("encap_ip6udp_none")
563int __encap_ip6udp_none(struct __sk_buff *skb)
564{
565	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
566		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_IPV6);
567	else
568		return TC_ACT_OK;
569}
570
571SEC("encap_ip6udp_mpls")
572int __encap_ip6udp_mpls(struct __sk_buff *skb)
573{
574	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
575		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
576	else
577		return TC_ACT_OK;
578}
579
580SEC("encap_ip6udp_eth")
581int __encap_ip6udp_eth(struct __sk_buff *skb)
582{
583	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
584		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_TEB);
585	else
586		return TC_ACT_OK;
587}
588
589SEC("encap_ip6vxlan_eth")
590int __encap_ip6vxlan_eth(struct __sk_buff *skb)
591{
592	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
593		return __encap_ipv6(skb, IPPROTO_UDP,
594				    ETH_P_TEB,
595				    EXTPROTO_VXLAN);
596	else
597		return TC_ACT_OK;
598}
599
600static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
601{
602	__u64 flags = BPF_F_ADJ_ROOM_FIXED_GSO;
603	struct ipv6_opt_hdr ip6_opt_hdr;
604	struct gre_hdr greh;
605	struct udphdr udph;
606	int olen = len;
607
608	switch (proto) {
609	case IPPROTO_IPIP:
610		flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
611		break;
612	case IPPROTO_IPV6:
613		flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
614		break;
615	case NEXTHDR_DEST:
616		if (bpf_skb_load_bytes(skb, off + len, &ip6_opt_hdr,
617				       sizeof(ip6_opt_hdr)) < 0)
618			return TC_ACT_OK;
619		switch (ip6_opt_hdr.nexthdr) {
620		case IPPROTO_IPIP:
621			flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
622			break;
623		case IPPROTO_IPV6:
624			flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
625			break;
626		default:
627			return TC_ACT_OK;
628		}
629		break;
630	case IPPROTO_GRE:
631		olen += sizeof(struct gre_hdr);
632		if (bpf_skb_load_bytes(skb, off + len, &greh, sizeof(greh)) < 0)
633			return TC_ACT_OK;
634		switch (bpf_ntohs(greh.protocol)) {
635		case ETH_P_MPLS_UC:
636			olen += sizeof(mpls_label);
637			break;
638		case ETH_P_TEB:
639			olen += ETH_HLEN;
640			break;
641		}
642		break;
643	case IPPROTO_UDP:
644		olen += sizeof(struct udphdr);
645		if (bpf_skb_load_bytes(skb, off + len, &udph, sizeof(udph)) < 0)
646			return TC_ACT_OK;
647		switch (bpf_ntohs(udph.dest)) {
648		case MPLS_OVER_UDP_PORT:
649			olen += sizeof(mpls_label);
650			break;
651		case ETH_OVER_UDP_PORT:
652			olen += ETH_HLEN;
653			break;
654		case VXLAN_UDP_PORT:
655			olen += ETH_HLEN + sizeof(struct vxlanhdr);
656			break;
657		}
658		break;
659	default:
660		return TC_ACT_OK;
661	}
662
663	if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC, flags))
 
664		return TC_ACT_SHOT;
665
666	return TC_ACT_OK;
667}
668
669static int decap_ipv4(struct __sk_buff *skb)
670{
671	struct iphdr iph_outer;
672
673	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
674			       sizeof(iph_outer)) < 0)
675		return TC_ACT_OK;
676
677	if (iph_outer.ihl != 5)
678		return TC_ACT_OK;
679
680	return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
681			      iph_outer.protocol);
682}
683
684static int decap_ipv6(struct __sk_buff *skb)
685{
686	struct ipv6hdr iph_outer;
687
688	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
689			       sizeof(iph_outer)) < 0)
690		return TC_ACT_OK;
691
692	return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
693			      iph_outer.nexthdr);
694}
695
696SEC("decap")
697int decap_f(struct __sk_buff *skb)
698{
699	switch (skb->protocol) {
700	case __bpf_constant_htons(ETH_P_IP):
701		return decap_ipv4(skb);
702	case __bpf_constant_htons(ETH_P_IPV6):
703		return decap_ipv6(skb);
704	default:
705		/* does not match, ignore */
706		return TC_ACT_OK;
707	}
708}
709
710char __license[] SEC("license") = "GPL";
v5.4
  1// SPDX-License-Identifier: GPL-2.0
  2
  3/* In-place tunneling */
  4
  5#include <stdbool.h>
  6#include <string.h>
  7
  8#include <linux/stddef.h>
  9#include <linux/bpf.h>
 10#include <linux/if_ether.h>
 11#include <linux/in.h>
 12#include <linux/ip.h>
 13#include <linux/ipv6.h>
 14#include <linux/mpls.h>
 15#include <linux/tcp.h>
 16#include <linux/udp.h>
 17#include <linux/pkt_cls.h>
 18#include <linux/types.h>
 19
 20#include "bpf_endian.h"
 21#include "bpf_helpers.h"
 
 
 
 22
 23static const int cfg_port = 8000;
 24
 25static const int cfg_udp_src = 20000;
 26
 
 
 27#define	UDP_PORT		5555
 28#define	MPLS_OVER_UDP_PORT	6635
 29#define	ETH_OVER_UDP_PORT	7777
 
 
 
 
 
 
 
 
 
 
 
 
 30
 31/* MPLS label 1000 with S bit (last label) set and ttl of 255. */
 32static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
 33						     MPLS_LS_S_MASK | 0xff);
 34
 
 
 
 
 
 35struct gre_hdr {
 36	__be16 flags;
 37	__be16 protocol;
 38} __attribute__((packed));
 39
 40union l4hdr {
 41	struct udphdr udp;
 42	struct gre_hdr gre;
 43};
 44
 45struct v4hdr {
 46	struct iphdr ip;
 47	union l4hdr l4hdr;
 48	__u8 pad[16];			/* enough space for L2 header */
 49} __attribute__((packed));
 50
 51struct v6hdr {
 52	struct ipv6hdr ip;
 53	union l4hdr l4hdr;
 54	__u8 pad[16];			/* enough space for L2 header */
 55} __attribute__((packed));
 56
 57static __always_inline void set_ipv4_csum(struct iphdr *iph)
 58{
 59	__u16 *iph16 = (__u16 *)iph;
 60	__u32 csum;
 61	int i;
 62
 63	iph->check = 0;
 64
 65#pragma clang loop unroll(full)
 66	for (i = 0, csum = 0; i < sizeof(*iph) >> 1; i++)
 67		csum += *iph16++;
 68
 69	iph->check = ~((csum & 0xffff) + (csum >> 16));
 70}
 71
 72static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
 73				      __u16 l2_proto)
 74{
 75	__u16 udp_dst = UDP_PORT;
 76	struct iphdr iph_inner;
 77	struct v4hdr h_outer;
 78	struct tcphdr tcph;
 79	int olen, l2_len;
 
 80	int tcp_off;
 81	__u64 flags;
 82
 83	/* Most tests encapsulate a packet into a tunnel with the same
 84	 * network protocol, and derive the outer header fields from
 85	 * the inner header.
 86	 *
 87	 * The 6in4 case tests different inner and outer protocols. As
 88	 * the inner is ipv6, but the outer expects an ipv4 header as
 89	 * input, manually build a struct iphdr based on the ipv6hdr.
 90	 */
 91	if (encap_proto == IPPROTO_IPV6) {
 92		const __u32 saddr = (192 << 24) | (168 << 16) | (1 << 8) | 1;
 93		const __u32 daddr = (192 << 24) | (168 << 16) | (1 << 8) | 2;
 94		struct ipv6hdr iph6_inner;
 95
 96		/* Read the IPv6 header */
 97		if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph6_inner,
 98				       sizeof(iph6_inner)) < 0)
 99			return TC_ACT_OK;
100
101		/* Derive the IPv4 header fields from the IPv6 header */
102		memset(&iph_inner, 0, sizeof(iph_inner));
103		iph_inner.version = 4;
104		iph_inner.ihl = 5;
105		iph_inner.tot_len = bpf_htons(sizeof(iph6_inner) +
106				    bpf_ntohs(iph6_inner.payload_len));
107		iph_inner.ttl = iph6_inner.hop_limit - 1;
108		iph_inner.protocol = iph6_inner.nexthdr;
109		iph_inner.saddr = __bpf_constant_htonl(saddr);
110		iph_inner.daddr = __bpf_constant_htonl(daddr);
111
112		tcp_off = sizeof(iph6_inner);
113	} else {
114		if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
115				       sizeof(iph_inner)) < 0)
116			return TC_ACT_OK;
117
118		tcp_off = sizeof(iph_inner);
119	}
120
121	/* filter only packets we want */
122	if (iph_inner.ihl != 5 || iph_inner.protocol != IPPROTO_TCP)
123		return TC_ACT_OK;
124
125	if (bpf_skb_load_bytes(skb, ETH_HLEN + tcp_off,
126			       &tcph, sizeof(tcph)) < 0)
127		return TC_ACT_OK;
128
129	if (tcph.dest != __bpf_constant_htons(cfg_port))
130		return TC_ACT_OK;
131
132	olen = sizeof(h_outer.ip);
133	l2_len = 0;
134
135	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV4;
136
137	switch (l2_proto) {
138	case ETH_P_MPLS_UC:
139		l2_len = sizeof(mpls_label);
140		udp_dst = MPLS_OVER_UDP_PORT;
141		break;
142	case ETH_P_TEB:
143		l2_len = ETH_HLEN;
144		udp_dst = ETH_OVER_UDP_PORT;
 
 
 
 
145		break;
146	}
147	flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
148
149	switch (encap_proto) {
150	case IPPROTO_GRE:
151		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
152		olen += sizeof(h_outer.l4hdr.gre);
153		h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
154		h_outer.l4hdr.gre.flags = 0;
155		break;
156	case IPPROTO_UDP:
157		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
158		olen += sizeof(h_outer.l4hdr.udp);
159		h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
160		h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
161		h_outer.l4hdr.udp.check = 0;
162		h_outer.l4hdr.udp.len = bpf_htons(bpf_ntohs(iph_inner.tot_len) +
163						  sizeof(h_outer.l4hdr.udp) +
164						  l2_len);
165		break;
166	case IPPROTO_IPIP:
167	case IPPROTO_IPV6:
168		break;
169	default:
170		return TC_ACT_OK;
171	}
172
173	/* add L2 encap (if specified) */
 
174	switch (l2_proto) {
175	case ETH_P_MPLS_UC:
176		*((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label;
177		break;
178	case ETH_P_TEB:
179		if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen,
180				       ETH_HLEN))
 
 
 
 
 
 
 
 
 
 
181			return TC_ACT_SHOT;
 
182		break;
183	}
184	olen += l2_len;
185
186	/* add room between mac and network header */
187	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
188		return TC_ACT_SHOT;
189
190	/* prepare new outer network header */
191	h_outer.ip = iph_inner;
192	h_outer.ip.tot_len = bpf_htons(olen +
193				       bpf_ntohs(h_outer.ip.tot_len));
194	h_outer.ip.protocol = encap_proto;
195
196	set_ipv4_csum((void *)&h_outer.ip);
197
198	/* store new outer network header */
199	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
200				BPF_F_INVALIDATE_HASH) < 0)
201		return TC_ACT_SHOT;
202
203	/* if changing outer proto type, update eth->h_proto */
204	if (encap_proto == IPPROTO_IPV6) {
205		struct ethhdr eth;
206
207		if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)) < 0)
208			return TC_ACT_SHOT;
209		eth.h_proto = bpf_htons(ETH_P_IP);
210		if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0) < 0)
211			return TC_ACT_SHOT;
212	}
213
214	return TC_ACT_OK;
215}
216
217static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
218				      __u16 l2_proto)
219{
 
 
 
 
 
 
220	__u16 udp_dst = UDP_PORT;
221	struct ipv6hdr iph_inner;
222	struct v6hdr h_outer;
223	struct tcphdr tcph;
224	int olen, l2_len;
 
225	__u16 tot_len;
226	__u64 flags;
227
228	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
229			       sizeof(iph_inner)) < 0)
230		return TC_ACT_OK;
231
232	/* filter only packets we want */
233	if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner),
234			       &tcph, sizeof(tcph)) < 0)
235		return TC_ACT_OK;
236
237	if (tcph.dest != __bpf_constant_htons(cfg_port))
238		return TC_ACT_OK;
239
240	olen = sizeof(h_outer.ip);
241	l2_len = 0;
242
243	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;
244
245	switch (l2_proto) {
246	case ETH_P_MPLS_UC:
247		l2_len = sizeof(mpls_label);
248		udp_dst = MPLS_OVER_UDP_PORT;
249		break;
250	case ETH_P_TEB:
251		l2_len = ETH_HLEN;
252		udp_dst = ETH_OVER_UDP_PORT;
 
 
 
 
253		break;
254	}
255	flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
256
257	switch (encap_proto) {
258	case IPPROTO_GRE:
259		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
260		olen += sizeof(h_outer.l4hdr.gre);
261		h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
262		h_outer.l4hdr.gre.flags = 0;
263		break;
264	case IPPROTO_UDP:
265		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
266		olen += sizeof(h_outer.l4hdr.udp);
267		h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
268		h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
269		tot_len = bpf_ntohs(iph_inner.payload_len) + sizeof(iph_inner) +
270			  sizeof(h_outer.l4hdr.udp);
271		h_outer.l4hdr.udp.check = 0;
272		h_outer.l4hdr.udp.len = bpf_htons(tot_len);
273		break;
274	case IPPROTO_IPV6:
275		break;
276	default:
277		return TC_ACT_OK;
278	}
279
280	/* add L2 encap (if specified) */
 
281	switch (l2_proto) {
282	case ETH_P_MPLS_UC:
283		*((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label;
284		break;
285	case ETH_P_TEB:
286		if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen,
287				       ETH_HLEN))
 
 
 
 
 
 
 
 
 
 
288			return TC_ACT_SHOT;
289		break;
290	}
291	olen += l2_len;
292
293	/* add room between mac and network header */
294	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
295		return TC_ACT_SHOT;
296
297	/* prepare new outer network header */
298	h_outer.ip = iph_inner;
299	h_outer.ip.payload_len = bpf_htons(olen +
300					   bpf_ntohs(h_outer.ip.payload_len));
301
302	h_outer.ip.nexthdr = encap_proto;
303
304	/* store new outer network header */
305	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
306				BPF_F_INVALIDATE_HASH) < 0)
307		return TC_ACT_SHOT;
308
309	return TC_ACT_OK;
310}
311
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
312SEC("encap_ipip_none")
313int __encap_ipip_none(struct __sk_buff *skb)
314{
315	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
316		return encap_ipv4(skb, IPPROTO_IPIP, ETH_P_IP);
317	else
318		return TC_ACT_OK;
319}
320
321SEC("encap_gre_none")
322int __encap_gre_none(struct __sk_buff *skb)
323{
324	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
325		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_IP);
326	else
327		return TC_ACT_OK;
328}
329
330SEC("encap_gre_mpls")
331int __encap_gre_mpls(struct __sk_buff *skb)
332{
333	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
334		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
335	else
336		return TC_ACT_OK;
337}
338
339SEC("encap_gre_eth")
340int __encap_gre_eth(struct __sk_buff *skb)
341{
342	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
343		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_TEB);
344	else
345		return TC_ACT_OK;
346}
347
348SEC("encap_udp_none")
349int __encap_udp_none(struct __sk_buff *skb)
350{
351	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
352		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_IP);
353	else
354		return TC_ACT_OK;
355}
356
357SEC("encap_udp_mpls")
358int __encap_udp_mpls(struct __sk_buff *skb)
359{
360	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
361		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
362	else
363		return TC_ACT_OK;
364}
365
366SEC("encap_udp_eth")
367int __encap_udp_eth(struct __sk_buff *skb)
368{
369	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
370		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_TEB);
371	else
372		return TC_ACT_OK;
373}
374
 
 
 
 
 
 
 
 
 
 
 
375SEC("encap_sit_none")
376int __encap_sit_none(struct __sk_buff *skb)
377{
378	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
379		return encap_ipv4(skb, IPPROTO_IPV6, ETH_P_IP);
380	else
381		return TC_ACT_OK;
382}
383
384SEC("encap_ip6tnl_none")
385int __encap_ip6tnl_none(struct __sk_buff *skb)
386{
387	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
388		return encap_ipv6(skb, IPPROTO_IPV6, ETH_P_IPV6);
389	else
390		return TC_ACT_OK;
391}
392
 
 
 
 
 
 
 
 
 
393SEC("encap_ip6gre_none")
394int __encap_ip6gre_none(struct __sk_buff *skb)
395{
396	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
397		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_IPV6);
398	else
399		return TC_ACT_OK;
400}
401
402SEC("encap_ip6gre_mpls")
403int __encap_ip6gre_mpls(struct __sk_buff *skb)
404{
405	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
406		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
407	else
408		return TC_ACT_OK;
409}
410
411SEC("encap_ip6gre_eth")
412int __encap_ip6gre_eth(struct __sk_buff *skb)
413{
414	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
415		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_TEB);
416	else
417		return TC_ACT_OK;
418}
419
420SEC("encap_ip6udp_none")
421int __encap_ip6udp_none(struct __sk_buff *skb)
422{
423	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
424		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_IPV6);
425	else
426		return TC_ACT_OK;
427}
428
429SEC("encap_ip6udp_mpls")
430int __encap_ip6udp_mpls(struct __sk_buff *skb)
431{
432	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
433		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
434	else
435		return TC_ACT_OK;
436}
437
438SEC("encap_ip6udp_eth")
439int __encap_ip6udp_eth(struct __sk_buff *skb)
440{
441	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
442		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_TEB);
443	else
444		return TC_ACT_OK;
445}
446
 
 
 
 
 
 
 
 
 
 
 
447static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
448{
449	char buf[sizeof(struct v6hdr)];
 
450	struct gre_hdr greh;
451	struct udphdr udph;
452	int olen = len;
453
454	switch (proto) {
455	case IPPROTO_IPIP:
 
 
456	case IPPROTO_IPV6:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
457		break;
458	case IPPROTO_GRE:
459		olen += sizeof(struct gre_hdr);
460		if (bpf_skb_load_bytes(skb, off + len, &greh, sizeof(greh)) < 0)
461			return TC_ACT_OK;
462		switch (bpf_ntohs(greh.protocol)) {
463		case ETH_P_MPLS_UC:
464			olen += sizeof(mpls_label);
465			break;
466		case ETH_P_TEB:
467			olen += ETH_HLEN;
468			break;
469		}
470		break;
471	case IPPROTO_UDP:
472		olen += sizeof(struct udphdr);
473		if (bpf_skb_load_bytes(skb, off + len, &udph, sizeof(udph)) < 0)
474			return TC_ACT_OK;
475		switch (bpf_ntohs(udph.dest)) {
476		case MPLS_OVER_UDP_PORT:
477			olen += sizeof(mpls_label);
478			break;
479		case ETH_OVER_UDP_PORT:
480			olen += ETH_HLEN;
481			break;
 
 
 
482		}
483		break;
484	default:
485		return TC_ACT_OK;
486	}
487
488	if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC,
489				BPF_F_ADJ_ROOM_FIXED_GSO))
490		return TC_ACT_SHOT;
491
492	return TC_ACT_OK;
493}
494
495static int decap_ipv4(struct __sk_buff *skb)
496{
497	struct iphdr iph_outer;
498
499	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
500			       sizeof(iph_outer)) < 0)
501		return TC_ACT_OK;
502
503	if (iph_outer.ihl != 5)
504		return TC_ACT_OK;
505
506	return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
507			      iph_outer.protocol);
508}
509
510static int decap_ipv6(struct __sk_buff *skb)
511{
512	struct ipv6hdr iph_outer;
513
514	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
515			       sizeof(iph_outer)) < 0)
516		return TC_ACT_OK;
517
518	return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
519			      iph_outer.nexthdr);
520}
521
522SEC("decap")
523int decap_f(struct __sk_buff *skb)
524{
525	switch (skb->protocol) {
526	case __bpf_constant_htons(ETH_P_IP):
527		return decap_ipv4(skb);
528	case __bpf_constant_htons(ETH_P_IPV6):
529		return decap_ipv6(skb);
530	default:
531		/* does not match, ignore */
532		return TC_ACT_OK;
533	}
534}
535
536char __license[] SEC("license") = "GPL";