Loading...
1// SPDX-License-Identifier: GPL-2.0
2
3/* In-place tunneling */
4
5#include <stdbool.h>
6#include <string.h>
7
8#include <linux/stddef.h>
9#include <linux/bpf.h>
10#include <linux/if_ether.h>
11#include <linux/in.h>
12#include <linux/ip.h>
13#include <linux/ipv6.h>
14#include <linux/mpls.h>
15#include <linux/tcp.h>
16#include <linux/udp.h>
17#include <linux/pkt_cls.h>
18#include <linux/types.h>
19
20#include <bpf/bpf_endian.h>
21#include <bpf/bpf_helpers.h>
22#include "bpf_compiler.h"
23
24#pragma GCC diagnostic ignored "-Waddress-of-packed-member"
25
/* TCP destination port that selects packets for encapsulation. */
static const int cfg_port = 8000;

/* UDP source port written into UDP-based outer headers. */
static const int cfg_udp_src = 20000;

/* Room reserved after the L4 header for the largest L2 encap (VXLAN + Ethernet). */
#define L2_PAD_SZ	(sizeof(struct vxlanhdr) + ETH_HLEN)

/* UDP destination ports; the encap side picks one per L2 encapsulation
 * and the decap side uses it to work out how many bytes to strip.
 */
#define	UDP_PORT		5555
#define	MPLS_OVER_UDP_PORT	6635
#define	ETH_OVER_UDP_PORT	7777
#define	VXLAN_UDP_PORT		8472

/* Bit for the ext_proto argument of the __encap_* helpers. */
#define	EXTPROTO_VXLAN	0x1

#define	VXLAN_N_VID     (1u << 24)
/* Mask of the VNI field as it sits in a network-order vx_vni word. */
#define	VXLAN_VNI_MASK	bpf_htonl((VXLAN_N_VID - 1) << 8)
#define	VXLAN_FLAGS     0x8
#define	VXLAN_VNI       1

/* Fallback in case the included headers do not provide it. */
#ifndef NEXTHDR_DEST
#define NEXTHDR_DEST	60
#endif

/* MPLS label 1000 with S bit (last label) set and ttl of 255. */
static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
						     MPLS_LS_S_MASK | 0xff);
51
/* VXLAN header as written in front of the inner Ethernet header. */
struct vxlanhdr {
	__be32 vx_flags;	/* flags word, big-endian */
	__be32 vx_vni;		/* VNI word, big-endian */
} __attribute__((packed));
56
/* Base GRE header: flags plus the ethertype of the encapsulated payload. */
struct gre_hdr {
	__be16 flags;
	__be16 protocol;
} __attribute__((packed));
61
/* Outer L4 header: either UDP or GRE, sharing the same storage. */
union l4hdr {
	struct udphdr udp;
	struct gre_hdr gre;
};
66
/* Staging buffer for a complete IPv4 outer header stack (L3 + L4 + L2). */
struct v4hdr {
	struct iphdr ip;
	union l4hdr l4hdr;
	__u8 pad[L2_PAD_SZ];		/* space for L2 header / vxlan header ... */
} __attribute__((packed));
72
/* Staging buffer for a complete IPv6 outer header stack (L3 + L4 + L2). */
struct v6hdr {
	struct ipv6hdr ip;
	union l4hdr l4hdr;
	__u8 pad[L2_PAD_SZ];		/* space for L2 header / vxlan header ... */
} __attribute__((packed));
78
79static __always_inline void set_ipv4_csum(struct iphdr *iph)
80{
81 __u16 *iph16 = (__u16 *)iph;
82 __u32 csum;
83 int i;
84
85 iph->check = 0;
86
87 __pragma_loop_unroll_full
88 for (i = 0, csum = 0; i < sizeof(*iph) >> 1; i++)
89 csum += *iph16++;
90
91 iph->check = ~((csum & 0xffff) + (csum >> 16));
92}
93
94static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
95 __u16 l2_proto, __u16 ext_proto)
96{
97 __u16 udp_dst = UDP_PORT;
98 struct iphdr iph_inner;
99 struct v4hdr h_outer;
100 struct tcphdr tcph;
101 int olen, l2_len;
102 __u8 *l2_hdr = NULL;
103 int tcp_off;
104 __u64 flags;
105
106 /* Most tests encapsulate a packet into a tunnel with the same
107 * network protocol, and derive the outer header fields from
108 * the inner header.
109 *
110 * The 6in4 case tests different inner and outer protocols. As
111 * the inner is ipv6, but the outer expects an ipv4 header as
112 * input, manually build a struct iphdr based on the ipv6hdr.
113 */
114 if (encap_proto == IPPROTO_IPV6) {
115 const __u32 saddr = (192 << 24) | (168 << 16) | (1 << 8) | 1;
116 const __u32 daddr = (192 << 24) | (168 << 16) | (1 << 8) | 2;
117 struct ipv6hdr iph6_inner;
118
119 /* Read the IPv6 header */
120 if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph6_inner,
121 sizeof(iph6_inner)) < 0)
122 return TC_ACT_OK;
123
124 /* Derive the IPv4 header fields from the IPv6 header */
125 memset(&iph_inner, 0, sizeof(iph_inner));
126 iph_inner.version = 4;
127 iph_inner.ihl = 5;
128 iph_inner.tot_len = bpf_htons(sizeof(iph6_inner) +
129 bpf_ntohs(iph6_inner.payload_len));
130 iph_inner.ttl = iph6_inner.hop_limit - 1;
131 iph_inner.protocol = iph6_inner.nexthdr;
132 iph_inner.saddr = __bpf_constant_htonl(saddr);
133 iph_inner.daddr = __bpf_constant_htonl(daddr);
134
135 tcp_off = sizeof(iph6_inner);
136 } else {
137 if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
138 sizeof(iph_inner)) < 0)
139 return TC_ACT_OK;
140
141 tcp_off = sizeof(iph_inner);
142 }
143
144 /* filter only packets we want */
145 if (iph_inner.ihl != 5 || iph_inner.protocol != IPPROTO_TCP)
146 return TC_ACT_OK;
147
148 if (bpf_skb_load_bytes(skb, ETH_HLEN + tcp_off,
149 &tcph, sizeof(tcph)) < 0)
150 return TC_ACT_OK;
151
152 if (tcph.dest != __bpf_constant_htons(cfg_port))
153 return TC_ACT_OK;
154
155 olen = sizeof(h_outer.ip);
156 l2_len = 0;
157
158 flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV4;
159
160 switch (l2_proto) {
161 case ETH_P_MPLS_UC:
162 l2_len = sizeof(mpls_label);
163 udp_dst = MPLS_OVER_UDP_PORT;
164 break;
165 case ETH_P_TEB:
166 l2_len = ETH_HLEN;
167 if (ext_proto & EXTPROTO_VXLAN) {
168 udp_dst = VXLAN_UDP_PORT;
169 l2_len += sizeof(struct vxlanhdr);
170 } else
171 udp_dst = ETH_OVER_UDP_PORT;
172 break;
173 }
174 flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
175
176 switch (encap_proto) {
177 case IPPROTO_GRE:
178 flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
179 olen += sizeof(h_outer.l4hdr.gre);
180 h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
181 h_outer.l4hdr.gre.flags = 0;
182 break;
183 case IPPROTO_UDP:
184 flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
185 olen += sizeof(h_outer.l4hdr.udp);
186 h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
187 h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
188 h_outer.l4hdr.udp.check = 0;
189 h_outer.l4hdr.udp.len = bpf_htons(bpf_ntohs(iph_inner.tot_len) +
190 sizeof(h_outer.l4hdr.udp) +
191 l2_len);
192 break;
193 case IPPROTO_IPIP:
194 case IPPROTO_IPV6:
195 break;
196 default:
197 return TC_ACT_OK;
198 }
199
200 /* add L2 encap (if specified) */
201 l2_hdr = (__u8 *)&h_outer + olen;
202 switch (l2_proto) {
203 case ETH_P_MPLS_UC:
204 *(__u32 *)l2_hdr = mpls_label;
205 break;
206 case ETH_P_TEB:
207 flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
208
209 if (ext_proto & EXTPROTO_VXLAN) {
210 struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
211
212 vxlan_hdr->vx_flags = VXLAN_FLAGS;
213 vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);
214
215 l2_hdr += sizeof(struct vxlanhdr);
216 }
217
218 if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
219 return TC_ACT_SHOT;
220
221 break;
222 }
223 olen += l2_len;
224
225 /* add room between mac and network header */
226 if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
227 return TC_ACT_SHOT;
228
229 /* prepare new outer network header */
230 h_outer.ip = iph_inner;
231 h_outer.ip.tot_len = bpf_htons(olen +
232 bpf_ntohs(h_outer.ip.tot_len));
233 h_outer.ip.protocol = encap_proto;
234
235 set_ipv4_csum((void *)&h_outer.ip);
236
237 /* store new outer network header */
238 if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
239 BPF_F_INVALIDATE_HASH) < 0)
240 return TC_ACT_SHOT;
241
242 /* if changing outer proto type, update eth->h_proto */
243 if (encap_proto == IPPROTO_IPV6) {
244 struct ethhdr eth;
245
246 if (bpf_skb_load_bytes(skb, 0, ð, sizeof(eth)) < 0)
247 return TC_ACT_SHOT;
248 eth.h_proto = bpf_htons(ETH_P_IP);
249 if (bpf_skb_store_bytes(skb, 0, ð, sizeof(eth), 0) < 0)
250 return TC_ACT_SHOT;
251 }
252
253 return TC_ACT_OK;
254}
255
256static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
257 __u16 l2_proto)
258{
259 return __encap_ipv4(skb, encap_proto, l2_proto, 0);
260}
261
262static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
263 __u16 l2_proto, __u16 ext_proto)
264{
265 __u16 udp_dst = UDP_PORT;
266 struct ipv6hdr iph_inner;
267 struct v6hdr h_outer;
268 struct tcphdr tcph;
269 int olen, l2_len;
270 __u8 *l2_hdr = NULL;
271 __u16 tot_len;
272 __u64 flags;
273
274 if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
275 sizeof(iph_inner)) < 0)
276 return TC_ACT_OK;
277
278 /* filter only packets we want */
279 if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner),
280 &tcph, sizeof(tcph)) < 0)
281 return TC_ACT_OK;
282
283 if (tcph.dest != __bpf_constant_htons(cfg_port))
284 return TC_ACT_OK;
285
286 olen = sizeof(h_outer.ip);
287 l2_len = 0;
288
289 flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;
290
291 switch (l2_proto) {
292 case ETH_P_MPLS_UC:
293 l2_len = sizeof(mpls_label);
294 udp_dst = MPLS_OVER_UDP_PORT;
295 break;
296 case ETH_P_TEB:
297 l2_len = ETH_HLEN;
298 if (ext_proto & EXTPROTO_VXLAN) {
299 udp_dst = VXLAN_UDP_PORT;
300 l2_len += sizeof(struct vxlanhdr);
301 } else
302 udp_dst = ETH_OVER_UDP_PORT;
303 break;
304 }
305 flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
306
307 switch (encap_proto) {
308 case IPPROTO_GRE:
309 flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
310 olen += sizeof(h_outer.l4hdr.gre);
311 h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
312 h_outer.l4hdr.gre.flags = 0;
313 break;
314 case IPPROTO_UDP:
315 flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
316 olen += sizeof(h_outer.l4hdr.udp);
317 h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
318 h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
319 tot_len = bpf_ntohs(iph_inner.payload_len) + sizeof(iph_inner) +
320 sizeof(h_outer.l4hdr.udp) + l2_len;
321 h_outer.l4hdr.udp.check = 0;
322 h_outer.l4hdr.udp.len = bpf_htons(tot_len);
323 break;
324 case IPPROTO_IPV6:
325 break;
326 default:
327 return TC_ACT_OK;
328 }
329
330 /* add L2 encap (if specified) */
331 l2_hdr = (__u8 *)&h_outer + olen;
332 switch (l2_proto) {
333 case ETH_P_MPLS_UC:
334 *(__u32 *)l2_hdr = mpls_label;
335 break;
336 case ETH_P_TEB:
337 flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
338
339 if (ext_proto & EXTPROTO_VXLAN) {
340 struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
341
342 vxlan_hdr->vx_flags = VXLAN_FLAGS;
343 vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);
344
345 l2_hdr += sizeof(struct vxlanhdr);
346 }
347
348 if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
349 return TC_ACT_SHOT;
350 break;
351 }
352 olen += l2_len;
353
354 /* add room between mac and network header */
355 if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
356 return TC_ACT_SHOT;
357
358 /* prepare new outer network header */
359 h_outer.ip = iph_inner;
360 h_outer.ip.payload_len = bpf_htons(olen +
361 bpf_ntohs(h_outer.ip.payload_len));
362
363 h_outer.ip.nexthdr = encap_proto;
364
365 /* store new outer network header */
366 if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
367 BPF_F_INVALIDATE_HASH) < 0)
368 return TC_ACT_SHOT;
369
370 return TC_ACT_OK;
371}
372
373static int encap_ipv6_ipip6(struct __sk_buff *skb)
374{
375 struct iphdr iph_inner;
376 struct v6hdr h_outer;
377 struct tcphdr tcph;
378 struct ethhdr eth;
379 __u64 flags;
380 int olen;
381
382 if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
383 sizeof(iph_inner)) < 0)
384 return TC_ACT_OK;
385
386 /* filter only packets we want */
387 if (bpf_skb_load_bytes(skb, ETH_HLEN + (iph_inner.ihl << 2),
388 &tcph, sizeof(tcph)) < 0)
389 return TC_ACT_OK;
390
391 if (tcph.dest != __bpf_constant_htons(cfg_port))
392 return TC_ACT_OK;
393
394 olen = sizeof(h_outer.ip);
395
396 flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;
397
398 /* add room between mac and network header */
399 if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
400 return TC_ACT_SHOT;
401
402 /* prepare new outer network header */
403 memset(&h_outer.ip, 0, sizeof(h_outer.ip));
404 h_outer.ip.version = 6;
405 h_outer.ip.hop_limit = iph_inner.ttl;
406 h_outer.ip.saddr.s6_addr[1] = 0xfd;
407 h_outer.ip.saddr.s6_addr[15] = 1;
408 h_outer.ip.daddr.s6_addr[1] = 0xfd;
409 h_outer.ip.daddr.s6_addr[15] = 2;
410 h_outer.ip.payload_len = iph_inner.tot_len;
411 h_outer.ip.nexthdr = IPPROTO_IPIP;
412
413 /* store new outer network header */
414 if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
415 BPF_F_INVALIDATE_HASH) < 0)
416 return TC_ACT_SHOT;
417
418 /* update eth->h_proto */
419 if (bpf_skb_load_bytes(skb, 0, ð, sizeof(eth)) < 0)
420 return TC_ACT_SHOT;
421 eth.h_proto = bpf_htons(ETH_P_IPV6);
422 if (bpf_skb_store_bytes(skb, 0, ð, sizeof(eth), 0) < 0)
423 return TC_ACT_SHOT;
424
425 return TC_ACT_OK;
426}
427
428static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
429 __u16 l2_proto)
430{
431 return __encap_ipv6(skb, encap_proto, l2_proto, 0);
432}
433
434SEC("encap_ipip_none")
435int __encap_ipip_none(struct __sk_buff *skb)
436{
437 if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
438 return encap_ipv4(skb, IPPROTO_IPIP, ETH_P_IP);
439 else
440 return TC_ACT_OK;
441}
442
443SEC("encap_gre_none")
444int __encap_gre_none(struct __sk_buff *skb)
445{
446 if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
447 return encap_ipv4(skb, IPPROTO_GRE, ETH_P_IP);
448 else
449 return TC_ACT_OK;
450}
451
452SEC("encap_gre_mpls")
453int __encap_gre_mpls(struct __sk_buff *skb)
454{
455 if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
456 return encap_ipv4(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
457 else
458 return TC_ACT_OK;
459}
460
461SEC("encap_gre_eth")
462int __encap_gre_eth(struct __sk_buff *skb)
463{
464 if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
465 return encap_ipv4(skb, IPPROTO_GRE, ETH_P_TEB);
466 else
467 return TC_ACT_OK;
468}
469
470SEC("encap_udp_none")
471int __encap_udp_none(struct __sk_buff *skb)
472{
473 if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
474 return encap_ipv4(skb, IPPROTO_UDP, ETH_P_IP);
475 else
476 return TC_ACT_OK;
477}
478
479SEC("encap_udp_mpls")
480int __encap_udp_mpls(struct __sk_buff *skb)
481{
482 if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
483 return encap_ipv4(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
484 else
485 return TC_ACT_OK;
486}
487
488SEC("encap_udp_eth")
489int __encap_udp_eth(struct __sk_buff *skb)
490{
491 if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
492 return encap_ipv4(skb, IPPROTO_UDP, ETH_P_TEB);
493 else
494 return TC_ACT_OK;
495}
496
497SEC("encap_vxlan_eth")
498int __encap_vxlan_eth(struct __sk_buff *skb)
499{
500 if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
501 return __encap_ipv4(skb, IPPROTO_UDP,
502 ETH_P_TEB,
503 EXTPROTO_VXLAN);
504 else
505 return TC_ACT_OK;
506}
507
508SEC("encap_sit_none")
509int __encap_sit_none(struct __sk_buff *skb)
510{
511 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
512 return encap_ipv4(skb, IPPROTO_IPV6, ETH_P_IP);
513 else
514 return TC_ACT_OK;
515}
516
517SEC("encap_ip6tnl_none")
518int __encap_ip6tnl_none(struct __sk_buff *skb)
519{
520 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
521 return encap_ipv6(skb, IPPROTO_IPV6, ETH_P_IPV6);
522 else
523 return TC_ACT_OK;
524}
525
526SEC("encap_ipip6_none")
527int __encap_ipip6_none(struct __sk_buff *skb)
528{
529 if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
530 return encap_ipv6_ipip6(skb);
531 else
532 return TC_ACT_OK;
533}
534
535SEC("encap_ip6gre_none")
536int __encap_ip6gre_none(struct __sk_buff *skb)
537{
538 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
539 return encap_ipv6(skb, IPPROTO_GRE, ETH_P_IPV6);
540 else
541 return TC_ACT_OK;
542}
543
544SEC("encap_ip6gre_mpls")
545int __encap_ip6gre_mpls(struct __sk_buff *skb)
546{
547 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
548 return encap_ipv6(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
549 else
550 return TC_ACT_OK;
551}
552
553SEC("encap_ip6gre_eth")
554int __encap_ip6gre_eth(struct __sk_buff *skb)
555{
556 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
557 return encap_ipv6(skb, IPPROTO_GRE, ETH_P_TEB);
558 else
559 return TC_ACT_OK;
560}
561
562SEC("encap_ip6udp_none")
563int __encap_ip6udp_none(struct __sk_buff *skb)
564{
565 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
566 return encap_ipv6(skb, IPPROTO_UDP, ETH_P_IPV6);
567 else
568 return TC_ACT_OK;
569}
570
571SEC("encap_ip6udp_mpls")
572int __encap_ip6udp_mpls(struct __sk_buff *skb)
573{
574 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
575 return encap_ipv6(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
576 else
577 return TC_ACT_OK;
578}
579
580SEC("encap_ip6udp_eth")
581int __encap_ip6udp_eth(struct __sk_buff *skb)
582{
583 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
584 return encap_ipv6(skb, IPPROTO_UDP, ETH_P_TEB);
585 else
586 return TC_ACT_OK;
587}
588
589SEC("encap_ip6vxlan_eth")
590int __encap_ip6vxlan_eth(struct __sk_buff *skb)
591{
592 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
593 return __encap_ipv6(skb, IPPROTO_UDP,
594 ETH_P_TEB,
595 EXTPROTO_VXLAN);
596 else
597 return TC_ACT_OK;
598}
599
600static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
601{
602 __u64 flags = BPF_F_ADJ_ROOM_FIXED_GSO;
603 struct ipv6_opt_hdr ip6_opt_hdr;
604 struct gre_hdr greh;
605 struct udphdr udph;
606 int olen = len;
607
608 switch (proto) {
609 case IPPROTO_IPIP:
610 flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
611 break;
612 case IPPROTO_IPV6:
613 flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
614 break;
615 case NEXTHDR_DEST:
616 if (bpf_skb_load_bytes(skb, off + len, &ip6_opt_hdr,
617 sizeof(ip6_opt_hdr)) < 0)
618 return TC_ACT_OK;
619 switch (ip6_opt_hdr.nexthdr) {
620 case IPPROTO_IPIP:
621 flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
622 break;
623 case IPPROTO_IPV6:
624 flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
625 break;
626 default:
627 return TC_ACT_OK;
628 }
629 break;
630 case IPPROTO_GRE:
631 olen += sizeof(struct gre_hdr);
632 if (bpf_skb_load_bytes(skb, off + len, &greh, sizeof(greh)) < 0)
633 return TC_ACT_OK;
634 switch (bpf_ntohs(greh.protocol)) {
635 case ETH_P_MPLS_UC:
636 olen += sizeof(mpls_label);
637 break;
638 case ETH_P_TEB:
639 olen += ETH_HLEN;
640 break;
641 }
642 break;
643 case IPPROTO_UDP:
644 olen += sizeof(struct udphdr);
645 if (bpf_skb_load_bytes(skb, off + len, &udph, sizeof(udph)) < 0)
646 return TC_ACT_OK;
647 switch (bpf_ntohs(udph.dest)) {
648 case MPLS_OVER_UDP_PORT:
649 olen += sizeof(mpls_label);
650 break;
651 case ETH_OVER_UDP_PORT:
652 olen += ETH_HLEN;
653 break;
654 case VXLAN_UDP_PORT:
655 olen += ETH_HLEN + sizeof(struct vxlanhdr);
656 break;
657 }
658 break;
659 default:
660 return TC_ACT_OK;
661 }
662
663 if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC, flags))
664 return TC_ACT_SHOT;
665
666 return TC_ACT_OK;
667}
668
669static int decap_ipv4(struct __sk_buff *skb)
670{
671 struct iphdr iph_outer;
672
673 if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
674 sizeof(iph_outer)) < 0)
675 return TC_ACT_OK;
676
677 if (iph_outer.ihl != 5)
678 return TC_ACT_OK;
679
680 return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
681 iph_outer.protocol);
682}
683
684static int decap_ipv6(struct __sk_buff *skb)
685{
686 struct ipv6hdr iph_outer;
687
688 if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
689 sizeof(iph_outer)) < 0)
690 return TC_ACT_OK;
691
692 return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
693 iph_outer.nexthdr);
694}
695
696SEC("decap")
697int decap_f(struct __sk_buff *skb)
698{
699 switch (skb->protocol) {
700 case __bpf_constant_htons(ETH_P_IP):
701 return decap_ipv4(skb);
702 case __bpf_constant_htons(ETH_P_IPV6):
703 return decap_ipv6(skb);
704 default:
705 /* does not match, ignore */
706 return TC_ACT_OK;
707 }
708}
709
/* License string placed in the "license" section of the object. */
char __license[] SEC("license") = "GPL";
1// SPDX-License-Identifier: GPL-2.0
2
3/* In-place tunneling */
4
5#include <stdbool.h>
6#include <string.h>
7
8#include <linux/stddef.h>
9#include <linux/bpf.h>
10#include <linux/if_ether.h>
11#include <linux/in.h>
12#include <linux/ip.h>
13#include <linux/ipv6.h>
14#include <linux/mpls.h>
15#include <linux/tcp.h>
16#include <linux/udp.h>
17#include <linux/pkt_cls.h>
18#include <linux/types.h>
19
20#include <bpf/bpf_endian.h>
21#include <bpf/bpf_helpers.h>
22
/* TCP destination port that selects packets for encapsulation. */
static const int cfg_port = 8000;

/* UDP source port written into UDP-based outer headers. */
static const int cfg_udp_src = 20000;

/* Room reserved after the L4 header for the largest L2 encap (VXLAN + Ethernet). */
#define L2_PAD_SZ	(sizeof(struct vxlanhdr) + ETH_HLEN)

/* UDP destination ports; the encap side picks one per L2 encapsulation. */
#define	UDP_PORT		5555
#define	MPLS_OVER_UDP_PORT	6635
#define	ETH_OVER_UDP_PORT	7777
#define	VXLAN_UDP_PORT		8472

/* Bit for the ext_proto argument of the __encap_* helpers. */
#define	EXTPROTO_VXLAN	0x1

#define	VXLAN_N_VID     (1u << 24)
/* Mask of the VNI field as it sits in a network-order vx_vni word. */
#define	VXLAN_VNI_MASK	bpf_htonl((VXLAN_N_VID - 1) << 8)
#define	VXLAN_FLAGS     0x8
#define	VXLAN_VNI       1

/* Fallback in case the included headers do not provide it. */
#ifndef NEXTHDR_DEST
#define NEXTHDR_DEST	60
#endif

/* MPLS label 1000 with S bit (last label) set and ttl of 255. */
static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
						     MPLS_LS_S_MASK | 0xff);
48
/* VXLAN header as written in front of the inner Ethernet header. */
struct vxlanhdr {
	__be32 vx_flags;	/* flags word, big-endian */
	__be32 vx_vni;		/* VNI word, big-endian */
} __attribute__((packed));
53
/* Base GRE header: flags plus the ethertype of the encapsulated payload. */
struct gre_hdr {
	__be16 flags;
	__be16 protocol;
} __attribute__((packed));
58
/* Outer L4 header: either UDP or GRE, sharing the same storage. */
union l4hdr {
	struct udphdr udp;
	struct gre_hdr gre;
};
63
/* Staging buffer for a complete IPv4 outer header stack (L3 + L4 + L2). */
struct v4hdr {
	struct iphdr ip;
	union l4hdr l4hdr;
	__u8 pad[L2_PAD_SZ];		/* space for L2 header / vxlan header ... */
} __attribute__((packed));
69
/* Staging buffer for a complete IPv6 outer header stack (L3 + L4 + L2). */
struct v6hdr {
	struct ipv6hdr ip;
	union l4hdr l4hdr;
	__u8 pad[L2_PAD_SZ];		/* space for L2 header / vxlan header ... */
} __attribute__((packed));
75
76static __always_inline void set_ipv4_csum(struct iphdr *iph)
77{
78 __u16 *iph16 = (__u16 *)iph;
79 __u32 csum;
80 int i;
81
82 iph->check = 0;
83
84#pragma clang loop unroll(full)
85 for (i = 0, csum = 0; i < sizeof(*iph) >> 1; i++)
86 csum += *iph16++;
87
88 iph->check = ~((csum & 0xffff) + (csum >> 16));
89}
90
91static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
92 __u16 l2_proto, __u16 ext_proto)
93{
94 __u16 udp_dst = UDP_PORT;
95 struct iphdr iph_inner;
96 struct v4hdr h_outer;
97 struct tcphdr tcph;
98 int olen, l2_len;
99 __u8 *l2_hdr = NULL;
100 int tcp_off;
101 __u64 flags;
102
103 /* Most tests encapsulate a packet into a tunnel with the same
104 * network protocol, and derive the outer header fields from
105 * the inner header.
106 *
107 * The 6in4 case tests different inner and outer protocols. As
108 * the inner is ipv6, but the outer expects an ipv4 header as
109 * input, manually build a struct iphdr based on the ipv6hdr.
110 */
111 if (encap_proto == IPPROTO_IPV6) {
112 const __u32 saddr = (192 << 24) | (168 << 16) | (1 << 8) | 1;
113 const __u32 daddr = (192 << 24) | (168 << 16) | (1 << 8) | 2;
114 struct ipv6hdr iph6_inner;
115
116 /* Read the IPv6 header */
117 if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph6_inner,
118 sizeof(iph6_inner)) < 0)
119 return TC_ACT_OK;
120
121 /* Derive the IPv4 header fields from the IPv6 header */
122 memset(&iph_inner, 0, sizeof(iph_inner));
123 iph_inner.version = 4;
124 iph_inner.ihl = 5;
125 iph_inner.tot_len = bpf_htons(sizeof(iph6_inner) +
126 bpf_ntohs(iph6_inner.payload_len));
127 iph_inner.ttl = iph6_inner.hop_limit - 1;
128 iph_inner.protocol = iph6_inner.nexthdr;
129 iph_inner.saddr = __bpf_constant_htonl(saddr);
130 iph_inner.daddr = __bpf_constant_htonl(daddr);
131
132 tcp_off = sizeof(iph6_inner);
133 } else {
134 if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
135 sizeof(iph_inner)) < 0)
136 return TC_ACT_OK;
137
138 tcp_off = sizeof(iph_inner);
139 }
140
141 /* filter only packets we want */
142 if (iph_inner.ihl != 5 || iph_inner.protocol != IPPROTO_TCP)
143 return TC_ACT_OK;
144
145 if (bpf_skb_load_bytes(skb, ETH_HLEN + tcp_off,
146 &tcph, sizeof(tcph)) < 0)
147 return TC_ACT_OK;
148
149 if (tcph.dest != __bpf_constant_htons(cfg_port))
150 return TC_ACT_OK;
151
152 olen = sizeof(h_outer.ip);
153 l2_len = 0;
154
155 flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV4;
156
157 switch (l2_proto) {
158 case ETH_P_MPLS_UC:
159 l2_len = sizeof(mpls_label);
160 udp_dst = MPLS_OVER_UDP_PORT;
161 break;
162 case ETH_P_TEB:
163 l2_len = ETH_HLEN;
164 if (ext_proto & EXTPROTO_VXLAN) {
165 udp_dst = VXLAN_UDP_PORT;
166 l2_len += sizeof(struct vxlanhdr);
167 } else
168 udp_dst = ETH_OVER_UDP_PORT;
169 break;
170 }
171 flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
172
173 switch (encap_proto) {
174 case IPPROTO_GRE:
175 flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
176 olen += sizeof(h_outer.l4hdr.gre);
177 h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
178 h_outer.l4hdr.gre.flags = 0;
179 break;
180 case IPPROTO_UDP:
181 flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
182 olen += sizeof(h_outer.l4hdr.udp);
183 h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
184 h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
185 h_outer.l4hdr.udp.check = 0;
186 h_outer.l4hdr.udp.len = bpf_htons(bpf_ntohs(iph_inner.tot_len) +
187 sizeof(h_outer.l4hdr.udp) +
188 l2_len);
189 break;
190 case IPPROTO_IPIP:
191 case IPPROTO_IPV6:
192 break;
193 default:
194 return TC_ACT_OK;
195 }
196
197 /* add L2 encap (if specified) */
198 l2_hdr = (__u8 *)&h_outer + olen;
199 switch (l2_proto) {
200 case ETH_P_MPLS_UC:
201 *(__u32 *)l2_hdr = mpls_label;
202 break;
203 case ETH_P_TEB:
204 flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
205
206 if (ext_proto & EXTPROTO_VXLAN) {
207 struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
208
209 vxlan_hdr->vx_flags = VXLAN_FLAGS;
210 vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);
211
212 l2_hdr += sizeof(struct vxlanhdr);
213 }
214
215 if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
216 return TC_ACT_SHOT;
217
218 break;
219 }
220 olen += l2_len;
221
222 /* add room between mac and network header */
223 if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
224 return TC_ACT_SHOT;
225
226 /* prepare new outer network header */
227 h_outer.ip = iph_inner;
228 h_outer.ip.tot_len = bpf_htons(olen +
229 bpf_ntohs(h_outer.ip.tot_len));
230 h_outer.ip.protocol = encap_proto;
231
232 set_ipv4_csum((void *)&h_outer.ip);
233
234 /* store new outer network header */
235 if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
236 BPF_F_INVALIDATE_HASH) < 0)
237 return TC_ACT_SHOT;
238
239 /* if changing outer proto type, update eth->h_proto */
240 if (encap_proto == IPPROTO_IPV6) {
241 struct ethhdr eth;
242
243 if (bpf_skb_load_bytes(skb, 0, ð, sizeof(eth)) < 0)
244 return TC_ACT_SHOT;
245 eth.h_proto = bpf_htons(ETH_P_IP);
246 if (bpf_skb_store_bytes(skb, 0, ð, sizeof(eth), 0) < 0)
247 return TC_ACT_SHOT;
248 }
249
250 return TC_ACT_OK;
251}
252
253static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
254 __u16 l2_proto)
255{
256 return __encap_ipv4(skb, encap_proto, l2_proto, 0);
257}
258
259static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
260 __u16 l2_proto, __u16 ext_proto)
261{
262 __u16 udp_dst = UDP_PORT;
263 struct ipv6hdr iph_inner;
264 struct v6hdr h_outer;
265 struct tcphdr tcph;
266 int olen, l2_len;
267 __u8 *l2_hdr = NULL;
268 __u16 tot_len;
269 __u64 flags;
270
271 if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
272 sizeof(iph_inner)) < 0)
273 return TC_ACT_OK;
274
275 /* filter only packets we want */
276 if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner),
277 &tcph, sizeof(tcph)) < 0)
278 return TC_ACT_OK;
279
280 if (tcph.dest != __bpf_constant_htons(cfg_port))
281 return TC_ACT_OK;
282
283 olen = sizeof(h_outer.ip);
284 l2_len = 0;
285
286 flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;
287
288 switch (l2_proto) {
289 case ETH_P_MPLS_UC:
290 l2_len = sizeof(mpls_label);
291 udp_dst = MPLS_OVER_UDP_PORT;
292 break;
293 case ETH_P_TEB:
294 l2_len = ETH_HLEN;
295 if (ext_proto & EXTPROTO_VXLAN) {
296 udp_dst = VXLAN_UDP_PORT;
297 l2_len += sizeof(struct vxlanhdr);
298 } else
299 udp_dst = ETH_OVER_UDP_PORT;
300 break;
301 }
302 flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
303
304 switch (encap_proto) {
305 case IPPROTO_GRE:
306 flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
307 olen += sizeof(h_outer.l4hdr.gre);
308 h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
309 h_outer.l4hdr.gre.flags = 0;
310 break;
311 case IPPROTO_UDP:
312 flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
313 olen += sizeof(h_outer.l4hdr.udp);
314 h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
315 h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
316 tot_len = bpf_ntohs(iph_inner.payload_len) + sizeof(iph_inner) +
317 sizeof(h_outer.l4hdr.udp) + l2_len;
318 h_outer.l4hdr.udp.check = 0;
319 h_outer.l4hdr.udp.len = bpf_htons(tot_len);
320 break;
321 case IPPROTO_IPV6:
322 break;
323 default:
324 return TC_ACT_OK;
325 }
326
327 /* add L2 encap (if specified) */
328 l2_hdr = (__u8 *)&h_outer + olen;
329 switch (l2_proto) {
330 case ETH_P_MPLS_UC:
331 *(__u32 *)l2_hdr = mpls_label;
332 break;
333 case ETH_P_TEB:
334 flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
335
336 if (ext_proto & EXTPROTO_VXLAN) {
337 struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
338
339 vxlan_hdr->vx_flags = VXLAN_FLAGS;
340 vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);
341
342 l2_hdr += sizeof(struct vxlanhdr);
343 }
344
345 if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
346 return TC_ACT_SHOT;
347 break;
348 }
349 olen += l2_len;
350
351 /* add room between mac and network header */
352 if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
353 return TC_ACT_SHOT;
354
355 /* prepare new outer network header */
356 h_outer.ip = iph_inner;
357 h_outer.ip.payload_len = bpf_htons(olen +
358 bpf_ntohs(h_outer.ip.payload_len));
359
360 h_outer.ip.nexthdr = encap_proto;
361
362 /* store new outer network header */
363 if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
364 BPF_F_INVALIDATE_HASH) < 0)
365 return TC_ACT_SHOT;
366
367 return TC_ACT_OK;
368}
369
370static int encap_ipv6_ipip6(struct __sk_buff *skb)
371{
372 struct iphdr iph_inner;
373 struct v6hdr h_outer;
374 struct tcphdr tcph;
375 struct ethhdr eth;
376 __u64 flags;
377 int olen;
378
379 if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
380 sizeof(iph_inner)) < 0)
381 return TC_ACT_OK;
382
383 /* filter only packets we want */
384 if (bpf_skb_load_bytes(skb, ETH_HLEN + (iph_inner.ihl << 2),
385 &tcph, sizeof(tcph)) < 0)
386 return TC_ACT_OK;
387
388 if (tcph.dest != __bpf_constant_htons(cfg_port))
389 return TC_ACT_OK;
390
391 olen = sizeof(h_outer.ip);
392
393 flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;
394
395 /* add room between mac and network header */
396 if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
397 return TC_ACT_SHOT;
398
399 /* prepare new outer network header */
400 memset(&h_outer.ip, 0, sizeof(h_outer.ip));
401 h_outer.ip.version = 6;
402 h_outer.ip.hop_limit = iph_inner.ttl;
403 h_outer.ip.saddr.s6_addr[1] = 0xfd;
404 h_outer.ip.saddr.s6_addr[15] = 1;
405 h_outer.ip.daddr.s6_addr[1] = 0xfd;
406 h_outer.ip.daddr.s6_addr[15] = 2;
407 h_outer.ip.payload_len = iph_inner.tot_len;
408 h_outer.ip.nexthdr = IPPROTO_IPIP;
409
410 /* store new outer network header */
411 if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
412 BPF_F_INVALIDATE_HASH) < 0)
413 return TC_ACT_SHOT;
414
415 /* update eth->h_proto */
416 if (bpf_skb_load_bytes(skb, 0, ð, sizeof(eth)) < 0)
417 return TC_ACT_SHOT;
418 eth.h_proto = bpf_htons(ETH_P_IPV6);
419 if (bpf_skb_store_bytes(skb, 0, ð, sizeof(eth), 0) < 0)
420 return TC_ACT_SHOT;
421
422 return TC_ACT_OK;
423}
424
425static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
426 __u16 l2_proto)
427{
428 return __encap_ipv6(skb, encap_proto, l2_proto, 0);
429}
430
431SEC("encap_ipip_none")
432int __encap_ipip_none(struct __sk_buff *skb)
433{
434 if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
435 return encap_ipv4(skb, IPPROTO_IPIP, ETH_P_IP);
436 else
437 return TC_ACT_OK;
438}
439
440SEC("encap_gre_none")
441int __encap_gre_none(struct __sk_buff *skb)
442{
443 if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
444 return encap_ipv4(skb, IPPROTO_GRE, ETH_P_IP);
445 else
446 return TC_ACT_OK;
447}
448
449SEC("encap_gre_mpls")
450int __encap_gre_mpls(struct __sk_buff *skb)
451{
452 if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
453 return encap_ipv4(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
454 else
455 return TC_ACT_OK;
456}
457
458SEC("encap_gre_eth")
459int __encap_gre_eth(struct __sk_buff *skb)
460{
461 if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
462 return encap_ipv4(skb, IPPROTO_GRE, ETH_P_TEB);
463 else
464 return TC_ACT_OK;
465}
466
467SEC("encap_udp_none")
468int __encap_udp_none(struct __sk_buff *skb)
469{
470 if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
471 return encap_ipv4(skb, IPPROTO_UDP, ETH_P_IP);
472 else
473 return TC_ACT_OK;
474}
475
476SEC("encap_udp_mpls")
477int __encap_udp_mpls(struct __sk_buff *skb)
478{
479 if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
480 return encap_ipv4(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
481 else
482 return TC_ACT_OK;
483}
484
485SEC("encap_udp_eth")
486int __encap_udp_eth(struct __sk_buff *skb)
487{
488 if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
489 return encap_ipv4(skb, IPPROTO_UDP, ETH_P_TEB);
490 else
491 return TC_ACT_OK;
492}
493
494SEC("encap_vxlan_eth")
495int __encap_vxlan_eth(struct __sk_buff *skb)
496{
497 if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
498 return __encap_ipv4(skb, IPPROTO_UDP,
499 ETH_P_TEB,
500 EXTPROTO_VXLAN);
501 else
502 return TC_ACT_OK;
503}
504
505SEC("encap_sit_none")
506int __encap_sit_none(struct __sk_buff *skb)
507{
508 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
509 return encap_ipv4(skb, IPPROTO_IPV6, ETH_P_IP);
510 else
511 return TC_ACT_OK;
512}
513
514SEC("encap_ip6tnl_none")
515int __encap_ip6tnl_none(struct __sk_buff *skb)
516{
517 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
518 return encap_ipv6(skb, IPPROTO_IPV6, ETH_P_IPV6);
519 else
520 return TC_ACT_OK;
521}
522
523SEC("encap_ipip6_none")
524int __encap_ipip6_none(struct __sk_buff *skb)
525{
526 if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
527 return encap_ipv6_ipip6(skb);
528 else
529 return TC_ACT_OK;
530}
531
532SEC("encap_ip6gre_none")
533int __encap_ip6gre_none(struct __sk_buff *skb)
534{
535 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
536 return encap_ipv6(skb, IPPROTO_GRE, ETH_P_IPV6);
537 else
538 return TC_ACT_OK;
539}
540
541SEC("encap_ip6gre_mpls")
542int __encap_ip6gre_mpls(struct __sk_buff *skb)
543{
544 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
545 return encap_ipv6(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
546 else
547 return TC_ACT_OK;
548}
549
550SEC("encap_ip6gre_eth")
551int __encap_ip6gre_eth(struct __sk_buff *skb)
552{
553 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
554 return encap_ipv6(skb, IPPROTO_GRE, ETH_P_TEB);
555 else
556 return TC_ACT_OK;
557}
558
559SEC("encap_ip6udp_none")
560int __encap_ip6udp_none(struct __sk_buff *skb)
561{
562 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
563 return encap_ipv6(skb, IPPROTO_UDP, ETH_P_IPV6);
564 else
565 return TC_ACT_OK;
566}
567
568SEC("encap_ip6udp_mpls")
569int __encap_ip6udp_mpls(struct __sk_buff *skb)
570{
571 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
572 return encap_ipv6(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
573 else
574 return TC_ACT_OK;
575}
576
577SEC("encap_ip6udp_eth")
578int __encap_ip6udp_eth(struct __sk_buff *skb)
579{
580 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
581 return encap_ipv6(skb, IPPROTO_UDP, ETH_P_TEB);
582 else
583 return TC_ACT_OK;
584}
585
586SEC("encap_ip6vxlan_eth")
587int __encap_ip6vxlan_eth(struct __sk_buff *skb)
588{
589 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
590 return __encap_ipv6(skb, IPPROTO_UDP,
591 ETH_P_TEB,
592 EXTPROTO_VXLAN);
593 else
594 return TC_ACT_OK;
595}
596
597static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
598{
599 __u64 flags = BPF_F_ADJ_ROOM_FIXED_GSO;
600 struct ipv6_opt_hdr ip6_opt_hdr;
601 struct gre_hdr greh;
602 struct udphdr udph;
603 int olen = len;
604
605 switch (proto) {
606 case IPPROTO_IPIP:
607 flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
608 break;
609 case IPPROTO_IPV6:
610 flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
611 break;
612 case NEXTHDR_DEST:
613 if (bpf_skb_load_bytes(skb, off + len, &ip6_opt_hdr,
614 sizeof(ip6_opt_hdr)) < 0)
615 return TC_ACT_OK;
616 switch (ip6_opt_hdr.nexthdr) {
617 case IPPROTO_IPIP:
618 flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
619 break;
620 case IPPROTO_IPV6:
621 flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
622 break;
623 default:
624 return TC_ACT_OK;
625 }
626 break;
627 case IPPROTO_GRE:
628 olen += sizeof(struct gre_hdr);
629 if (bpf_skb_load_bytes(skb, off + len, &greh, sizeof(greh)) < 0)
630 return TC_ACT_OK;
631 switch (bpf_ntohs(greh.protocol)) {
632 case ETH_P_MPLS_UC:
633 olen += sizeof(mpls_label);
634 break;
635 case ETH_P_TEB:
636 olen += ETH_HLEN;
637 break;
638 }
639 break;
640 case IPPROTO_UDP:
641 olen += sizeof(struct udphdr);
642 if (bpf_skb_load_bytes(skb, off + len, &udph, sizeof(udph)) < 0)
643 return TC_ACT_OK;
644 switch (bpf_ntohs(udph.dest)) {
645 case MPLS_OVER_UDP_PORT:
646 olen += sizeof(mpls_label);
647 break;
648 case ETH_OVER_UDP_PORT:
649 olen += ETH_HLEN;
650 break;
651 case VXLAN_UDP_PORT:
652 olen += ETH_HLEN + sizeof(struct vxlanhdr);
653 break;
654 }
655 break;
656 default:
657 return TC_ACT_OK;
658 }
659
660 if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC, flags))
661 return TC_ACT_SHOT;
662
663 return TC_ACT_OK;
664}
665
666static int decap_ipv4(struct __sk_buff *skb)
667{
668 struct iphdr iph_outer;
669
670 if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
671 sizeof(iph_outer)) < 0)
672 return TC_ACT_OK;
673
674 if (iph_outer.ihl != 5)
675 return TC_ACT_OK;
676
677 return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
678 iph_outer.protocol);
679}
680
681static int decap_ipv6(struct __sk_buff *skb)
682{
683 struct ipv6hdr iph_outer;
684
685 if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
686 sizeof(iph_outer)) < 0)
687 return TC_ACT_OK;
688
689 return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
690 iph_outer.nexthdr);
691}
692
693SEC("decap")
694int decap_f(struct __sk_buff *skb)
695{
696 switch (skb->protocol) {
697 case __bpf_constant_htons(ETH_P_IP):
698 return decap_ipv4(skb);
699 case __bpf_constant_htons(ETH_P_IPV6):
700 return decap_ipv6(skb);
701 default:
702 /* does not match, ignore */
703 return TC_ACT_OK;
704 }
705}
706
707char __license[] SEC("license") = "GPL";