Loading...
Note: File does not exist in v4.6.
1// SPDX-License-Identifier: GPL-2.0
2// Copyright (c) 2022 Meta
3
4#include <stddef.h>
5#include <stdint.h>
6#include <stdbool.h>
7#include <linux/bpf.h>
8#include <linux/stddef.h>
9#include <linux/pkt_cls.h>
10#include <linux/if_ether.h>
11#include <linux/in.h>
12#include <linux/ip.h>
13#include <linux/ipv6.h>
14#include <linux/tcp.h>
15#include <linux/udp.h>
16#include <bpf/bpf_helpers.h>
17#include <bpf/bpf_endian.h>
18
/* veth_src --- veth_src_fwd --- veth_dst_fwd --- veth_dst
 *           |                                 |
 *  ns_src   |              ns_fwd             |   ns_dst
 *
 * ns_src and ns_dst: ENDHOST namespace
 *            ns_fwd: Forwarding namespace
 */
26
/* Turn an integer context field such as skb->data into a pointer.
 * The expansion is parenthesized as a whole so that it can be used
 * inside a larger expression without changing how that expression
 * parses.
 */
#define ctx_ptr(field)	((void *)(long)(field))

/* IPv4 source addresses that skb_get_type() maps to SRC_NS and DST_NS */
#define ip4_src		__bpf_htonl(0xac100164) /* 172.16.1.100 */
#define ip4_dst		__bpf_htonl(0xac100264) /* 172.16.2.100 */

/* IPv6 source addresses that skb_get_type() maps to SRC_NS and DST_NS,
 * written as 16-byte initializers.
 */
#define ip6_src		{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
			  0x00, 0x01, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe }
			/* ::1:dead:beef:cafe */
#define ip6_dst		{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
			  0x00, 0x02, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe }
			/* ::2:dead:beef:cafe */

/* Compare two IPv6 addresses one 32-bit word at a time.
 *
 * a and b are objects (not pointers) with an s6_addr32[4] member.
 * Both arguments are parenthesized so that expressions such as *ptr
 * or a conditional expression can be passed. Each argument may be
 * evaluated up to four times, so it must be free of side effects.
 */
#define v6_equal(a, b)	((a).s6_addr32[0] == (b).s6_addr32[0] && \
			 (a).s6_addr32[1] == (b).s6_addr32[1] && \
			 (a).s6_addr32[2] == (b).s6_addr32[2] && \
			 (a).s6_addr32[3] == (b).s6_addr32[3])
41
42volatile const __u32 IFINDEX_SRC;
43volatile const __u32 IFINDEX_DST;
44
/* Marker values written into skb->tstamp by one hook and looked for
 * by the hooks that run after it.
 */
enum {
	/* written by egress_host() */
	EGRESS_ENDHOST_MAGIC	= 0x0b9fbeef,
	/* written by ingress_fwdns_prio101() */
	INGRESS_FWDNS_MAGIC	= 0x1b9fbeef,
	/* written by egress_fwdns_prio101() */
	EGRESS_FWDNS_MAGIC	= 0x2b9fbeef,
};
48
/* Counter slots: the second index of dtimes[][] and errs[][].
 * Each slot is named after the tc program that bumps it.
 */
enum {
	INGRESS_FWDNS_P100	= 0,
	INGRESS_FWDNS_P101	= 1,
	EGRESS_FWDNS_P100	= 2,
	EGRESS_FWDNS_P101	= 3,
	INGRESS_ENDHOST		= 4,
	EGRESS_ENDHOST		= 5,
	/* bpf_skb_set_tstamp() returned non-zero */
	SET_DTIME		= 6,
	/* number of slots, not a slot itself */
	__MAX_CNT		= 7,
};
59
/* Test ids: the values taken by the global "test" and the first index
 * of dtimes[][] and errs[][].
 *
 * Ids below TCP_IP4_RT_FWD make bpf_fwd() return true. The UDP_* ids
 * make get_proto() return IPPROTO_UDP; every other id selects
 * IPPROTO_TCP.
 */
enum {
	/* the prio100 programs in ns_fwd zero skb->tstamp */
	TCP_IP6_CLEAR_DTIME	= 0,
	TCP_IP4			= 1,
	TCP_IP6			= 2,
	UDP_IP4			= 3,
	UDP_IP6			= 4,
	TCP_IP4_RT_FWD		= 5,
	TCP_IP6_RT_FWD		= 6,
	UDP_IP4_RT_FWD		= 7,
	UDP_IP6_RT_FWD		= 8,
	/* row used by the counters when "test" is out of range */
	UKN_TEST		= 9,
	/* number of rows, not a test id */
	__NR_TESTS		= 10,
};
73
/* Sender namespace as reported in the second byte of skb_get_type().
 * Zero is left unused so that it can mean "sender not recognized".
 */
enum {
	SRC_NS = 1,
	DST_NS = 2,
};
78
79__u32 dtimes[__NR_TESTS][__MAX_CNT] = {};
80__u32 errs[__NR_TESTS][__MAX_CNT] = {};
81__u32 test = 0;
82
83static void inc_dtimes(__u32 idx)
84{
85 if (test < __NR_TESTS)
86 dtimes[test][idx]++;
87 else
88 dtimes[UKN_TEST][idx]++;
89}
90
91static void inc_errs(__u32 idx)
92{
93 if (test < __NR_TESTS)
94 errs[test][idx]++;
95 else
96 errs[UKN_TEST][idx]++;
97}
98
/* Extract the inet protocol (lowest byte) from a skb_get_type() value */
static int skb_proto(int type)
{
	const int proto_mask = 0xff;

	return type & proto_mask;
}
103
/* Extract the sender netns (second byte) from a skb_get_type() value */
static int skb_ns(int type)
{
	int upper = type >> 8;

	return upper & 0xff;
}
108
109static bool fwdns_clear_dtime(void)
110{
111 return test == TCP_IP6_CLEAR_DTIME;
112}
113
114static bool bpf_fwd(void)
115{
116 return test < TCP_IP4_RT_FWD;
117}
118
119static __u8 get_proto(void)
120{
121 switch (test) {
122 case UDP_IP4:
123 case UDP_IP6:
124 case UDP_IP4_RT_FWD:
125 case UDP_IP6_RT_FWD:
126 return IPPROTO_UDP;
127 default:
128 return IPPROTO_TCP;
129 }
130}
131
132/* -1: parse error: TC_ACT_SHOT
133 * 0: not testing traffic: TC_ACT_OK
134 * >0: first byte is the inet_proto, second byte has the netns
135 * of the sender
136 */
137static int skb_get_type(struct __sk_buff *skb)
138{
139 __u16 dst_ns_port = __bpf_htons(50000 + test);
140 void *data_end = ctx_ptr(skb->data_end);
141 void *data = ctx_ptr(skb->data);
142 __u8 inet_proto = 0, ns = 0;
143 struct ipv6hdr *ip6h;
144 __u16 sport, dport;
145 struct iphdr *iph;
146 struct tcphdr *th;
147 struct udphdr *uh;
148 void *trans;
149
150 switch (skb->protocol) {
151 case __bpf_htons(ETH_P_IP):
152 iph = data + sizeof(struct ethhdr);
153 if (iph + 1 > data_end)
154 return -1;
155 if (iph->saddr == ip4_src)
156 ns = SRC_NS;
157 else if (iph->saddr == ip4_dst)
158 ns = DST_NS;
159 inet_proto = iph->protocol;
160 trans = iph + 1;
161 break;
162 case __bpf_htons(ETH_P_IPV6):
163 ip6h = data + sizeof(struct ethhdr);
164 if (ip6h + 1 > data_end)
165 return -1;
166 if (v6_equal(ip6h->saddr, (struct in6_addr){{ip6_src}}))
167 ns = SRC_NS;
168 else if (v6_equal(ip6h->saddr, (struct in6_addr){{ip6_dst}}))
169 ns = DST_NS;
170 inet_proto = ip6h->nexthdr;
171 trans = ip6h + 1;
172 break;
173 default:
174 return 0;
175 }
176
177 /* skb is not from src_ns or dst_ns.
178 * skb is not the testing IPPROTO.
179 */
180 if (!ns || inet_proto != get_proto())
181 return 0;
182
183 switch (inet_proto) {
184 case IPPROTO_TCP:
185 th = trans;
186 if (th + 1 > data_end)
187 return -1;
188 sport = th->source;
189 dport = th->dest;
190 break;
191 case IPPROTO_UDP:
192 uh = trans;
193 if (uh + 1 > data_end)
194 return -1;
195 sport = uh->source;
196 dport = uh->dest;
197 break;
198 default:
199 return 0;
200 }
201
202 /* The skb is the testing traffic */
203 if ((ns == SRC_NS && dport == dst_ns_port) ||
204 (ns == DST_NS && sport == dst_ns_port))
205 return (ns << 8 | inet_proto);
206
207 return 0;
208}
209
210/* format: direction@iface@netns
211 * egress@veth_(src|dst)@ns_(src|dst)
212 */
213SEC("tc")
214int egress_host(struct __sk_buff *skb)
215{
216 int skb_type;
217
218 skb_type = skb_get_type(skb);
219 if (skb_type == -1)
220 return TC_ACT_SHOT;
221 if (!skb_type)
222 return TC_ACT_OK;
223
224 if (skb_proto(skb_type) == IPPROTO_TCP) {
225 if (skb->tstamp_type == BPF_SKB_CLOCK_MONOTONIC &&
226 skb->tstamp)
227 inc_dtimes(EGRESS_ENDHOST);
228 else
229 inc_errs(EGRESS_ENDHOST);
230 } else if (skb_proto(skb_type) == IPPROTO_UDP) {
231 if (skb->tstamp_type == BPF_SKB_CLOCK_TAI &&
232 skb->tstamp)
233 inc_dtimes(EGRESS_ENDHOST);
234 else
235 inc_errs(EGRESS_ENDHOST);
236 } else {
237 if (skb->tstamp_type == BPF_SKB_CLOCK_REALTIME &&
238 skb->tstamp)
239 inc_errs(EGRESS_ENDHOST);
240 }
241
242 skb->tstamp = EGRESS_ENDHOST_MAGIC;
243
244 return TC_ACT_OK;
245}
246
247/* ingress@veth_(src|dst)@ns_(src|dst) */
248SEC("tc")
249int ingress_host(struct __sk_buff *skb)
250{
251 int skb_type;
252
253 skb_type = skb_get_type(skb);
254 if (skb_type == -1)
255 return TC_ACT_SHOT;
256 if (!skb_type)
257 return TC_ACT_OK;
258
259 if (skb->tstamp_type == BPF_SKB_CLOCK_MONOTONIC &&
260 skb->tstamp == EGRESS_FWDNS_MAGIC)
261 inc_dtimes(INGRESS_ENDHOST);
262 else
263 inc_errs(INGRESS_ENDHOST);
264
265 return TC_ACT_OK;
266}
267
268/* ingress@veth_(src|dst)_fwd@ns_fwd priority 100 */
269SEC("tc")
270int ingress_fwdns_prio100(struct __sk_buff *skb)
271{
272 int skb_type;
273
274 skb_type = skb_get_type(skb);
275 if (skb_type == -1)
276 return TC_ACT_SHOT;
277 if (!skb_type)
278 return TC_ACT_OK;
279
280 /* delivery_time is only available to the ingress
281 * if the tc-bpf checks the skb->tstamp_type.
282 */
283 if (skb->tstamp == EGRESS_ENDHOST_MAGIC)
284 inc_errs(INGRESS_FWDNS_P100);
285
286 if (fwdns_clear_dtime())
287 skb->tstamp = 0;
288
289 return TC_ACT_UNSPEC;
290}
291
292/* egress@veth_(src|dst)_fwd@ns_fwd priority 100 */
293SEC("tc")
294int egress_fwdns_prio100(struct __sk_buff *skb)
295{
296 int skb_type;
297
298 skb_type = skb_get_type(skb);
299 if (skb_type == -1)
300 return TC_ACT_SHOT;
301 if (!skb_type)
302 return TC_ACT_OK;
303
304 /* delivery_time is always available to egress even
305 * the tc-bpf did not use the tstamp_type.
306 */
307 if (skb->tstamp == INGRESS_FWDNS_MAGIC)
308 inc_dtimes(EGRESS_FWDNS_P100);
309 else
310 inc_errs(EGRESS_FWDNS_P100);
311
312 if (fwdns_clear_dtime())
313 skb->tstamp = 0;
314
315 return TC_ACT_UNSPEC;
316}
317
318/* ingress@veth_(src|dst)_fwd@ns_fwd priority 101 */
319SEC("tc")
320int ingress_fwdns_prio101(struct __sk_buff *skb)
321{
322 int skb_type;
323
324 skb_type = skb_get_type(skb);
325 if (skb_type == -1 || !skb_type)
326 /* Should have handled in prio100 */
327 return TC_ACT_SHOT;
328
329 if (skb->tstamp_type) {
330 if (fwdns_clear_dtime() ||
331 (skb->tstamp_type != BPF_SKB_CLOCK_MONOTONIC &&
332 skb->tstamp_type != BPF_SKB_CLOCK_TAI) ||
333 skb->tstamp != EGRESS_ENDHOST_MAGIC)
334 inc_errs(INGRESS_FWDNS_P101);
335 else
336 inc_dtimes(INGRESS_FWDNS_P101);
337 } else {
338 if (!fwdns_clear_dtime())
339 inc_errs(INGRESS_FWDNS_P101);
340 }
341
342 if (skb->tstamp_type == BPF_SKB_CLOCK_MONOTONIC) {
343 skb->tstamp = INGRESS_FWDNS_MAGIC;
344 } else {
345 if (bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC,
346 BPF_SKB_CLOCK_MONOTONIC))
347 inc_errs(SET_DTIME);
348 }
349
350 if (skb_ns(skb_type) == SRC_NS)
351 return bpf_fwd() ?
352 bpf_redirect_neigh(IFINDEX_DST, NULL, 0, 0) : TC_ACT_OK;
353 else
354 return bpf_fwd() ?
355 bpf_redirect_neigh(IFINDEX_SRC, NULL, 0, 0) : TC_ACT_OK;
356}
357
358/* egress@veth_(src|dst)_fwd@ns_fwd priority 101 */
359SEC("tc")
360int egress_fwdns_prio101(struct __sk_buff *skb)
361{
362 int skb_type;
363
364 skb_type = skb_get_type(skb);
365 if (skb_type == -1 || !skb_type)
366 /* Should have handled in prio100 */
367 return TC_ACT_SHOT;
368
369 if (skb->tstamp_type) {
370 if (fwdns_clear_dtime() ||
371 skb->tstamp_type != BPF_SKB_CLOCK_MONOTONIC ||
372 skb->tstamp != INGRESS_FWDNS_MAGIC)
373 inc_errs(EGRESS_FWDNS_P101);
374 else
375 inc_dtimes(EGRESS_FWDNS_P101);
376 } else {
377 if (!fwdns_clear_dtime())
378 inc_errs(EGRESS_FWDNS_P101);
379 }
380
381 if (skb->tstamp_type == BPF_SKB_CLOCK_MONOTONIC) {
382 skb->tstamp = EGRESS_FWDNS_MAGIC;
383 } else {
384 if (bpf_skb_set_tstamp(skb, EGRESS_FWDNS_MAGIC,
385 BPF_SKB_CLOCK_MONOTONIC))
386 inc_errs(SET_DTIME);
387 }
388
389 return TC_ACT_OK;
390}
391
392char __license[] SEC("license") = "GPL";