Loading...
1// SPDX-License-Identifier: GPL-2.0
2/* Copyright (c) 2019 Facebook */
3
4#include <linux/bpf.h>
5#include <netinet/in.h>
6#include <stdbool.h>
7
8#include <bpf/bpf_helpers.h>
9#include <bpf/bpf_endian.h>
10
/* Keys into linum_map. Each BPF program in this file sets linum_idx to its
 * own constant, so the line number logged by RET_LOG() lands in a separate
 * slot per program.
 */
11enum bpf_linum_array_idx {
12 EGRESS_LINUM_IDX,
13 INGRESS_LINUM_IDX,
14 READ_SK_DST_PORT_LINUM_IDX,
 /* Number of slots; used as max_entries of linum_map. */
15 __NR_BPF_LINUM_ARRAY_IDX,
16};
17
/* Array map with one __u32 slot per enum bpf_linum_array_idx value.
 * RET_LOG() stores the current __LINE__ into the slot selected by linum_idx.
 */
18struct {
19 __uint(type, BPF_MAP_TYPE_ARRAY);
20 __uint(max_entries, __NR_BPF_LINUM_ARRAY_IDX);
21 __type(key, __u32);
22 __type(value, __u32);
23} linum_map SEC(".maps");
24
/* Value type of the sk_pkt_out_cnt and sk_pkt_out_cnt10 socket-storage maps:
 * a counter together with a bpf_spin_lock.
 */
25struct bpf_spinlock_cnt {
26 struct bpf_spin_lock lock;
27 __u32 cnt;
28};
29
/* Per-socket storage. egress_read_sock_fields() adds 1 to cnt on each pass
 * through its counting path, deliberately without taking the lock.
 */
30struct {
31 __uint(type, BPF_MAP_TYPE_SK_STORAGE);
32 __uint(map_flags, BPF_F_NO_PREALLOC);
33 __type(key, int);
34 __type(value, struct bpf_spinlock_cnt);
35} sk_pkt_out_cnt SEC(".maps");
36
/* Per-socket storage. egress_read_sock_fields() adds 10 to cnt on each pass
 * through its counting path, with the value's spin lock held.
 */
37struct {
38 __uint(type, BPF_MAP_TYPE_SK_STORAGE);
39 __uint(map_flags, BPF_F_NO_PREALLOC);
40 __type(key, int);
41 __type(value, struct bpf_spinlock_cnt);
42} sk_pkt_out_cnt10 SEC(".maps");
43
/* Local declaration of struct tcp_sock that names only the lsndtime field,
 * the one member egress_read_sock_fields() reads through the pointer
 * returned by bpf_skc_to_tcp_sock().
 * NOTE(review): preserve_access_index presumably makes field accesses
 * relocatable against the running kernel's real layout (CO-RE) - confirm.
 */
44struct tcp_sock {
45 __u32 lsndtime;
46} __attribute__((preserve_access_index));
47
/* Global data shared by the programs below.
 *
 * listen_sk / listen_tp: filled by ingress_read_sock_fields().
 * srv_sk / srv_tp and cli_sk / cli_tp: filled by egress_read_sock_fields()
 * for the server-side and client-side socket respectively.
 * parent_cg_id, child_cg_id, lsndtime: filled by egress_read_sock_fields()
 * for the server socket only.
 * srv_sa6: only read in this file (its sin6_port selects the sockets under
 * test); presumably set by the user-space part of the test - confirm.
 */
48struct bpf_tcp_sock listen_tp = {};
49struct sockaddr_in6 srv_sa6 = {};
50struct bpf_tcp_sock cli_tp = {};
51struct bpf_tcp_sock srv_tp = {};
52struct bpf_sock listen_sk = {};
53struct bpf_sock srv_sk = {};
54struct bpf_sock cli_sk = {};
55__u64 parent_cg_id = 0;
56__u64 child_cg_id = 0;
57__u64 lsndtime = 0;
58
59static bool is_loopback6(__u32 *a6)
60{
61 return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1);
62}
63
64static void skcpy(struct bpf_sock *dst,
65 const struct bpf_sock *src)
66{
67 dst->bound_dev_if = src->bound_dev_if;
68 dst->family = src->family;
69 dst->type = src->type;
70 dst->protocol = src->protocol;
71 dst->mark = src->mark;
72 dst->priority = src->priority;
73 dst->src_ip4 = src->src_ip4;
74 dst->src_ip6[0] = src->src_ip6[0];
75 dst->src_ip6[1] = src->src_ip6[1];
76 dst->src_ip6[2] = src->src_ip6[2];
77 dst->src_ip6[3] = src->src_ip6[3];
78 dst->src_port = src->src_port;
79 dst->dst_ip4 = src->dst_ip4;
80 dst->dst_ip6[0] = src->dst_ip6[0];
81 dst->dst_ip6[1] = src->dst_ip6[1];
82 dst->dst_ip6[2] = src->dst_ip6[2];
83 dst->dst_ip6[3] = src->dst_ip6[3];
84 dst->dst_port = src->dst_port;
85 dst->state = src->state;
86}
87
88static void tpcpy(struct bpf_tcp_sock *dst,
89 const struct bpf_tcp_sock *src)
90{
91 dst->snd_cwnd = src->snd_cwnd;
92 dst->srtt_us = src->srtt_us;
93 dst->rtt_min = src->rtt_min;
94 dst->snd_ssthresh = src->snd_ssthresh;
95 dst->rcv_nxt = src->rcv_nxt;
96 dst->snd_nxt = src->snd_nxt;
97 dst->snd_una = src->snd_una;
98 dst->mss_cache = src->mss_cache;
99 dst->ecn_flags = src->ecn_flags;
100 dst->rate_delivered = src->rate_delivered;
101 dst->rate_interval_us = src->rate_interval_us;
102 dst->packets_out = src->packets_out;
103 dst->retrans_out = src->retrans_out;
104 dst->total_retrans = src->total_retrans;
105 dst->segs_in = src->segs_in;
106 dst->data_segs_in = src->data_segs_in;
107 dst->segs_out = src->segs_out;
108 dst->data_segs_out = src->data_segs_out;
109 dst->lost_out = src->lost_out;
110 dst->sacked_out = src->sacked_out;
111 dst->bytes_received = src->bytes_received;
112 dst->bytes_acked = src->bytes_acked;
113}
114
/* Verdict returned by every program in this file, so that no packet is
 * filtered out by the test.
 */
#define CG_OK 1

/*
 * Store the current source line in linum_map[linum_idx], then return CG_OK
 * from the enclosing function.
 *
 * The enclosing function must declare `linum` and `linum_idx`, and must have
 * assigned `linum_idx` before the first use.
 */
#define RET_LOG() do {							\
	linum = __LINE__;						\
	bpf_map_update_elem(&linum_map, &linum_idx, &linum, BPF_ANY);	\
	return CG_OK;							\
} while (0)
123
/*
 * cgroup_skb/egress program: snapshot the server-side or client-side socket
 * of the test connection and bump its per-socket packet counters.
 *
 * Sockets that are not IPv6 loopback, are in the LISTEN state, do not match
 * the server port in srv_sa6, or are not TCP are ignored. For a matching
 * socket the bpf_sock / bpf_tcp_sock fields are copied into srv_sk/srv_tp or
 * cli_sk/cli_tp. Every unexpected failure is reported through RET_LOG().
 * Always returns CG_OK.
 */
124SEC("cgroup_skb/egress")
125int egress_read_sock_fields(struct __sk_buff *skb)
126{
 /* Initial value used when the client socket's storage is created below. */
127 struct bpf_spinlock_cnt cli_cnt_init = { .lock = {}, .cnt = 0xeB9F };
128 struct bpf_spinlock_cnt *pkt_out_cnt, *pkt_out_cnt10;
129 struct bpf_tcp_sock *tp, *tp_ret;
130 struct bpf_sock *sk, *sk_ret;
131 __u32 linum, linum_idx;
132 struct tcp_sock *ktp;
133
134 linum_idx = EGRESS_LINUM_IDX;
135
136 sk = skb->sk;
137 if (!sk)
138 RET_LOG();
139
140 /* Not testing the egress traffic or the listening socket,
141 * which are covered by the cgroup_skb/ingress test program.
142 */
143 if (sk->family != AF_INET6 || !is_loopback6(sk->src_ip6) ||
144 sk->state == BPF_TCP_LISTEN)
145 return CG_OK;
146
 /* Pick the destination buffers by matching the server port.
  * NOTE(review): src_port is compared against the byte-swapped port and
  * dst_port against the raw sin6_port; presumably src_port is in host
  * order and dst_port in network order - confirm against linux/bpf.h.
  */
147 if (sk->src_port == bpf_ntohs(srv_sa6.sin6_port)) {
148 /* Server socket */
149 sk_ret = &srv_sk;
150 tp_ret = &srv_tp;
151 } else if (sk->dst_port == srv_sa6.sin6_port) {
152 /* Client socket */
153 sk_ret = &cli_sk;
154 tp_ret = &cli_tp;
155 } else {
156 /* Not the testing egress traffic */
157 return CG_OK;
158 }
159
160 /* It must be a fullsock for cgroup_skb/egress prog */
161 sk = bpf_sk_fullsock(sk);
162 if (!sk)
163 RET_LOG();
164
165 /* Not the testing egress traffic */
166 if (sk->protocol != IPPROTO_TCP)
167 return CG_OK;
168
169 tp = bpf_tcp_sock(sk);
170 if (!tp)
171 RET_LOG();
172
173 skcpy(sk_ret, sk);
174 tpcpy(tp_ret, tp);
175
176 if (sk_ret == &srv_sk) {
 /* Server socket only: also exercise the tcp_sock cast, the cgroup-id
  * helpers, and a storage lookup that does not create the entry.
  */
177 ktp = bpf_skc_to_tcp_sock(sk);
178
179 if (!ktp)
180 RET_LOG();
181
182 lsndtime = ktp->lsndtime;
183
184 child_cg_id = bpf_sk_cgroup_id(ktp);
185 if (!child_cg_id)
186 RET_LOG();
187
188 parent_cg_id = bpf_sk_ancestor_cgroup_id(ktp, 2);
189 if (!parent_cg_id)
190 RET_LOG();
191
192 /* The userspace has created it for srv sk */
193 pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, ktp, 0, 0);
194 pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, ktp,
195 0, 0);
196 } else {
 /* Client socket: create the storage on first use, seeded from
  * cli_cnt_init.
  */
197 pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk,
198 &cli_cnt_init,
199 BPF_SK_STORAGE_GET_F_CREATE);
200 pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10,
201 sk, &cli_cnt_init,
202 BPF_SK_STORAGE_GET_F_CREATE);
203 }
204
205 if (!pkt_out_cnt || !pkt_out_cnt10)
206 RET_LOG();
207
208 /* Even both cnt and cnt10 have lock defined in their BTF,
209 * intentionally one cnt takes lock while one does not
210 * as a test for the spinlock support in BPF_MAP_TYPE_SK_STORAGE.
211 */
212 pkt_out_cnt->cnt += 1;
213 bpf_spin_lock(&pkt_out_cnt10->lock);
214 pkt_out_cnt10->cnt += 10;
215 bpf_spin_unlock(&pkt_out_cnt10->lock);
216
217 return CG_OK;
218}
219
220SEC("cgroup_skb/ingress")
221int ingress_read_sock_fields(struct __sk_buff *skb)
222{
223 struct bpf_tcp_sock *tp;
224 __u32 linum, linum_idx;
225 struct bpf_sock *sk;
226
227 linum_idx = INGRESS_LINUM_IDX;
228
229 sk = skb->sk;
230 if (!sk)
231 RET_LOG();
232
233 /* Not the testing ingress traffic to the server */
234 if (sk->family != AF_INET6 || !is_loopback6(sk->src_ip6) ||
235 sk->src_port != bpf_ntohs(srv_sa6.sin6_port))
236 return CG_OK;
237
238 /* Only interested in the listening socket */
239 if (sk->state != BPF_TCP_LISTEN)
240 return CG_OK;
241
242 /* It must be a fullsock for cgroup_skb/ingress prog */
243 sk = bpf_sk_fullsock(sk);
244 if (!sk)
245 RET_LOG();
246
247 tp = bpf_tcp_sock(sk);
248 if (!tp)
249 RET_LOG();
250
251 skcpy(&listen_sk, sk);
252 tpcpy(&listen_tp, tp);
253
254 return CG_OK;
255}
256
257/*
258 * NOTE: 4-byte load from bpf_sock at dst_port offset is quirky. It
259 * gets rewritten by the access converter to a 2-byte load for
260 * backward compatibility. Treating the load result as a be16 value
261 * makes the code portable across little- and big-endian platforms.
262 */
/* Read dst_port through a __u32 pointer (4-byte access) and report whether
 * the value equals bpf_htons(0xcafe); see the NOTE above for why the word is
 * compared as a be16 value.
 */
263static __noinline bool sk_dst_port__load_word(struct bpf_sock *sk)
264{
265 __u32 *word = (__u32 *)&sk->dst_port;
266 return word[0] == bpf_htons(0xcafe);
267}
268
/* Read dst_port through a __u16 pointer (2-byte access) and report whether
 * the value equals bpf_htons(0xcafe).
 */
269static __noinline bool sk_dst_port__load_half(struct bpf_sock *sk)
270{
271 __u16 *half;
272
 /* NOTE(review): empty asm statement, presumably a compiler barrier that
  * keeps the load below from being transformed - confirm before removing.
  */
273 asm volatile ("");
274 half = (__u16 *)&sk->dst_port;
275 return half[0] == bpf_htons(0xcafe);
276}
277
/* Read dst_port one byte at a time and report whether the first byte is
 * 0xca and the second is 0xfe, i.e. 0xcafe laid out most significant byte
 * first.
 */
278static __noinline bool sk_dst_port__load_byte(struct bpf_sock *sk)
279{
280 __u8 *byte = (__u8 *)&sk->dst_port;
281 return byte[0] == 0xca && byte[1] == 0xfe;
282}
283
284SEC("cgroup_skb/egress")
285int read_sk_dst_port(struct __sk_buff *skb)
286{
287 __u32 linum, linum_idx;
288 struct bpf_sock *sk;
289
290 linum_idx = READ_SK_DST_PORT_LINUM_IDX;
291
292 sk = skb->sk;
293 if (!sk)
294 RET_LOG();
295
296 /* Ignore everything but the SYN from the client socket */
297 if (sk->state != BPF_TCP_SYN_SENT)
298 return CG_OK;
299
300 if (!sk_dst_port__load_word(sk))
301 RET_LOG();
302 if (!sk_dst_port__load_half(sk))
303 RET_LOG();
304 if (!sk_dst_port__load_byte(sk))
305 RET_LOG();
306
307 return CG_OK;
308}
309
/* License string placed in the "license" ELF section of the BPF object. */
310char _license[] SEC("license") = "GPL";
1// SPDX-License-Identifier: GPL-2.0
2/* Copyright (c) 2019 Facebook */
3
4#include <linux/bpf.h>
5#include <netinet/in.h>
6#include <stdbool.h>
7
8#include <bpf/bpf_helpers.h>
9#include <bpf/bpf_endian.h>
10#include "bpf_tcp_helpers.h"
11
/* Keys into linum_map. Each BPF program in this file sets linum_idx to its
 * own constant, so the line number logged by RET_LOG() is kept per program.
 */
12enum bpf_linum_array_idx {
13 EGRESS_LINUM_IDX,
14 INGRESS_LINUM_IDX,
 /* Number of slots; used as max_entries of linum_map. */
15 __NR_BPF_LINUM_ARRAY_IDX,
16};
17
/* Array map with one __u32 slot per enum bpf_linum_array_idx value.
 * RET_LOG() passes the current __LINE__ to bpf_map_update_elem() for the
 * slot selected by linum_idx.
 */
18struct {
19 __uint(type, BPF_MAP_TYPE_ARRAY);
20 __uint(max_entries, __NR_BPF_LINUM_ARRAY_IDX);
21 __type(key, __u32);
22 __type(value, __u32);
23} linum_map SEC(".maps");
24
/* Value type of the sk_pkt_out_cnt and sk_pkt_out_cnt10 socket-storage maps:
 * a counter together with a bpf_spin_lock.
 */
25struct bpf_spinlock_cnt {
26 struct bpf_spin_lock lock;
27 __u32 cnt;
28};
29
/* Per-socket storage. egress_read_sock_fields() adds 1 to cnt on each pass
 * through its counting path, deliberately without taking the lock.
 */
30struct {
31 __uint(type, BPF_MAP_TYPE_SK_STORAGE);
32 __uint(map_flags, BPF_F_NO_PREALLOC);
33 __type(key, int);
34 __type(value, struct bpf_spinlock_cnt);
35} sk_pkt_out_cnt SEC(".maps");
36
/* Per-socket storage. egress_read_sock_fields() adds 10 to cnt on each pass
 * through its counting path, with the value's spin lock held.
 */
37struct {
38 __uint(type, BPF_MAP_TYPE_SK_STORAGE);
39 __uint(map_flags, BPF_F_NO_PREALLOC);
40 __type(key, int);
41 __type(value, struct bpf_spinlock_cnt);
42} sk_pkt_out_cnt10 SEC(".maps");
43
/* Global data shared by the programs below.
 *
 * listen_sk / listen_tp: filled by ingress_read_sock_fields().
 * srv_sk / srv_tp and cli_sk / cli_tp: filled by egress_read_sock_fields()
 * for the server-side and client-side socket respectively.
 * parent_cg_id, child_cg_id, lsndtime: filled by egress_read_sock_fields()
 * for the server socket only.
 * srv_sa6: only read in this file (its sin6_port selects the sockets under
 * test); presumably set by the user-space part of the test - confirm.
 */
44struct bpf_tcp_sock listen_tp = {};
45struct sockaddr_in6 srv_sa6 = {};
46struct bpf_tcp_sock cli_tp = {};
47struct bpf_tcp_sock srv_tp = {};
48struct bpf_sock listen_sk = {};
49struct bpf_sock srv_sk = {};
50struct bpf_sock cli_sk = {};
51__u64 parent_cg_id = 0;
52__u64 child_cg_id = 0;
53__u64 lsndtime = 0;
54
55static bool is_loopback6(__u32 *a6)
56{
57 return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1);
58}
59
60static void skcpy(struct bpf_sock *dst,
61 const struct bpf_sock *src)
62{
63 dst->bound_dev_if = src->bound_dev_if;
64 dst->family = src->family;
65 dst->type = src->type;
66 dst->protocol = src->protocol;
67 dst->mark = src->mark;
68 dst->priority = src->priority;
69 dst->src_ip4 = src->src_ip4;
70 dst->src_ip6[0] = src->src_ip6[0];
71 dst->src_ip6[1] = src->src_ip6[1];
72 dst->src_ip6[2] = src->src_ip6[2];
73 dst->src_ip6[3] = src->src_ip6[3];
74 dst->src_port = src->src_port;
75 dst->dst_ip4 = src->dst_ip4;
76 dst->dst_ip6[0] = src->dst_ip6[0];
77 dst->dst_ip6[1] = src->dst_ip6[1];
78 dst->dst_ip6[2] = src->dst_ip6[2];
79 dst->dst_ip6[3] = src->dst_ip6[3];
80 dst->dst_port = src->dst_port;
81 dst->state = src->state;
82}
83
84static void tpcpy(struct bpf_tcp_sock *dst,
85 const struct bpf_tcp_sock *src)
86{
87 dst->snd_cwnd = src->snd_cwnd;
88 dst->srtt_us = src->srtt_us;
89 dst->rtt_min = src->rtt_min;
90 dst->snd_ssthresh = src->snd_ssthresh;
91 dst->rcv_nxt = src->rcv_nxt;
92 dst->snd_nxt = src->snd_nxt;
93 dst->snd_una = src->snd_una;
94 dst->mss_cache = src->mss_cache;
95 dst->ecn_flags = src->ecn_flags;
96 dst->rate_delivered = src->rate_delivered;
97 dst->rate_interval_us = src->rate_interval_us;
98 dst->packets_out = src->packets_out;
99 dst->retrans_out = src->retrans_out;
100 dst->total_retrans = src->total_retrans;
101 dst->segs_in = src->segs_in;
102 dst->data_segs_in = src->data_segs_in;
103 dst->segs_out = src->segs_out;
104 dst->data_segs_out = src->data_segs_out;
105 dst->lost_out = src->lost_out;
106 dst->sacked_out = src->sacked_out;
107 dst->bytes_received = src->bytes_received;
108 dst->bytes_acked = src->bytes_acked;
109}
110
/* Always return CG_OK so that no pkt will be filtered out */
#define CG_OK 1

/*
 * Store the current source line in linum_map[linum_idx], then return CG_OK
 * from the enclosing function. The enclosing function must declare `linum`
 * and `linum_idx` and assign `linum_idx` before the first use.
 *
 * The update uses BPF_ANY: linum_map is a BPF_MAP_TYPE_ARRAY, whose elements
 * always exist, so an update with BPF_NOEXIST is rejected and the line
 * number would never be recorded.
 */
#define RET_LOG() ({						\
	linum = __LINE__;					\
	bpf_map_update_elem(&linum_map, &linum_idx, &linum, BPF_ANY);	\
	return CG_OK;						\
})
119
120SEC("cgroup_skb/egress")
121int egress_read_sock_fields(struct __sk_buff *skb)
122{
123 struct bpf_spinlock_cnt cli_cnt_init = { .lock = 0, .cnt = 0xeB9F };
124 struct bpf_spinlock_cnt *pkt_out_cnt, *pkt_out_cnt10;
125 struct bpf_tcp_sock *tp, *tp_ret;
126 struct bpf_sock *sk, *sk_ret;
127 __u32 linum, linum_idx;
128 struct tcp_sock *ktp;
129
130 linum_idx = EGRESS_LINUM_IDX;
131
132 sk = skb->sk;
133 if (!sk)
134 RET_LOG();
135
136 /* Not the testing egress traffic or
137 * TCP_LISTEN (10) socket will be copied at the ingress side.
138 */
139 if (sk->family != AF_INET6 || !is_loopback6(sk->src_ip6) ||
140 sk->state == 10)
141 return CG_OK;
142
143 if (sk->src_port == bpf_ntohs(srv_sa6.sin6_port)) {
144 /* Server socket */
145 sk_ret = &srv_sk;
146 tp_ret = &srv_tp;
147 } else if (sk->dst_port == srv_sa6.sin6_port) {
148 /* Client socket */
149 sk_ret = &cli_sk;
150 tp_ret = &cli_tp;
151 } else {
152 /* Not the testing egress traffic */
153 return CG_OK;
154 }
155
156 /* It must be a fullsock for cgroup_skb/egress prog */
157 sk = bpf_sk_fullsock(sk);
158 if (!sk)
159 RET_LOG();
160
161 /* Not the testing egress traffic */
162 if (sk->protocol != IPPROTO_TCP)
163 return CG_OK;
164
165 tp = bpf_tcp_sock(sk);
166 if (!tp)
167 RET_LOG();
168
169 skcpy(sk_ret, sk);
170 tpcpy(tp_ret, tp);
171
172 if (sk_ret == &srv_sk) {
173 ktp = bpf_skc_to_tcp_sock(sk);
174
175 if (!ktp)
176 RET_LOG();
177
178 lsndtime = ktp->lsndtime;
179
180 child_cg_id = bpf_sk_cgroup_id(ktp);
181 if (!child_cg_id)
182 RET_LOG();
183
184 parent_cg_id = bpf_sk_ancestor_cgroup_id(ktp, 2);
185 if (!parent_cg_id)
186 RET_LOG();
187
188 /* The userspace has created it for srv sk */
189 pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, ktp, 0, 0);
190 pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, ktp,
191 0, 0);
192 } else {
193 pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk,
194 &cli_cnt_init,
195 BPF_SK_STORAGE_GET_F_CREATE);
196 pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10,
197 sk, &cli_cnt_init,
198 BPF_SK_STORAGE_GET_F_CREATE);
199 }
200
201 if (!pkt_out_cnt || !pkt_out_cnt10)
202 RET_LOG();
203
204 /* Even both cnt and cnt10 have lock defined in their BTF,
205 * intentionally one cnt takes lock while one does not
206 * as a test for the spinlock support in BPF_MAP_TYPE_SK_STORAGE.
207 */
208 pkt_out_cnt->cnt += 1;
209 bpf_spin_lock(&pkt_out_cnt10->lock);
210 pkt_out_cnt10->cnt += 10;
211 bpf_spin_unlock(&pkt_out_cnt10->lock);
212
213 return CG_OK;
214}
215
216SEC("cgroup_skb/ingress")
217int ingress_read_sock_fields(struct __sk_buff *skb)
218{
219 struct bpf_tcp_sock *tp;
220 __u32 linum, linum_idx;
221 struct bpf_sock *sk;
222
223 linum_idx = INGRESS_LINUM_IDX;
224
225 sk = skb->sk;
226 if (!sk)
227 RET_LOG();
228
229 /* Not the testing ingress traffic to the server */
230 if (sk->family != AF_INET6 || !is_loopback6(sk->src_ip6) ||
231 sk->src_port != bpf_ntohs(srv_sa6.sin6_port))
232 return CG_OK;
233
234 /* Only interested in TCP_LISTEN */
235 if (sk->state != 10)
236 return CG_OK;
237
238 /* It must be a fullsock for cgroup_skb/ingress prog */
239 sk = bpf_sk_fullsock(sk);
240 if (!sk)
241 RET_LOG();
242
243 tp = bpf_tcp_sock(sk);
244 if (!tp)
245 RET_LOG();
246
247 skcpy(&listen_sk, sk);
248 tpcpy(&listen_tp, tp);
249
250 return CG_OK;
251}
252
/* License string placed in the "license" ELF section of the BPF object. */
253char _license[] SEC("license") = "GPL";