Linux Audio

Check our new training course

In-person Linux kernel drivers training

Jun 16-20, 2025
Register
Loading...
v6.13.7
  1// SPDX-License-Identifier: GPL-2.0
  2/* Copyright (c) 2019 Facebook */
  3
  4#include <linux/bpf.h>
  5#include <netinet/in.h>
  6#include <stdbool.h>
  7
  8#include <bpf/bpf_helpers.h>
  9#include <bpf/bpf_endian.h>
 
 10
 11enum bpf_linum_array_idx {
 12	EGRESS_LINUM_IDX,
 13	INGRESS_LINUM_IDX,
 14	READ_SK_DST_PORT_LINUM_IDX,
 15	__NR_BPF_LINUM_ARRAY_IDX,
 16};
 17
 18struct {
 19	__uint(type, BPF_MAP_TYPE_ARRAY);
 20	__uint(max_entries, __NR_BPF_LINUM_ARRAY_IDX);
 21	__type(key, __u32);
 22	__type(value, __u32);
 23} linum_map SEC(".maps");
 24
 25struct bpf_spinlock_cnt {
 26	struct bpf_spin_lock lock;
 27	__u32 cnt;
 28};
 29
 30struct {
 31	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
 32	__uint(map_flags, BPF_F_NO_PREALLOC);
 33	__type(key, int);
 34	__type(value, struct bpf_spinlock_cnt);
 35} sk_pkt_out_cnt SEC(".maps");
 36
 37struct {
 38	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
 39	__uint(map_flags, BPF_F_NO_PREALLOC);
 40	__type(key, int);
 41	__type(value, struct bpf_spinlock_cnt);
 42} sk_pkt_out_cnt10 SEC(".maps");
 43
 44struct tcp_sock {
 45	__u32	lsndtime;
 46} __attribute__((preserve_access_index));
 47
 48struct bpf_tcp_sock listen_tp = {};
 49struct sockaddr_in6 srv_sa6 = {};
 50struct bpf_tcp_sock cli_tp = {};
 51struct bpf_tcp_sock srv_tp = {};
 52struct bpf_sock listen_sk = {};
 53struct bpf_sock srv_sk = {};
 54struct bpf_sock cli_sk = {};
 55__u64 parent_cg_id = 0;
 56__u64 child_cg_id = 0;
 57__u64 lsndtime = 0;
 58
 59static bool is_loopback6(__u32 *a6)
 60{
 61	return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1);
 62}
 63
 64static void skcpy(struct bpf_sock *dst,
 65		  const struct bpf_sock *src)
 66{
 67	dst->bound_dev_if = src->bound_dev_if;
 68	dst->family = src->family;
 69	dst->type = src->type;
 70	dst->protocol = src->protocol;
 71	dst->mark = src->mark;
 72	dst->priority = src->priority;
 73	dst->src_ip4 = src->src_ip4;
 74	dst->src_ip6[0] = src->src_ip6[0];
 75	dst->src_ip6[1] = src->src_ip6[1];
 76	dst->src_ip6[2] = src->src_ip6[2];
 77	dst->src_ip6[3] = src->src_ip6[3];
 78	dst->src_port = src->src_port;
 79	dst->dst_ip4 = src->dst_ip4;
 80	dst->dst_ip6[0] = src->dst_ip6[0];
 81	dst->dst_ip6[1] = src->dst_ip6[1];
 82	dst->dst_ip6[2] = src->dst_ip6[2];
 83	dst->dst_ip6[3] = src->dst_ip6[3];
 84	dst->dst_port = src->dst_port;
 85	dst->state = src->state;
 86}
 87
 88static void tpcpy(struct bpf_tcp_sock *dst,
 89		  const struct bpf_tcp_sock *src)
 90{
 91	dst->snd_cwnd = src->snd_cwnd;
 92	dst->srtt_us = src->srtt_us;
 93	dst->rtt_min = src->rtt_min;
 94	dst->snd_ssthresh = src->snd_ssthresh;
 95	dst->rcv_nxt = src->rcv_nxt;
 96	dst->snd_nxt = src->snd_nxt;
 97	dst->snd_una = src->snd_una;
 98	dst->mss_cache = src->mss_cache;
 99	dst->ecn_flags = src->ecn_flags;
100	dst->rate_delivered = src->rate_delivered;
101	dst->rate_interval_us = src->rate_interval_us;
102	dst->packets_out = src->packets_out;
103	dst->retrans_out = src->retrans_out;
104	dst->total_retrans = src->total_retrans;
105	dst->segs_in = src->segs_in;
106	dst->data_segs_in = src->data_segs_in;
107	dst->segs_out = src->segs_out;
108	dst->data_segs_out = src->data_segs_out;
109	dst->lost_out = src->lost_out;
110	dst->sacked_out = src->sacked_out;
111	dst->bytes_received = src->bytes_received;
112	dst->bytes_acked = src->bytes_acked;
113}
114
/* Always return CG_OK so that no pkt will be filtered out */
#define CG_OK 1

/*
 * Store the current source line in linum_map[linum_idx] and return
 * CG_OK from the enclosing program.  The caller must have __u32 locals
 * named linum and linum_idx in scope, with linum_idx already set.
 * The result of bpf_map_update_elem() is intentionally not checked.
 */
#define RET_LOG() ({						\
	linum = __LINE__;					\
	bpf_map_update_elem(&linum_map, &linum_idx, &linum, BPF_ANY);	\
	return CG_OK;						\
})
123
124SEC("cgroup_skb/egress")
125int egress_read_sock_fields(struct __sk_buff *skb)
126{
127	struct bpf_spinlock_cnt cli_cnt_init = { .lock = {}, .cnt = 0xeB9F };
128	struct bpf_spinlock_cnt *pkt_out_cnt, *pkt_out_cnt10;
129	struct bpf_tcp_sock *tp, *tp_ret;
130	struct bpf_sock *sk, *sk_ret;
131	__u32 linum, linum_idx;
132	struct tcp_sock *ktp;
133
134	linum_idx = EGRESS_LINUM_IDX;
135
136	sk = skb->sk;
137	if (!sk)
138		RET_LOG();
139
140	/* Not testing the egress traffic or the listening socket,
141	 * which are covered by the cgroup_skb/ingress test program.
142	 */
143	if (sk->family != AF_INET6 || !is_loopback6(sk->src_ip6) ||
144	    sk->state == BPF_TCP_LISTEN)
145		return CG_OK;
146
147	if (sk->src_port == bpf_ntohs(srv_sa6.sin6_port)) {
148		/* Server socket */
149		sk_ret = &srv_sk;
150		tp_ret = &srv_tp;
151	} else if (sk->dst_port == srv_sa6.sin6_port) {
152		/* Client socket */
153		sk_ret = &cli_sk;
154		tp_ret = &cli_tp;
155	} else {
156		/* Not the testing egress traffic */
157		return CG_OK;
158	}
159
160	/* It must be a fullsock for cgroup_skb/egress prog */
161	sk = bpf_sk_fullsock(sk);
162	if (!sk)
163		RET_LOG();
164
165	/* Not the testing egress traffic */
166	if (sk->protocol != IPPROTO_TCP)
167		return CG_OK;
168
169	tp = bpf_tcp_sock(sk);
170	if (!tp)
171		RET_LOG();
172
173	skcpy(sk_ret, sk);
174	tpcpy(tp_ret, tp);
175
176	if (sk_ret == &srv_sk) {
177		ktp = bpf_skc_to_tcp_sock(sk);
178
179		if (!ktp)
180			RET_LOG();
181
182		lsndtime = ktp->lsndtime;
183
184		child_cg_id = bpf_sk_cgroup_id(ktp);
185		if (!child_cg_id)
186			RET_LOG();
187
188		parent_cg_id = bpf_sk_ancestor_cgroup_id(ktp, 2);
189		if (!parent_cg_id)
190			RET_LOG();
191
192		/* The userspace has created it for srv sk */
193		pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, ktp, 0, 0);
194		pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, ktp,
195						   0, 0);
196	} else {
197		pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk,
198						 &cli_cnt_init,
199						 BPF_SK_STORAGE_GET_F_CREATE);
200		pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10,
201						   sk, &cli_cnt_init,
202						   BPF_SK_STORAGE_GET_F_CREATE);
203	}
204
205	if (!pkt_out_cnt || !pkt_out_cnt10)
206		RET_LOG();
207
208	/* Even both cnt and cnt10 have lock defined in their BTF,
209	 * intentionally one cnt takes lock while one does not
210	 * as a test for the spinlock support in BPF_MAP_TYPE_SK_STORAGE.
211	 */
212	pkt_out_cnt->cnt += 1;
213	bpf_spin_lock(&pkt_out_cnt10->lock);
214	pkt_out_cnt10->cnt += 10;
215	bpf_spin_unlock(&pkt_out_cnt10->lock);
216
217	return CG_OK;
218}
219
220SEC("cgroup_skb/ingress")
221int ingress_read_sock_fields(struct __sk_buff *skb)
222{
223	struct bpf_tcp_sock *tp;
224	__u32 linum, linum_idx;
225	struct bpf_sock *sk;
226
227	linum_idx = INGRESS_LINUM_IDX;
228
229	sk = skb->sk;
230	if (!sk)
231		RET_LOG();
232
233	/* Not the testing ingress traffic to the server */
234	if (sk->family != AF_INET6 || !is_loopback6(sk->src_ip6) ||
235	    sk->src_port != bpf_ntohs(srv_sa6.sin6_port))
236		return CG_OK;
237
238	/* Only interested in the listening socket */
239	if (sk->state != BPF_TCP_LISTEN)
240		return CG_OK;
241
242	/* It must be a fullsock for cgroup_skb/ingress prog */
243	sk = bpf_sk_fullsock(sk);
244	if (!sk)
245		RET_LOG();
246
247	tp = bpf_tcp_sock(sk);
248	if (!tp)
249		RET_LOG();
250
251	skcpy(&listen_sk, sk);
252	tpcpy(&listen_tp, tp);
253
254	return CG_OK;
255}
256
257/*
258 * NOTE: 4-byte load from bpf_sock at dst_port offset is quirky. It
259 * gets rewritten by the access converter to a 2-byte load for
260 * backward compatibility. Treating the load result as a be16 value
261 * makes the code portable across little- and big-endian platforms.
262 */
263static __noinline bool sk_dst_port__load_word(struct bpf_sock *sk)
264{
265	__u32 *word = (__u32 *)&sk->dst_port;
266	return word[0] == bpf_htons(0xcafe);
267}
268
269static __noinline bool sk_dst_port__load_half(struct bpf_sock *sk)
270{
271	__u16 *half;
272
273	asm volatile ("");
274	half = (__u16 *)&sk->dst_port;
275	return half[0] == bpf_htons(0xcafe);
276}
277
278static __noinline bool sk_dst_port__load_byte(struct bpf_sock *sk)
279{
280	__u8 *byte = (__u8 *)&sk->dst_port;
281	return byte[0] == 0xca && byte[1] == 0xfe;
282}
283
284SEC("cgroup_skb/egress")
285int read_sk_dst_port(struct __sk_buff *skb)
286{
287	__u32 linum, linum_idx;
288	struct bpf_sock *sk;
289
290	linum_idx = READ_SK_DST_PORT_LINUM_IDX;
291
292	sk = skb->sk;
293	if (!sk)
294		RET_LOG();
295
296	/* Ignore everything but the SYN from the client socket */
297	if (sk->state != BPF_TCP_SYN_SENT)
298		return CG_OK;
299
300	if (!sk_dst_port__load_word(sk))
301		RET_LOG();
302	if (!sk_dst_port__load_half(sk))
303		RET_LOG();
304	if (!sk_dst_port__load_byte(sk))
305		RET_LOG();
306
307	return CG_OK;
308}
309
310char _license[] SEC("license") = "GPL";
v6.8
  1// SPDX-License-Identifier: GPL-2.0
  2/* Copyright (c) 2019 Facebook */
  3
  4#include <linux/bpf.h>
  5#include <netinet/in.h>
  6#include <stdbool.h>
  7
  8#include <bpf/bpf_helpers.h>
  9#include <bpf/bpf_endian.h>
 10#include "bpf_tcp_helpers.h"
 11
 12enum bpf_linum_array_idx {
 13	EGRESS_LINUM_IDX,
 14	INGRESS_LINUM_IDX,
 15	READ_SK_DST_PORT_LINUM_IDX,
 16	__NR_BPF_LINUM_ARRAY_IDX,
 17};
 18
 19struct {
 20	__uint(type, BPF_MAP_TYPE_ARRAY);
 21	__uint(max_entries, __NR_BPF_LINUM_ARRAY_IDX);
 22	__type(key, __u32);
 23	__type(value, __u32);
 24} linum_map SEC(".maps");
 25
 26struct bpf_spinlock_cnt {
 27	struct bpf_spin_lock lock;
 28	__u32 cnt;
 29};
 30
 31struct {
 32	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
 33	__uint(map_flags, BPF_F_NO_PREALLOC);
 34	__type(key, int);
 35	__type(value, struct bpf_spinlock_cnt);
 36} sk_pkt_out_cnt SEC(".maps");
 37
 38struct {
 39	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
 40	__uint(map_flags, BPF_F_NO_PREALLOC);
 41	__type(key, int);
 42	__type(value, struct bpf_spinlock_cnt);
 43} sk_pkt_out_cnt10 SEC(".maps");
 
 
 
 
 44
 45struct bpf_tcp_sock listen_tp = {};
 46struct sockaddr_in6 srv_sa6 = {};
 47struct bpf_tcp_sock cli_tp = {};
 48struct bpf_tcp_sock srv_tp = {};
 49struct bpf_sock listen_sk = {};
 50struct bpf_sock srv_sk = {};
 51struct bpf_sock cli_sk = {};
 52__u64 parent_cg_id = 0;
 53__u64 child_cg_id = 0;
 54__u64 lsndtime = 0;
 55
 56static bool is_loopback6(__u32 *a6)
 57{
 58	return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1);
 59}
 60
 61static void skcpy(struct bpf_sock *dst,
 62		  const struct bpf_sock *src)
 63{
 64	dst->bound_dev_if = src->bound_dev_if;
 65	dst->family = src->family;
 66	dst->type = src->type;
 67	dst->protocol = src->protocol;
 68	dst->mark = src->mark;
 69	dst->priority = src->priority;
 70	dst->src_ip4 = src->src_ip4;
 71	dst->src_ip6[0] = src->src_ip6[0];
 72	dst->src_ip6[1] = src->src_ip6[1];
 73	dst->src_ip6[2] = src->src_ip6[2];
 74	dst->src_ip6[3] = src->src_ip6[3];
 75	dst->src_port = src->src_port;
 76	dst->dst_ip4 = src->dst_ip4;
 77	dst->dst_ip6[0] = src->dst_ip6[0];
 78	dst->dst_ip6[1] = src->dst_ip6[1];
 79	dst->dst_ip6[2] = src->dst_ip6[2];
 80	dst->dst_ip6[3] = src->dst_ip6[3];
 81	dst->dst_port = src->dst_port;
 82	dst->state = src->state;
 83}
 84
 85static void tpcpy(struct bpf_tcp_sock *dst,
 86		  const struct bpf_tcp_sock *src)
 87{
 88	dst->snd_cwnd = src->snd_cwnd;
 89	dst->srtt_us = src->srtt_us;
 90	dst->rtt_min = src->rtt_min;
 91	dst->snd_ssthresh = src->snd_ssthresh;
 92	dst->rcv_nxt = src->rcv_nxt;
 93	dst->snd_nxt = src->snd_nxt;
 94	dst->snd_una = src->snd_una;
 95	dst->mss_cache = src->mss_cache;
 96	dst->ecn_flags = src->ecn_flags;
 97	dst->rate_delivered = src->rate_delivered;
 98	dst->rate_interval_us = src->rate_interval_us;
 99	dst->packets_out = src->packets_out;
100	dst->retrans_out = src->retrans_out;
101	dst->total_retrans = src->total_retrans;
102	dst->segs_in = src->segs_in;
103	dst->data_segs_in = src->data_segs_in;
104	dst->segs_out = src->segs_out;
105	dst->data_segs_out = src->data_segs_out;
106	dst->lost_out = src->lost_out;
107	dst->sacked_out = src->sacked_out;
108	dst->bytes_received = src->bytes_received;
109	dst->bytes_acked = src->bytes_acked;
110}
111
/* Always return CG_OK so that no pkt will be filtered out */
#define CG_OK 1

/*
 * Store the current source line in linum_map[linum_idx] and return
 * CG_OK from the enclosing program.  The caller must have __u32 locals
 * named linum and linum_idx in scope, with linum_idx already set.
 * The result of bpf_map_update_elem() is intentionally not checked.
 */
#define RET_LOG() ({						\
	linum = __LINE__;					\
	bpf_map_update_elem(&linum_map, &linum_idx, &linum, BPF_ANY);	\
	return CG_OK;						\
})
120
121SEC("cgroup_skb/egress")
122int egress_read_sock_fields(struct __sk_buff *skb)
123{
124	struct bpf_spinlock_cnt cli_cnt_init = { .lock = {}, .cnt = 0xeB9F };
125	struct bpf_spinlock_cnt *pkt_out_cnt, *pkt_out_cnt10;
126	struct bpf_tcp_sock *tp, *tp_ret;
127	struct bpf_sock *sk, *sk_ret;
128	__u32 linum, linum_idx;
129	struct tcp_sock *ktp;
130
131	linum_idx = EGRESS_LINUM_IDX;
132
133	sk = skb->sk;
134	if (!sk)
135		RET_LOG();
136
137	/* Not testing the egress traffic or the listening socket,
138	 * which are covered by the cgroup_skb/ingress test program.
139	 */
140	if (sk->family != AF_INET6 || !is_loopback6(sk->src_ip6) ||
141	    sk->state == BPF_TCP_LISTEN)
142		return CG_OK;
143
144	if (sk->src_port == bpf_ntohs(srv_sa6.sin6_port)) {
145		/* Server socket */
146		sk_ret = &srv_sk;
147		tp_ret = &srv_tp;
148	} else if (sk->dst_port == srv_sa6.sin6_port) {
149		/* Client socket */
150		sk_ret = &cli_sk;
151		tp_ret = &cli_tp;
152	} else {
153		/* Not the testing egress traffic */
154		return CG_OK;
155	}
156
157	/* It must be a fullsock for cgroup_skb/egress prog */
158	sk = bpf_sk_fullsock(sk);
159	if (!sk)
160		RET_LOG();
161
162	/* Not the testing egress traffic */
163	if (sk->protocol != IPPROTO_TCP)
164		return CG_OK;
165
166	tp = bpf_tcp_sock(sk);
167	if (!tp)
168		RET_LOG();
169
170	skcpy(sk_ret, sk);
171	tpcpy(tp_ret, tp);
172
173	if (sk_ret == &srv_sk) {
174		ktp = bpf_skc_to_tcp_sock(sk);
175
176		if (!ktp)
177			RET_LOG();
178
179		lsndtime = ktp->lsndtime;
180
181		child_cg_id = bpf_sk_cgroup_id(ktp);
182		if (!child_cg_id)
183			RET_LOG();
184
185		parent_cg_id = bpf_sk_ancestor_cgroup_id(ktp, 2);
186		if (!parent_cg_id)
187			RET_LOG();
188
189		/* The userspace has created it for srv sk */
190		pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, ktp, 0, 0);
191		pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, ktp,
192						   0, 0);
193	} else {
194		pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk,
195						 &cli_cnt_init,
196						 BPF_SK_STORAGE_GET_F_CREATE);
197		pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10,
198						   sk, &cli_cnt_init,
199						   BPF_SK_STORAGE_GET_F_CREATE);
200	}
201
202	if (!pkt_out_cnt || !pkt_out_cnt10)
203		RET_LOG();
204
205	/* Even both cnt and cnt10 have lock defined in their BTF,
206	 * intentionally one cnt takes lock while one does not
207	 * as a test for the spinlock support in BPF_MAP_TYPE_SK_STORAGE.
208	 */
209	pkt_out_cnt->cnt += 1;
210	bpf_spin_lock(&pkt_out_cnt10->lock);
211	pkt_out_cnt10->cnt += 10;
212	bpf_spin_unlock(&pkt_out_cnt10->lock);
213
214	return CG_OK;
215}
216
217SEC("cgroup_skb/ingress")
218int ingress_read_sock_fields(struct __sk_buff *skb)
219{
220	struct bpf_tcp_sock *tp;
221	__u32 linum, linum_idx;
222	struct bpf_sock *sk;
223
224	linum_idx = INGRESS_LINUM_IDX;
225
226	sk = skb->sk;
227	if (!sk)
228		RET_LOG();
229
230	/* Not the testing ingress traffic to the server */
231	if (sk->family != AF_INET6 || !is_loopback6(sk->src_ip6) ||
232	    sk->src_port != bpf_ntohs(srv_sa6.sin6_port))
233		return CG_OK;
234
235	/* Only interested in the listening socket */
236	if (sk->state != BPF_TCP_LISTEN)
237		return CG_OK;
238
239	/* It must be a fullsock for cgroup_skb/ingress prog */
240	sk = bpf_sk_fullsock(sk);
241	if (!sk)
242		RET_LOG();
243
244	tp = bpf_tcp_sock(sk);
245	if (!tp)
246		RET_LOG();
247
248	skcpy(&listen_sk, sk);
249	tpcpy(&listen_tp, tp);
250
251	return CG_OK;
252}
253
254/*
255 * NOTE: 4-byte load from bpf_sock at dst_port offset is quirky. It
256 * gets rewritten by the access converter to a 2-byte load for
257 * backward compatibility. Treating the load result as a be16 value
258 * makes the code portable across little- and big-endian platforms.
259 */
260static __noinline bool sk_dst_port__load_word(struct bpf_sock *sk)
261{
262	__u32 *word = (__u32 *)&sk->dst_port;
263	return word[0] == bpf_htons(0xcafe);
264}
265
266static __noinline bool sk_dst_port__load_half(struct bpf_sock *sk)
267{
268	__u16 *half;
269
270	asm volatile ("");
271	half = (__u16 *)&sk->dst_port;
272	return half[0] == bpf_htons(0xcafe);
273}
274
275static __noinline bool sk_dst_port__load_byte(struct bpf_sock *sk)
276{
277	__u8 *byte = (__u8 *)&sk->dst_port;
278	return byte[0] == 0xca && byte[1] == 0xfe;
279}
280
281SEC("cgroup_skb/egress")
282int read_sk_dst_port(struct __sk_buff *skb)
283{
284	__u32 linum, linum_idx;
285	struct bpf_sock *sk;
286
287	linum_idx = READ_SK_DST_PORT_LINUM_IDX;
288
289	sk = skb->sk;
290	if (!sk)
291		RET_LOG();
292
293	/* Ignore everything but the SYN from the client socket */
294	if (sk->state != BPF_TCP_SYN_SENT)
295		return CG_OK;
296
297	if (!sk_dst_port__load_word(sk))
298		RET_LOG();
299	if (!sk_dst_port__load_half(sk))
300		RET_LOG();
301	if (!sk_dst_port__load_byte(sk))
302		RET_LOG();
303
304	return CG_OK;
305}
306
307char _license[] SEC("license") = "GPL";