Linux Audio

Check our new training course

In-person Linux kernel drivers training

Jun 16-20, 2025
Register
Loading...
v6.13.7
  1// SPDX-License-Identifier: GPL-2.0
  2#include <string.h>
  3#include <linux/tcp.h>
  4#include <linux/bpf.h>
  5#include <netinet/in.h>
  6#include <bpf/bpf_helpers.h>
  7
  8char _license[] SEC("license") = "GPL";
  9
 10int page_size = 0; /* userspace should set it */
 11
 12#ifndef SOL_TCP
 13#define SOL_TCP IPPROTO_TCP
 14#endif
 15
 16#define SOL_CUSTOM			0xdeadbeef
 17
 18struct sockopt_sk {
 19	__u8 val;
 20};
 21
 22struct {
 23	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
 24	__uint(map_flags, BPF_F_NO_PREALLOC);
 25	__type(key, int);
 26	__type(value, struct sockopt_sk);
 27} socket_storage_map SEC(".maps");
 28
 29SEC("cgroup/getsockopt")
 30int _getsockopt(struct bpf_sockopt *ctx)
 31{
 32	__u8 *optval_end = ctx->optval_end;
 33	__u8 *optval = ctx->optval;
 34	struct sockopt_sk *storage;
 35	struct bpf_sock *sk;
 36
 37	/* Bypass AF_NETLINK. */
 38	sk = ctx->sk;
 39	if (sk && sk->family == AF_NETLINK)
 40		goto out;
 41
 42	/* Make sure bpf_get_netns_cookie is callable.
 43	 */
 44	if (bpf_get_netns_cookie(NULL) == 0)
 45		return 0;
 46
 47	if (bpf_get_netns_cookie(ctx) == 0)
 48		return 0;
 49
 50	if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
 51		/* Not interested in SOL_IP:IP_TOS;
 52		 * let next BPF program in the cgroup chain or kernel
 53		 * handle it.
 54		 */
 55		goto out;
 56	}
 57
 58	if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
 59		/* Not interested in SOL_SOCKET:SO_SNDBUF;
 60		 * let next BPF program in the cgroup chain or kernel
 61		 * handle it.
 62		 */
 63		goto out;
 64	}
 65
 66	if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
 67		/* Not interested in SOL_TCP:TCP_CONGESTION;
 68		 * let next BPF program in the cgroup chain or kernel
 69		 * handle it.
 70		 */
 71		goto out;
 72	}
 73
 74	if (ctx->level == SOL_TCP && ctx->optname == TCP_ZEROCOPY_RECEIVE) {
 75		/* Verify that TCP_ZEROCOPY_RECEIVE triggers.
 76		 * It has a custom implementation for performance
 77		 * reasons.
 78		 */
 79
 80		/* Check that optval contains address (__u64) */
 81		if (optval + sizeof(__u64) > optval_end)
 82			return 0; /* bounds check */
 83
 84		if (((struct tcp_zerocopy_receive *)optval)->address != 0)
 85			return 0; /* unexpected data */
 86
 87		goto out;
 88	}
 89
 90	if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
 91		if (optval + 1 > optval_end)
 92			return 0; /* bounds check */
 93
 94		ctx->retval = 0; /* Reset system call return value to zero */
 95
 96		/* Always export 0x55 */
 97		optval[0] = 0x55;
 98		ctx->optlen = 1;
 99
100		/* Userspace buffer is PAGE_SIZE * 2, but BPF
101		 * program can only see the first PAGE_SIZE
102		 * bytes of data.
103		 */
104		if (optval_end - optval != page_size)
105			return 0; /* unexpected data size */
106
107		return 1;
108	}
109
110	if (ctx->level != SOL_CUSTOM)
111		return 0; /* deny everything except custom level */
112
113	if (optval + 1 > optval_end)
114		return 0; /* bounds check */
115
116	storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
117				     BPF_SK_STORAGE_GET_F_CREATE);
118	if (!storage)
119		return 0; /* couldn't get sk storage */
120
121	if (!ctx->retval)
122		return 0; /* kernel should not have handled
123			   * SOL_CUSTOM, something is wrong!
124			   */
125	ctx->retval = 0; /* Reset system call return value to zero */
126
127	optval[0] = storage->val;
128	ctx->optlen = 1;
129
130	return 1;
131
132out:
133	/* optval larger than PAGE_SIZE use kernel's buffer. */
134	if (ctx->optlen > page_size)
135		ctx->optlen = 0;
136	return 1;
137}
138
139SEC("cgroup/setsockopt")
140int _setsockopt(struct bpf_sockopt *ctx)
141{
142	__u8 *optval_end = ctx->optval_end;
143	__u8 *optval = ctx->optval;
144	struct sockopt_sk *storage;
145	struct bpf_sock *sk;
146
147	/* Bypass AF_NETLINK. */
148	sk = ctx->sk;
149	if (sk && sk->family == AF_NETLINK)
150		goto out;
151
152	/* Make sure bpf_get_netns_cookie is callable.
153	 */
154	if (bpf_get_netns_cookie(NULL) == 0)
155		return 0;
156
157	if (bpf_get_netns_cookie(ctx) == 0)
158		return 0;
159
160	if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
161		/* Not interested in SOL_IP:IP_TOS;
162		 * let next BPF program in the cgroup chain or kernel
163		 * handle it.
164		 */
165		ctx->optlen = 0; /* bypass optval>PAGE_SIZE */
166		return 1;
167	}
168
169	if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
170		/* Overwrite SO_SNDBUF value */
171
172		if (optval + sizeof(__u32) > optval_end)
173			return 0; /* bounds check */
174
175		*(__u32 *)optval = 0x55AA;
176		ctx->optlen = 4;
177
178		return 1;
179	}
180
181	if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
182		/* Always use cubic */
183
184		if (optval + 5 > optval_end)
185			return 0; /* bounds check */
186
187		memcpy(optval, "cubic", 5);
188		ctx->optlen = 5;
189
190		return 1;
191	}
192
193	if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
194		/* Original optlen is larger than PAGE_SIZE. */
195		if (ctx->optlen != page_size * 2)
196			return 0; /* unexpected data size */
197
198		if (optval + 1 > optval_end)
199			return 0; /* bounds check */
200
201		/* Make sure we can trim the buffer. */
202		optval[0] = 0;
203		ctx->optlen = 1;
204
205		/* Usepace buffer is PAGE_SIZE * 2, but BPF
206		 * program can only see the first PAGE_SIZE
207		 * bytes of data.
208		 */
209		if (optval_end - optval != page_size)
210			return 0; /* unexpected data size */
211
212		return 1;
213	}
214
215	if (ctx->level != SOL_CUSTOM)
216		return 0; /* deny everything except custom level */
217
218	if (optval + 1 > optval_end)
219		return 0; /* bounds check */
220
221	storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
222				     BPF_SK_STORAGE_GET_F_CREATE);
223	if (!storage)
224		return 0; /* couldn't get sk storage */
225
226	storage->val = optval[0];
227	ctx->optlen = -1; /* BPF has consumed this option, don't call kernel
228			   * setsockopt handler.
229			   */
230
231	return 1;
232
233out:
234	/* optval larger than PAGE_SIZE use kernel's buffer. */
235	if (ctx->optlen > page_size)
236		ctx->optlen = 0;
237	return 1;
238}
v6.8
  1// SPDX-License-Identifier: GPL-2.0
  2#include <string.h>
  3#include <linux/tcp.h>
  4#include <linux/bpf.h>
  5#include <netinet/in.h>
  6#include <bpf/bpf_helpers.h>
  7
  8char _license[] SEC("license") = "GPL";
  9
 10int page_size = 0; /* userspace should set it */
 11
 12#ifndef SOL_TCP
 13#define SOL_TCP IPPROTO_TCP
 14#endif
 15
 16#define SOL_CUSTOM			0xdeadbeef
 17
 18struct sockopt_sk {
 19	__u8 val;
 20};
 21
 22struct {
 23	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
 24	__uint(map_flags, BPF_F_NO_PREALLOC);
 25	__type(key, int);
 26	__type(value, struct sockopt_sk);
 27} socket_storage_map SEC(".maps");
 28
 29SEC("cgroup/getsockopt")
 30int _getsockopt(struct bpf_sockopt *ctx)
 31{
 32	__u8 *optval_end = ctx->optval_end;
 33	__u8 *optval = ctx->optval;
 34	struct sockopt_sk *storage;
 35	struct bpf_sock *sk;
 36
 37	/* Bypass AF_NETLINK. */
 38	sk = ctx->sk;
 39	if (sk && sk->family == AF_NETLINK)
 40		goto out;
 41
 42	/* Make sure bpf_get_netns_cookie is callable.
 43	 */
 44	if (bpf_get_netns_cookie(NULL) == 0)
 45		return 0;
 46
 47	if (bpf_get_netns_cookie(ctx) == 0)
 48		return 0;
 49
 50	if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
 51		/* Not interested in SOL_IP:IP_TOS;
 52		 * let next BPF program in the cgroup chain or kernel
 53		 * handle it.
 54		 */
 55		goto out;
 56	}
 57
 58	if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
 59		/* Not interested in SOL_SOCKET:SO_SNDBUF;
 60		 * let next BPF program in the cgroup chain or kernel
 61		 * handle it.
 62		 */
 63		goto out;
 64	}
 65
 66	if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
 67		/* Not interested in SOL_TCP:TCP_CONGESTION;
 68		 * let next BPF program in the cgroup chain or kernel
 69		 * handle it.
 70		 */
 71		goto out;
 72	}
 73
 74	if (ctx->level == SOL_TCP && ctx->optname == TCP_ZEROCOPY_RECEIVE) {
 75		/* Verify that TCP_ZEROCOPY_RECEIVE triggers.
 76		 * It has a custom implementation for performance
 77		 * reasons.
 78		 */
 79
 80		/* Check that optval contains address (__u64) */
 81		if (optval + sizeof(__u64) > optval_end)
 82			return 0; /* bounds check */
 83
 84		if (((struct tcp_zerocopy_receive *)optval)->address != 0)
 85			return 0; /* unexpected data */
 86
 87		goto out;
 88	}
 89
 90	if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
 91		if (optval + 1 > optval_end)
 92			return 0; /* bounds check */
 93
 94		ctx->retval = 0; /* Reset system call return value to zero */
 95
 96		/* Always export 0x55 */
 97		optval[0] = 0x55;
 98		ctx->optlen = 1;
 99
100		/* Userspace buffer is PAGE_SIZE * 2, but BPF
101		 * program can only see the first PAGE_SIZE
102		 * bytes of data.
103		 */
104		if (optval_end - optval != page_size)
105			return 0; /* unexpected data size */
106
107		return 1;
108	}
109
110	if (ctx->level != SOL_CUSTOM)
111		return 0; /* deny everything except custom level */
112
113	if (optval + 1 > optval_end)
114		return 0; /* bounds check */
115
116	storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
117				     BPF_SK_STORAGE_GET_F_CREATE);
118	if (!storage)
119		return 0; /* couldn't get sk storage */
120
121	if (!ctx->retval)
122		return 0; /* kernel should not have handled
123			   * SOL_CUSTOM, something is wrong!
124			   */
125	ctx->retval = 0; /* Reset system call return value to zero */
126
127	optval[0] = storage->val;
128	ctx->optlen = 1;
129
130	return 1;
131
132out:
133	/* optval larger than PAGE_SIZE use kernel's buffer. */
134	if (ctx->optlen > page_size)
135		ctx->optlen = 0;
136	return 1;
137}
138
139SEC("cgroup/setsockopt")
140int _setsockopt(struct bpf_sockopt *ctx)
141{
142	__u8 *optval_end = ctx->optval_end;
143	__u8 *optval = ctx->optval;
144	struct sockopt_sk *storage;
145	struct bpf_sock *sk;
146
147	/* Bypass AF_NETLINK. */
148	sk = ctx->sk;
149	if (sk && sk->family == AF_NETLINK)
150		goto out;
151
152	/* Make sure bpf_get_netns_cookie is callable.
153	 */
154	if (bpf_get_netns_cookie(NULL) == 0)
155		return 0;
156
157	if (bpf_get_netns_cookie(ctx) == 0)
158		return 0;
159
160	if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
161		/* Not interested in SOL_IP:IP_TOS;
162		 * let next BPF program in the cgroup chain or kernel
163		 * handle it.
164		 */
165		ctx->optlen = 0; /* bypass optval>PAGE_SIZE */
166		return 1;
167	}
168
169	if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
170		/* Overwrite SO_SNDBUF value */
171
172		if (optval + sizeof(__u32) > optval_end)
173			return 0; /* bounds check */
174
175		*(__u32 *)optval = 0x55AA;
176		ctx->optlen = 4;
177
178		return 1;
179	}
180
181	if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
182		/* Always use cubic */
183
184		if (optval + 5 > optval_end)
185			return 0; /* bounds check */
186
187		memcpy(optval, "cubic", 5);
188		ctx->optlen = 5;
189
190		return 1;
191	}
192
193	if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
194		/* Original optlen is larger than PAGE_SIZE. */
195		if (ctx->optlen != page_size * 2)
196			return 0; /* unexpected data size */
197
198		if (optval + 1 > optval_end)
199			return 0; /* bounds check */
200
201		/* Make sure we can trim the buffer. */
202		optval[0] = 0;
203		ctx->optlen = 1;
204
205		/* Usepace buffer is PAGE_SIZE * 2, but BPF
206		 * program can only see the first PAGE_SIZE
207		 * bytes of data.
208		 */
209		if (optval_end - optval != page_size)
210			return 0; /* unexpected data size */
211
212		return 1;
213	}
214
215	if (ctx->level != SOL_CUSTOM)
216		return 0; /* deny everything except custom level */
217
218	if (optval + 1 > optval_end)
219		return 0; /* bounds check */
220
221	storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
222				     BPF_SK_STORAGE_GET_F_CREATE);
223	if (!storage)
224		return 0; /* couldn't get sk storage */
225
226	storage->val = optval[0];
227	ctx->optlen = -1; /* BPF has consumed this option, don't call kernel
228			   * setsockopt handler.
229			   */
230
231	return 1;
232
233out:
234	/* optval larger than PAGE_SIZE use kernel's buffer. */
235	if (ctx->optlen > page_size)
236		ctx->optlen = 0;
237	return 1;
238}