Loading...
1// SPDX-License-Identifier: GPL-2.0
2#include <string.h>
3#include <linux/tcp.h>
4#include <linux/bpf.h>
5#include <netinet/in.h>
6#include <bpf/bpf_helpers.h>
7
8char _license[] SEC("license") = "GPL";
9
10int page_size = 0; /* userspace should set it */
11
12#ifndef SOL_TCP
13#define SOL_TCP IPPROTO_TCP
14#endif
15
16#define SOL_CUSTOM 0xdeadbeef
17
18struct sockopt_sk {
19 __u8 val;
20};
21
22struct {
23 __uint(type, BPF_MAP_TYPE_SK_STORAGE);
24 __uint(map_flags, BPF_F_NO_PREALLOC);
25 __type(key, int);
26 __type(value, struct sockopt_sk);
27} socket_storage_map SEC(".maps");
28
29SEC("cgroup/getsockopt")
30int _getsockopt(struct bpf_sockopt *ctx)
31{
32 __u8 *optval_end = ctx->optval_end;
33 __u8 *optval = ctx->optval;
34 struct sockopt_sk *storage;
35 struct bpf_sock *sk;
36
37 /* Bypass AF_NETLINK. */
38 sk = ctx->sk;
39 if (sk && sk->family == AF_NETLINK)
40 goto out;
41
42 /* Make sure bpf_get_netns_cookie is callable.
43 */
44 if (bpf_get_netns_cookie(NULL) == 0)
45 return 0;
46
47 if (bpf_get_netns_cookie(ctx) == 0)
48 return 0;
49
50 if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
51 /* Not interested in SOL_IP:IP_TOS;
52 * let next BPF program in the cgroup chain or kernel
53 * handle it.
54 */
55 goto out;
56 }
57
58 if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
59 /* Not interested in SOL_SOCKET:SO_SNDBUF;
60 * let next BPF program in the cgroup chain or kernel
61 * handle it.
62 */
63 goto out;
64 }
65
66 if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
67 /* Not interested in SOL_TCP:TCP_CONGESTION;
68 * let next BPF program in the cgroup chain or kernel
69 * handle it.
70 */
71 goto out;
72 }
73
74 if (ctx->level == SOL_TCP && ctx->optname == TCP_ZEROCOPY_RECEIVE) {
75 /* Verify that TCP_ZEROCOPY_RECEIVE triggers.
76 * It has a custom implementation for performance
77 * reasons.
78 */
79
80 /* Check that optval contains address (__u64) */
81 if (optval + sizeof(__u64) > optval_end)
82 return 0; /* bounds check */
83
84 if (((struct tcp_zerocopy_receive *)optval)->address != 0)
85 return 0; /* unexpected data */
86
87 goto out;
88 }
89
90 if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
91 if (optval + 1 > optval_end)
92 return 0; /* bounds check */
93
94 ctx->retval = 0; /* Reset system call return value to zero */
95
96 /* Always export 0x55 */
97 optval[0] = 0x55;
98 ctx->optlen = 1;
99
100 /* Userspace buffer is PAGE_SIZE * 2, but BPF
101 * program can only see the first PAGE_SIZE
102 * bytes of data.
103 */
104 if (optval_end - optval != page_size)
105 return 0; /* unexpected data size */
106
107 return 1;
108 }
109
110 if (ctx->level != SOL_CUSTOM)
111 return 0; /* deny everything except custom level */
112
113 if (optval + 1 > optval_end)
114 return 0; /* bounds check */
115
116 storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
117 BPF_SK_STORAGE_GET_F_CREATE);
118 if (!storage)
119 return 0; /* couldn't get sk storage */
120
121 if (!ctx->retval)
122 return 0; /* kernel should not have handled
123 * SOL_CUSTOM, something is wrong!
124 */
125 ctx->retval = 0; /* Reset system call return value to zero */
126
127 optval[0] = storage->val;
128 ctx->optlen = 1;
129
130 return 1;
131
132out:
133 /* optval larger than PAGE_SIZE use kernel's buffer. */
134 if (ctx->optlen > page_size)
135 ctx->optlen = 0;
136 return 1;
137}
138
139SEC("cgroup/setsockopt")
140int _setsockopt(struct bpf_sockopt *ctx)
141{
142 __u8 *optval_end = ctx->optval_end;
143 __u8 *optval = ctx->optval;
144 struct sockopt_sk *storage;
145 struct bpf_sock *sk;
146
147 /* Bypass AF_NETLINK. */
148 sk = ctx->sk;
149 if (sk && sk->family == AF_NETLINK)
150 goto out;
151
152 /* Make sure bpf_get_netns_cookie is callable.
153 */
154 if (bpf_get_netns_cookie(NULL) == 0)
155 return 0;
156
157 if (bpf_get_netns_cookie(ctx) == 0)
158 return 0;
159
160 if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
161 /* Not interested in SOL_IP:IP_TOS;
162 * let next BPF program in the cgroup chain or kernel
163 * handle it.
164 */
165 ctx->optlen = 0; /* bypass optval>PAGE_SIZE */
166 return 1;
167 }
168
169 if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
170 /* Overwrite SO_SNDBUF value */
171
172 if (optval + sizeof(__u32) > optval_end)
173 return 0; /* bounds check */
174
175 *(__u32 *)optval = 0x55AA;
176 ctx->optlen = 4;
177
178 return 1;
179 }
180
181 if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
182 /* Always use cubic */
183
184 if (optval + 5 > optval_end)
185 return 0; /* bounds check */
186
187 memcpy(optval, "cubic", 5);
188 ctx->optlen = 5;
189
190 return 1;
191 }
192
193 if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
194 /* Original optlen is larger than PAGE_SIZE. */
195 if (ctx->optlen != page_size * 2)
196 return 0; /* unexpected data size */
197
198 if (optval + 1 > optval_end)
199 return 0; /* bounds check */
200
201 /* Make sure we can trim the buffer. */
202 optval[0] = 0;
203 ctx->optlen = 1;
204
205 /* Usepace buffer is PAGE_SIZE * 2, but BPF
206 * program can only see the first PAGE_SIZE
207 * bytes of data.
208 */
209 if (optval_end - optval != page_size)
210 return 0; /* unexpected data size */
211
212 return 1;
213 }
214
215 if (ctx->level != SOL_CUSTOM)
216 return 0; /* deny everything except custom level */
217
218 if (optval + 1 > optval_end)
219 return 0; /* bounds check */
220
221 storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
222 BPF_SK_STORAGE_GET_F_CREATE);
223 if (!storage)
224 return 0; /* couldn't get sk storage */
225
226 storage->val = optval[0];
227 ctx->optlen = -1; /* BPF has consumed this option, don't call kernel
228 * setsockopt handler.
229 */
230
231 return 1;
232
233out:
234 /* optval larger than PAGE_SIZE use kernel's buffer. */
235 if (ctx->optlen > page_size)
236 ctx->optlen = 0;
237 return 1;
238}
1// SPDX-License-Identifier: GPL-2.0
2#include <string.h>
3#include <linux/tcp.h>
4#include <linux/bpf.h>
5#include <netinet/in.h>
6#include <bpf/bpf_helpers.h>
7
8char _license[] SEC("license") = "GPL";
9
10int page_size = 0; /* userspace should set it */
11
12#ifndef SOL_TCP
13#define SOL_TCP IPPROTO_TCP
14#endif
15
16#define SOL_CUSTOM 0xdeadbeef
17
18struct sockopt_sk {
19 __u8 val;
20};
21
22struct {
23 __uint(type, BPF_MAP_TYPE_SK_STORAGE);
24 __uint(map_flags, BPF_F_NO_PREALLOC);
25 __type(key, int);
26 __type(value, struct sockopt_sk);
27} socket_storage_map SEC(".maps");
28
29SEC("cgroup/getsockopt")
30int _getsockopt(struct bpf_sockopt *ctx)
31{
32 __u8 *optval_end = ctx->optval_end;
33 __u8 *optval = ctx->optval;
34 struct sockopt_sk *storage;
35
36 if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
37 /* Not interested in SOL_IP:IP_TOS;
38 * let next BPF program in the cgroup chain or kernel
39 * handle it.
40 */
41 ctx->optlen = 0; /* bypass optval>PAGE_SIZE */
42 return 1;
43 }
44
45 if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
46 /* Not interested in SOL_SOCKET:SO_SNDBUF;
47 * let next BPF program in the cgroup chain or kernel
48 * handle it.
49 */
50 return 1;
51 }
52
53 if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
54 /* Not interested in SOL_TCP:TCP_CONGESTION;
55 * let next BPF program in the cgroup chain or kernel
56 * handle it.
57 */
58 return 1;
59 }
60
61 if (ctx->level == SOL_TCP && ctx->optname == TCP_ZEROCOPY_RECEIVE) {
62 /* Verify that TCP_ZEROCOPY_RECEIVE triggers.
63 * It has a custom implementation for performance
64 * reasons.
65 */
66
67 if (optval + sizeof(struct tcp_zerocopy_receive) > optval_end)
68 return 0; /* EPERM, bounds check */
69
70 if (((struct tcp_zerocopy_receive *)optval)->address != 0)
71 return 0; /* EPERM, unexpected data */
72
73 return 1;
74 }
75
76 if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
77 if (optval + 1 > optval_end)
78 return 0; /* EPERM, bounds check */
79
80 ctx->retval = 0; /* Reset system call return value to zero */
81
82 /* Always export 0x55 */
83 optval[0] = 0x55;
84 ctx->optlen = 1;
85
86 /* Userspace buffer is PAGE_SIZE * 2, but BPF
87 * program can only see the first PAGE_SIZE
88 * bytes of data.
89 */
90 if (optval_end - optval != page_size)
91 return 0; /* EPERM, unexpected data size */
92
93 return 1;
94 }
95
96 if (ctx->level != SOL_CUSTOM)
97 return 0; /* EPERM, deny everything except custom level */
98
99 if (optval + 1 > optval_end)
100 return 0; /* EPERM, bounds check */
101
102 storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
103 BPF_SK_STORAGE_GET_F_CREATE);
104 if (!storage)
105 return 0; /* EPERM, couldn't get sk storage */
106
107 if (!ctx->retval)
108 return 0; /* EPERM, kernel should not have handled
109 * SOL_CUSTOM, something is wrong!
110 */
111 ctx->retval = 0; /* Reset system call return value to zero */
112
113 optval[0] = storage->val;
114 ctx->optlen = 1;
115
116 return 1;
117}
118
119SEC("cgroup/setsockopt")
120int _setsockopt(struct bpf_sockopt *ctx)
121{
122 __u8 *optval_end = ctx->optval_end;
123 __u8 *optval = ctx->optval;
124 struct sockopt_sk *storage;
125
126 if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
127 /* Not interested in SOL_IP:IP_TOS;
128 * let next BPF program in the cgroup chain or kernel
129 * handle it.
130 */
131 ctx->optlen = 0; /* bypass optval>PAGE_SIZE */
132 return 1;
133 }
134
135 if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
136 /* Overwrite SO_SNDBUF value */
137
138 if (optval + sizeof(__u32) > optval_end)
139 return 0; /* EPERM, bounds check */
140
141 *(__u32 *)optval = 0x55AA;
142 ctx->optlen = 4;
143
144 return 1;
145 }
146
147 if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
148 /* Always use cubic */
149
150 if (optval + 5 > optval_end)
151 return 0; /* EPERM, bounds check */
152
153 memcpy(optval, "cubic", 5);
154 ctx->optlen = 5;
155
156 return 1;
157 }
158
159 if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
160 /* Original optlen is larger than PAGE_SIZE. */
161 if (ctx->optlen != page_size * 2)
162 return 0; /* EPERM, unexpected data size */
163
164 if (optval + 1 > optval_end)
165 return 0; /* EPERM, bounds check */
166
167 /* Make sure we can trim the buffer. */
168 optval[0] = 0;
169 ctx->optlen = 1;
170
171 /* Usepace buffer is PAGE_SIZE * 2, but BPF
172 * program can only see the first PAGE_SIZE
173 * bytes of data.
174 */
175 if (optval_end - optval != page_size)
176 return 0; /* EPERM, unexpected data size */
177
178 return 1;
179 }
180
181 if (ctx->level != SOL_CUSTOM)
182 return 0; /* EPERM, deny everything except custom level */
183
184 if (optval + 1 > optval_end)
185 return 0; /* EPERM, bounds check */
186
187 storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
188 BPF_SK_STORAGE_GET_F_CREATE);
189 if (!storage)
190 return 0; /* EPERM, couldn't get sk storage */
191
192 storage->val = optval[0];
193 ctx->optlen = -1; /* BPF has consumed this option, don't call kernel
194 * setsockopt handler.
195 */
196
197 return 1;
198}