Loading...
1// SPDX-License-Identifier: GPL-2.0
2/* Copyright (c) 2018 Facebook */
3
4#include <stdlib.h>
5#include <unistd.h>
6#include <stdbool.h>
7#include <string.h>
8#include <errno.h>
9#include <assert.h>
10#include <fcntl.h>
11#include <linux/bpf.h>
12#include <linux/err.h>
13#include <linux/types.h>
14#include <linux/if_ether.h>
15#include <sys/types.h>
16#include <sys/epoll.h>
17#include <sys/socket.h>
18#include <netinet/in.h>
19#include <bpf/bpf.h>
20#include <bpf/libbpf.h>
21#include "bpf_util.h"
22
23#include "test_progs.h"
24#include "test_select_reuseport_common.h"
25
26#define MAX_TEST_NAME 80
27#define MIN_TCPHDR_LEN 20
28#define UDPHDR_LEN 8
29
30#define TCP_SYNCOOKIE_SYSCTL "/proc/sys/net/ipv4/tcp_syncookies"
31#define TCP_FO_SYSCTL "/proc/sys/net/ipv4/tcp_fastopen"
32#define REUSEPORT_ARRAY_SIZE 32
33
34static int result_map, tmp_index_ovr_map, linum_map, data_check_map;
35static __u32 expected_results[NR_RESULTS];
36static int sk_fds[REUSEPORT_ARRAY_SIZE];
37static int reuseport_array = -1, outer_map = -1;
38static enum bpf_map_type inner_map_type;
39static int select_by_skb_data_prog;
40static struct bpf_object *obj;
41static __u32 index_zero;
42static int epfd;
43
44static union sa46 {
45 struct sockaddr_in6 v6;
46 struct sockaddr_in v4;
47 sa_family_t family;
48} srv_sa;
49
/*
 * Bail out of a void function when @condition holds.
 *
 * The condition is passed through CHECK_FAIL(); when it is true,
 * "<tag> <format>" is printed with the remaining printf-style arguments
 * and the enclosing function returns.
 */
#define RET_IF(condition, tag, format...) do {				\
	if (CHECK_FAIL(condition)) {					\
		printf(tag " " format);					\
		return;							\
	}								\
} while (0)
56
/*
 * Same as RET_IF() but for functions returning an int: when @condition
 * holds, print "<tag> <format>" and make the enclosing function
 * return -1.
 */
#define RET_ERR(condition, tag, format...) do {				\
	if (CHECK_FAIL(condition)) {					\
		printf(tag " " format);					\
		return -1;						\
	}								\
} while (0)
63
64static int create_maps(enum bpf_map_type inner_type)
65{
66 LIBBPF_OPTS(bpf_map_create_opts, opts);
67
68 inner_map_type = inner_type;
69
70 /* Creating reuseport_array */
71 reuseport_array = bpf_map_create(inner_type, "reuseport_array",
72 sizeof(__u32), sizeof(__u32), REUSEPORT_ARRAY_SIZE, NULL);
73 RET_ERR(reuseport_array < 0, "creating reuseport_array",
74 "reuseport_array:%d errno:%d\n", reuseport_array, errno);
75
76 /* Creating outer_map */
77 opts.inner_map_fd = reuseport_array;
78 outer_map = bpf_map_create(BPF_MAP_TYPE_ARRAY_OF_MAPS, "outer_map",
79 sizeof(__u32), sizeof(__u32), 1, &opts);
80 RET_ERR(outer_map < 0, "creating outer_map",
81 "outer_map:%d errno:%d\n", outer_map, errno);
82
83 return 0;
84}
85
86static int prepare_bpf_obj(void)
87{
88 struct bpf_program *prog;
89 struct bpf_map *map;
90 int err;
91
92 obj = bpf_object__open("test_select_reuseport_kern.bpf.o");
93 err = libbpf_get_error(obj);
94 RET_ERR(err, "open test_select_reuseport_kern.bpf.o",
95 "obj:%p PTR_ERR(obj):%d\n", obj, err);
96
97 map = bpf_object__find_map_by_name(obj, "outer_map");
98 RET_ERR(!map, "find outer_map", "!map\n");
99 err = bpf_map__reuse_fd(map, outer_map);
100 RET_ERR(err, "reuse outer_map", "err:%d\n", err);
101
102 err = bpf_object__load(obj);
103 RET_ERR(err, "load bpf_object", "err:%d\n", err);
104
105 prog = bpf_object__next_program(obj, NULL);
106 RET_ERR(!prog, "get first bpf_program", "!prog\n");
107 select_by_skb_data_prog = bpf_program__fd(prog);
108 RET_ERR(select_by_skb_data_prog < 0, "get prog fd",
109 "select_by_skb_data_prog:%d\n", select_by_skb_data_prog);
110
111 map = bpf_object__find_map_by_name(obj, "result_map");
112 RET_ERR(!map, "find result_map", "!map\n");
113 result_map = bpf_map__fd(map);
114 RET_ERR(result_map < 0, "get result_map fd",
115 "result_map:%d\n", result_map);
116
117 map = bpf_object__find_map_by_name(obj, "tmp_index_ovr_map");
118 RET_ERR(!map, "find tmp_index_ovr_map\n", "!map");
119 tmp_index_ovr_map = bpf_map__fd(map);
120 RET_ERR(tmp_index_ovr_map < 0, "get tmp_index_ovr_map fd",
121 "tmp_index_ovr_map:%d\n", tmp_index_ovr_map);
122
123 map = bpf_object__find_map_by_name(obj, "linum_map");
124 RET_ERR(!map, "find linum_map", "!map\n");
125 linum_map = bpf_map__fd(map);
126 RET_ERR(linum_map < 0, "get linum_map fd",
127 "linum_map:%d\n", linum_map);
128
129 map = bpf_object__find_map_by_name(obj, "data_check_map");
130 RET_ERR(!map, "find data_check_map", "!map\n");
131 data_check_map = bpf_map__fd(map);
132 RET_ERR(data_check_map < 0, "get data_check_map fd",
133 "data_check_map:%d\n", data_check_map);
134
135 return 0;
136}
137
138static void sa46_init_loopback(union sa46 *sa, sa_family_t family)
139{
140 memset(sa, 0, sizeof(*sa));
141 sa->family = family;
142 if (sa->family == AF_INET6)
143 sa->v6.sin6_addr = in6addr_loopback;
144 else
145 sa->v4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
146}
147
148static void sa46_init_inany(union sa46 *sa, sa_family_t family)
149{
150 memset(sa, 0, sizeof(*sa));
151 sa->family = family;
152 if (sa->family == AF_INET6)
153 sa->v6.sin6_addr = in6addr_any;
154 else
155 sa->v4.sin_addr.s_addr = INADDR_ANY;
156}
157
/*
 * Read the integer stored in the sysctl file @sysctl.
 *
 * Returns the value parsed by atoi(), or -1 after recording a test
 * failure when the file cannot be opened or read.
 */
static int read_int_sysctl(const char *sysctl)
{
	int fd, ret, saved_errno;
	char buf[16];

	fd = open(sysctl, O_RDONLY);
	RET_ERR(fd == -1, "open(sysctl)",
		"sysctl:%s fd:%d errno:%d\n", sysctl, fd, errno);

	/* Keep one byte free for the NUL terminator atoi() depends on. */
	ret = read(fd, buf, sizeof(buf) - 1);
	saved_errno = errno;
	/* Close before the check so the fd is released on failure too. */
	close(fd);
	RET_ERR(ret <= 0, "read(sysctl)",
		"sysctl:%s ret:%d errno:%d\n", sysctl, ret, saved_errno);

	buf[ret] = '\0';
	return atoi(buf);
}
174
/*
 * Write @v, formatted as a decimal string, to the sysctl file @sysctl.
 *
 * Returns 0 on success, -1 after recording a test failure when the file
 * cannot be opened or the value is not written in full.
 */
static int write_int_sysctl(const char *sysctl, int v)
{
	int fd, ret, size, saved_errno;
	char buf[16];

	fd = open(sysctl, O_RDWR);
	RET_ERR(fd == -1, "open(sysctl)",
		"sysctl:%s fd:%d errno:%d\n", sysctl, fd, errno);

	size = snprintf(buf, sizeof(buf), "%d", v);
	ret = write(fd, buf, size);
	saved_errno = errno;
	/* Close before the check so the fd is released on failure too. */
	close(fd);
	RET_ERR(ret != size, "write(sysctl)",
		"sysctl:%s ret:%d size:%d errno:%d\n",
		sysctl, ret, size, saved_errno);

	return 0;
}
193
194static int enable_fastopen(void)
195{
196 int fo;
197
198 fo = read_int_sysctl(TCP_FO_SYSCTL);
199 if (fo < 0)
200 return -1;
201
202 return write_int_sysctl(TCP_FO_SYSCTL, fo | 7);
203}
204
205static int enable_syncookie(void)
206{
207 return write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 2);
208}
209
210static int disable_syncookie(void)
211{
212 return write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 0);
213}
214
215static long get_linum(void)
216{
217 __u32 linum;
218 int err;
219
220 err = bpf_map_lookup_elem(linum_map, &index_zero, &linum);
221 RET_ERR(err < 0, "lookup_elem(linum_map)", "err:%d errno:%d\n",
222 err, errno);
223
224 return linum;
225}
226
227static void check_data(int type, sa_family_t family, const struct cmd *cmd,
228 int cli_fd)
229{
230 struct data_check expected = {}, result;
231 union sa46 cli_sa;
232 socklen_t addrlen;
233 int err;
234
235 addrlen = sizeof(cli_sa);
236 err = getsockname(cli_fd, (struct sockaddr *)&cli_sa,
237 &addrlen);
238 RET_IF(err < 0, "getsockname(cli_fd)", "err:%d errno:%d\n",
239 err, errno);
240
241 err = bpf_map_lookup_elem(data_check_map, &index_zero, &result);
242 RET_IF(err < 0, "lookup_elem(data_check_map)", "err:%d errno:%d\n",
243 err, errno);
244
245 if (type == SOCK_STREAM) {
246 expected.len = MIN_TCPHDR_LEN;
247 expected.ip_protocol = IPPROTO_TCP;
248 } else {
249 expected.len = UDPHDR_LEN;
250 expected.ip_protocol = IPPROTO_UDP;
251 }
252
253 if (family == AF_INET6) {
254 expected.eth_protocol = htons(ETH_P_IPV6);
255 expected.bind_inany = !srv_sa.v6.sin6_addr.s6_addr32[3] &&
256 !srv_sa.v6.sin6_addr.s6_addr32[2] &&
257 !srv_sa.v6.sin6_addr.s6_addr32[1] &&
258 !srv_sa.v6.sin6_addr.s6_addr32[0];
259
260 memcpy(&expected.skb_addrs[0], cli_sa.v6.sin6_addr.s6_addr32,
261 sizeof(cli_sa.v6.sin6_addr));
262 memcpy(&expected.skb_addrs[4], &in6addr_loopback,
263 sizeof(in6addr_loopback));
264 expected.skb_ports[0] = cli_sa.v6.sin6_port;
265 expected.skb_ports[1] = srv_sa.v6.sin6_port;
266 } else {
267 expected.eth_protocol = htons(ETH_P_IP);
268 expected.bind_inany = !srv_sa.v4.sin_addr.s_addr;
269
270 expected.skb_addrs[0] = cli_sa.v4.sin_addr.s_addr;
271 expected.skb_addrs[1] = htonl(INADDR_LOOPBACK);
272 expected.skb_ports[0] = cli_sa.v4.sin_port;
273 expected.skb_ports[1] = srv_sa.v4.sin_port;
274 }
275
276 if (memcmp(&result, &expected, offsetof(struct data_check,
277 equal_check_end))) {
278 printf("unexpected data_check\n");
279 printf(" result: (0x%x, %u, %u)\n",
280 result.eth_protocol, result.ip_protocol,
281 result.bind_inany);
282 printf("expected: (0x%x, %u, %u)\n",
283 expected.eth_protocol, expected.ip_protocol,
284 expected.bind_inany);
285 RET_IF(1, "data_check result != expected",
286 "bpf_prog_linum:%ld\n", get_linum());
287 }
288
289 RET_IF(!result.hash, "data_check result.hash empty",
290 "result.hash:%u", result.hash);
291
292 expected.len += cmd ? sizeof(*cmd) : 0;
293 if (type == SOCK_STREAM)
294 RET_IF(expected.len > result.len, "expected.len > result.len",
295 "expected.len:%u result.len:%u bpf_prog_linum:%ld\n",
296 expected.len, result.len, get_linum());
297 else
298 RET_IF(expected.len != result.len, "expected.len != result.len",
299 "expected.len:%u result.len:%u bpf_prog_linum:%ld\n",
300 expected.len, result.len, get_linum());
301}
302
303static const char *result_to_str(enum result res)
304{
305 switch (res) {
306 case DROP_ERR_INNER_MAP:
307 return "DROP_ERR_INNER_MAP";
308 case DROP_ERR_SKB_DATA:
309 return "DROP_ERR_SKB_DATA";
310 case DROP_ERR_SK_SELECT_REUSEPORT:
311 return "DROP_ERR_SK_SELECT_REUSEPORT";
312 case DROP_MISC:
313 return "DROP_MISC";
314 case PASS:
315 return "PASS";
316 case PASS_ERR_SK_SELECT_REUSEPORT:
317 return "PASS_ERR_SK_SELECT_REUSEPORT";
318 default:
319 return "UNKNOWN";
320 }
321}
322
323static void check_results(void)
324{
325 __u32 results[NR_RESULTS];
326 __u32 i, broken = 0;
327 int err;
328
329 for (i = 0; i < NR_RESULTS; i++) {
330 err = bpf_map_lookup_elem(result_map, &i, &results[i]);
331 RET_IF(err < 0, "lookup_elem(result_map)",
332 "i:%u err:%d errno:%d\n", i, err, errno);
333 }
334
335 for (i = 0; i < NR_RESULTS; i++) {
336 if (results[i] != expected_results[i]) {
337 broken = i;
338 break;
339 }
340 }
341
342 if (i == NR_RESULTS)
343 return;
344
345 printf("unexpected result\n");
346 printf(" result: [");
347 printf("%u", results[0]);
348 for (i = 1; i < NR_RESULTS; i++)
349 printf(", %u", results[i]);
350 printf("]\n");
351
352 printf("expected: [");
353 printf("%u", expected_results[0]);
354 for (i = 1; i < NR_RESULTS; i++)
355 printf(", %u", expected_results[i]);
356 printf("]\n");
357
358 printf("mismatch on %s (bpf_prog_linum:%ld)\n", result_to_str(broken),
359 get_linum());
360
361 CHECK_FAIL(true);
362}
363
364static int send_data(int type, sa_family_t family, void *data, size_t len,
365 enum result expected)
366{
367 union sa46 cli_sa;
368 int fd, err;
369
370 fd = socket(family, type, 0);
371 RET_ERR(fd == -1, "socket()", "fd:%d errno:%d\n", fd, errno);
372
373 sa46_init_loopback(&cli_sa, family);
374 err = bind(fd, (struct sockaddr *)&cli_sa, sizeof(cli_sa));
375 RET_ERR(fd == -1, "bind(cli_sa)", "err:%d errno:%d\n", err, errno);
376
377 err = sendto(fd, data, len, MSG_FASTOPEN, (struct sockaddr *)&srv_sa,
378 sizeof(srv_sa));
379 RET_ERR(err != len && expected >= PASS,
380 "sendto()", "family:%u err:%d errno:%d expected:%d\n",
381 family, err, errno, expected);
382
383 return fd;
384}
385
386static void do_test(int type, sa_family_t family, struct cmd *cmd,
387 enum result expected)
388{
389 int nev, srv_fd, cli_fd;
390 struct epoll_event ev;
391 struct cmd rcv_cmd;
392 ssize_t nread;
393
394 cli_fd = send_data(type, family, cmd, cmd ? sizeof(*cmd) : 0,
395 expected);
396 if (cli_fd < 0)
397 return;
398 nev = epoll_wait(epfd, &ev, 1, expected >= PASS ? 5 : 0);
399 RET_IF((nev <= 0 && expected >= PASS) ||
400 (nev > 0 && expected < PASS),
401 "nev <> expected",
402 "nev:%d expected:%d type:%d family:%d data:(%d, %d)\n",
403 nev, expected, type, family,
404 cmd ? cmd->reuseport_index : -1,
405 cmd ? cmd->pass_on_failure : -1);
406 check_results();
407 check_data(type, family, cmd, cli_fd);
408
409 if (expected < PASS)
410 return;
411
412 RET_IF(expected != PASS_ERR_SK_SELECT_REUSEPORT &&
413 cmd->reuseport_index != ev.data.u32,
414 "check cmd->reuseport_index",
415 "cmd:(%u, %u) ev.data.u32:%u\n",
416 cmd->pass_on_failure, cmd->reuseport_index, ev.data.u32);
417
418 srv_fd = sk_fds[ev.data.u32];
419 if (type == SOCK_STREAM) {
420 int new_fd = accept(srv_fd, NULL, 0);
421
422 RET_IF(new_fd == -1, "accept(srv_fd)",
423 "ev.data.u32:%u new_fd:%d errno:%d\n",
424 ev.data.u32, new_fd, errno);
425
426 nread = recv(new_fd, &rcv_cmd, sizeof(rcv_cmd), MSG_DONTWAIT);
427 RET_IF(nread != sizeof(rcv_cmd),
428 "recv(new_fd)",
429 "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n",
430 ev.data.u32, nread, sizeof(rcv_cmd), errno);
431
432 close(new_fd);
433 } else {
434 nread = recv(srv_fd, &rcv_cmd, sizeof(rcv_cmd), MSG_DONTWAIT);
435 RET_IF(nread != sizeof(rcv_cmd),
436 "recv(sk_fds)",
437 "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n",
438 ev.data.u32, nread, sizeof(rcv_cmd), errno);
439 }
440
441 close(cli_fd);
442}
443
444static void test_err_inner_map(int type, sa_family_t family)
445{
446 struct cmd cmd = {
447 .reuseport_index = 0,
448 .pass_on_failure = 0,
449 };
450
451 expected_results[DROP_ERR_INNER_MAP]++;
452 do_test(type, family, &cmd, DROP_ERR_INNER_MAP);
453}
454
455static void test_err_skb_data(int type, sa_family_t family)
456{
457 expected_results[DROP_ERR_SKB_DATA]++;
458 do_test(type, family, NULL, DROP_ERR_SKB_DATA);
459}
460
461static void test_err_sk_select_port(int type, sa_family_t family)
462{
463 struct cmd cmd = {
464 .reuseport_index = REUSEPORT_ARRAY_SIZE,
465 .pass_on_failure = 0,
466 };
467
468 expected_results[DROP_ERR_SK_SELECT_REUSEPORT]++;
469 do_test(type, family, &cmd, DROP_ERR_SK_SELECT_REUSEPORT);
470}
471
472static void test_pass(int type, sa_family_t family)
473{
474 struct cmd cmd;
475 int i;
476
477 cmd.pass_on_failure = 0;
478 for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++) {
479 expected_results[PASS]++;
480 cmd.reuseport_index = i;
481 do_test(type, family, &cmd, PASS);
482 }
483}
484
485static void test_syncookie(int type, sa_family_t family)
486{
487 int err, tmp_index = 1;
488 struct cmd cmd = {
489 .reuseport_index = 0,
490 .pass_on_failure = 0,
491 };
492
493 /*
494 * +1 for TCP-SYN and
495 * +1 for the TCP-ACK (ack the syncookie)
496 */
497 expected_results[PASS] += 2;
498 enable_syncookie();
499 /*
500 * Simulate TCP-SYN and TCP-ACK are handled by two different sk:
501 * TCP-SYN: select sk_fds[tmp_index = 1] tmp_index is from the
502 * tmp_index_ovr_map
503 * TCP-ACK: select sk_fds[reuseport_index = 0] reuseport_index
504 * is from the cmd.reuseport_index
505 */
506 err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero,
507 &tmp_index, BPF_ANY);
508 RET_IF(err < 0, "update_elem(tmp_index_ovr_map, 0, 1)",
509 "err:%d errno:%d\n", err, errno);
510 do_test(type, family, &cmd, PASS);
511 err = bpf_map_lookup_elem(tmp_index_ovr_map, &index_zero,
512 &tmp_index);
513 RET_IF(err < 0 || tmp_index >= 0,
514 "lookup_elem(tmp_index_ovr_map)",
515 "err:%d errno:%d tmp_index:%d\n",
516 err, errno, tmp_index);
517 disable_syncookie();
518}
519
520static void test_pass_on_err(int type, sa_family_t family)
521{
522 struct cmd cmd = {
523 .reuseport_index = REUSEPORT_ARRAY_SIZE,
524 .pass_on_failure = 1,
525 };
526
527 expected_results[PASS_ERR_SK_SELECT_REUSEPORT] += 1;
528 do_test(type, family, &cmd, PASS_ERR_SK_SELECT_REUSEPORT);
529}
530
/*
 * Check that the BPF program no longer runs once detached.
 *
 * SO_DETACH_REUSEPORT_BPF is issued on sk_fds[0] and must succeed; a
 * second detach through sk_fds[1] must fail with ENOENT. A request is
 * then sent, some server socket must become readable, and the sum of all
 * result_map counters must be unchanged.
 *
 * The subtest is skipped when SO_DETACH_REUSEPORT_BPF is not defined.
 */
static void test_detach_bpf(int type, sa_family_t family)
{
#ifdef SO_DETACH_REUSEPORT_BPF
	__u32 nr_run_before = 0, nr_run_after = 0, tmp, i;
	struct epoll_event ev;
	int cli_fd, err, nev;
	struct cmd cmd = {};
	int optvalue = 0;

	err = setsockopt(sk_fds[0], SOL_SOCKET, SO_DETACH_REUSEPORT_BPF,
			 &optvalue, sizeof(optvalue));
	RET_IF(err == -1, "setsockopt(SO_DETACH_REUSEPORT_BPF)",
	       "err:%d errno:%d\n", err, errno);

	err = setsockopt(sk_fds[1], SOL_SOCKET, SO_DETACH_REUSEPORT_BPF,
			 &optvalue, sizeof(optvalue));
	RET_IF(err == 0 || errno != ENOENT,
	       "setsockopt(SO_DETACH_REUSEPORT_BPF)",
	       "err:%d errno:%d\n", err, errno);

	for (i = 0; i < NR_RESULTS; i++) {
		err = bpf_map_lookup_elem(result_map, &i, &tmp);
		RET_IF(err < 0, "lookup_elem(result_map)",
		       "i:%u err:%d errno:%d\n", i, err, errno);
		nr_run_before += tmp;
	}

	cli_fd = send_data(type, family, &cmd, sizeof(cmd), PASS);
	if (cli_fd < 0)
		return;

	/*
	 * From here on failures jump to close_cli instead of returning, so
	 * the client socket is always released.
	 */
	nev = epoll_wait(epfd, &ev, 1, 5);
	if (CHECK_FAIL(nev <= 0)) {
		printf("nev <= 0" " "
		       "nev:%d expected:1 type:%d family:%d data:(0, 0)\n",
		       nev, type, family);
		goto close_cli;
	}

	for (i = 0; i < NR_RESULTS; i++) {
		err = bpf_map_lookup_elem(result_map, &i, &tmp);
		if (CHECK_FAIL(err < 0)) {
			printf("lookup_elem(result_map)" " "
			       "i:%u err:%d errno:%d\n", i, err, errno);
			goto close_cli;
		}
		nr_run_after += tmp;
	}

	if (CHECK_FAIL(nr_run_before != nr_run_after))
		printf("nr_run_before != nr_run_after" " "
		       "nr_run_before:%u nr_run_after:%u\n",
		       nr_run_before, nr_run_after);

close_cli:
	close(cli_fd);
#else
	test__skip();
#endif
}
583
584static void prepare_sk_fds(int type, sa_family_t family, bool inany)
585{
586 const int first = REUSEPORT_ARRAY_SIZE - 1;
587 int i, err, optval = 1;
588 struct epoll_event ev;
589 socklen_t addrlen;
590
591 if (inany)
592 sa46_init_inany(&srv_sa, family);
593 else
594 sa46_init_loopback(&srv_sa, family);
595 addrlen = sizeof(srv_sa);
596
597 /*
598 * The sk_fds[] is filled from the back such that the order
599 * is exactly opposite to the (struct sock_reuseport *)reuse->socks[].
600 */
601 for (i = first; i >= 0; i--) {
602 sk_fds[i] = socket(family, type, 0);
603 RET_IF(sk_fds[i] == -1, "socket()", "sk_fds[%d]:%d errno:%d\n",
604 i, sk_fds[i], errno);
605 err = setsockopt(sk_fds[i], SOL_SOCKET, SO_REUSEPORT,
606 &optval, sizeof(optval));
607 RET_IF(err == -1, "setsockopt(SO_REUSEPORT)",
608 "sk_fds[%d] err:%d errno:%d\n",
609 i, err, errno);
610
611 if (i == first) {
612 err = setsockopt(sk_fds[i], SOL_SOCKET,
613 SO_ATTACH_REUSEPORT_EBPF,
614 &select_by_skb_data_prog,
615 sizeof(select_by_skb_data_prog));
616 RET_IF(err < 0, "setsockopt(SO_ATTACH_REUEPORT_EBPF)",
617 "err:%d errno:%d\n", err, errno);
618 }
619
620 err = bind(sk_fds[i], (struct sockaddr *)&srv_sa, addrlen);
621 RET_IF(err < 0, "bind()", "sk_fds[%d] err:%d errno:%d\n",
622 i, err, errno);
623
624 if (type == SOCK_STREAM) {
625 err = listen(sk_fds[i], 10);
626 RET_IF(err < 0, "listen()",
627 "sk_fds[%d] err:%d errno:%d\n",
628 i, err, errno);
629 }
630
631 err = bpf_map_update_elem(reuseport_array, &i, &sk_fds[i],
632 BPF_NOEXIST);
633 RET_IF(err < 0, "update_elem(reuseport_array)",
634 "sk_fds[%d] err:%d errno:%d\n", i, err, errno);
635
636 if (i == first) {
637 socklen_t addrlen = sizeof(srv_sa);
638
639 err = getsockname(sk_fds[i], (struct sockaddr *)&srv_sa,
640 &addrlen);
641 RET_IF(err == -1, "getsockname()",
642 "sk_fds[%d] err:%d errno:%d\n", i, err, errno);
643 }
644 }
645
646 epfd = epoll_create(1);
647 RET_IF(epfd == -1, "epoll_create(1)",
648 "epfd:%d errno:%d\n", epfd, errno);
649
650 ev.events = EPOLLIN;
651 for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++) {
652 ev.data.u32 = i;
653 err = epoll_ctl(epfd, EPOLL_CTL_ADD, sk_fds[i], &ev);
654 RET_IF(err, "epoll_ctl(EPOLL_CTL_ADD)", "sk_fds[%d]\n", i);
655 }
656}
657
658static void setup_per_test(int type, sa_family_t family, bool inany,
659 bool no_inner_map)
660{
661 int ovr = -1, err;
662
663 prepare_sk_fds(type, family, inany);
664 err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero, &ovr,
665 BPF_ANY);
666 RET_IF(err < 0, "update_elem(tmp_index_ovr_map, 0, -1)",
667 "err:%d errno:%d\n", err, errno);
668
669 /* Install reuseport_array to outer_map? */
670 if (no_inner_map)
671 return;
672
673 err = bpf_map_update_elem(outer_map, &index_zero, &reuseport_array,
674 BPF_ANY);
675 RET_IF(err < 0, "update_elem(outer_map, 0, reuseport_array)",
676 "err:%d errno:%d\n", err, errno);
677}
678
679static void cleanup_per_test(bool no_inner_map)
680{
681 int i, err, zero = 0;
682
683 memset(expected_results, 0, sizeof(expected_results));
684
685 for (i = 0; i < NR_RESULTS; i++) {
686 err = bpf_map_update_elem(result_map, &i, &zero, BPF_ANY);
687 RET_IF(err, "reset elem in result_map",
688 "i:%u err:%d errno:%d\n", i, err, errno);
689 }
690
691 err = bpf_map_update_elem(linum_map, &zero, &zero, BPF_ANY);
692 RET_IF(err, "reset line number in linum_map", "err:%d errno:%d\n",
693 err, errno);
694
695 for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++)
696 close(sk_fds[i]);
697 close(epfd);
698
699 /* Delete reuseport_array from outer_map? */
700 if (no_inner_map)
701 return;
702
703 err = bpf_map_delete_elem(outer_map, &index_zero);
704 RET_IF(err < 0, "delete_elem(outer_map)",
705 "err:%d errno:%d\n", err, errno);
706}
707
708static void cleanup(void)
709{
710 if (outer_map >= 0) {
711 close(outer_map);
712 outer_map = -1;
713 }
714
715 if (reuseport_array >= 0) {
716 close(reuseport_array);
717 reuseport_array = -1;
718 }
719
720 if (obj) {
721 bpf_object__close(obj);
722 obj = NULL;
723 }
724
725 memset(expected_results, 0, sizeof(expected_results));
726}
727
728static const char *maptype_str(enum bpf_map_type type)
729{
730 switch (type) {
731 case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
732 return "reuseport_sockarray";
733 case BPF_MAP_TYPE_SOCKMAP:
734 return "sockmap";
735 case BPF_MAP_TYPE_SOCKHASH:
736 return "sockhash";
737 default:
738 return "unknown";
739 }
740}
741
/*
 * Name of an address family for use in subtest names; "unknown" for
 * anything other than AF_INET and AF_INET6.
 */
static const char *family_str(sa_family_t family)
{
	if (family == AF_INET)
		return "IPv4";
	if (family == AF_INET6)
		return "IPv6";

	return "unknown";
}
753
/*
 * Name of a socket type for use in subtest names; "unknown" for anything
 * other than SOCK_STREAM and SOCK_DGRAM.
 */
static const char *sotype_str(int sotype)
{
	if (sotype == SOCK_STREAM)
		return "TCP";
	if (sotype == SOCK_DGRAM)
		return "UDP";

	return "unknown";
}
765
766#define TEST_INIT(fn_, ...) { .fn = fn_, .name = #fn_, __VA_ARGS__ }
767
768static void test_config(int sotype, sa_family_t family, bool inany)
769{
770 const struct test {
771 void (*fn)(int sotype, sa_family_t family);
772 const char *name;
773 bool no_inner_map;
774 int need_sotype;
775 } tests[] = {
776 TEST_INIT(test_err_inner_map,
777 .no_inner_map = true),
778 TEST_INIT(test_err_skb_data),
779 TEST_INIT(test_err_sk_select_port),
780 TEST_INIT(test_pass),
781 TEST_INIT(test_syncookie,
782 .need_sotype = SOCK_STREAM),
783 TEST_INIT(test_pass_on_err),
784 TEST_INIT(test_detach_bpf),
785 };
786 struct netns_obj *netns;
787 char s[MAX_TEST_NAME];
788 const struct test *t;
789
790 for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
791 if (t->need_sotype && t->need_sotype != sotype)
792 continue; /* test not compatible with socket type */
793
794 snprintf(s, sizeof(s), "%s %s/%s %s %s",
795 maptype_str(inner_map_type),
796 family_str(family), sotype_str(sotype),
797 inany ? "INANY" : "LOOPBACK", t->name);
798
799 if (!test__start_subtest(s))
800 continue;
801
802 netns = netns_new("select_reuseport", true);
803 if (!ASSERT_OK_PTR(netns, "netns_new"))
804 continue;
805
806 if (CHECK_FAIL(enable_fastopen()))
807 goto out;
808 if (CHECK_FAIL(disable_syncookie()))
809 goto out;
810
811 setup_per_test(sotype, family, inany, t->no_inner_map);
812 t->fn(sotype, family);
813 cleanup_per_test(t->no_inner_map);
814
815out:
816 netns_free(netns);
817 }
818}
819
/* Readable value for the inany member of the config table. */
#define BIND_INANY true

/*
 * Run test_config() for every socket type / family / bind address
 * combination in the table. Entries without BIND_INANY bind to
 * loopback.
 */
static void test_all(void)
{
	const struct config {
		int sotype;
		sa_family_t family;
		bool inany;
	} configs[] = {
		{ SOCK_STREAM, AF_INET },
		{ SOCK_STREAM, AF_INET, BIND_INANY },
		{ SOCK_STREAM, AF_INET6 },
		{ SOCK_STREAM, AF_INET6, BIND_INANY },
		{ SOCK_DGRAM, AF_INET },
		{ SOCK_DGRAM, AF_INET6 },
	};
	unsigned int idx;

	for (idx = 0; idx < ARRAY_SIZE(configs); idx++)
		test_config(configs[idx].sotype, configs[idx].family,
			    configs[idx].inany);
}
841
842void test_map_type(enum bpf_map_type mt)
843{
844 if (create_maps(mt))
845 goto out;
846 if (prepare_bpf_obj())
847 goto out;
848
849 test_all();
850out:
851 cleanup();
852}
853
854void serial_test_select_reuseport(void)
855{
856 test_map_type(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
857 test_map_type(BPF_MAP_TYPE_SOCKMAP);
858 test_map_type(BPF_MAP_TYPE_SOCKHASH);
859}
1// SPDX-License-Identifier: GPL-2.0
2/* Copyright (c) 2018 Facebook */
3
4#include <stdlib.h>
5#include <unistd.h>
6#include <stdbool.h>
7#include <string.h>
8#include <errno.h>
9#include <assert.h>
10#include <fcntl.h>
11#include <linux/bpf.h>
12#include <linux/err.h>
13#include <linux/types.h>
14#include <linux/if_ether.h>
15#include <sys/types.h>
16#include <sys/epoll.h>
17#include <sys/socket.h>
18#include <netinet/in.h>
19#include <bpf/bpf.h>
20#include <bpf/libbpf.h>
21#include "bpf_rlimit.h"
22#include "bpf_util.h"
23
24#include "test_progs.h"
25#include "test_select_reuseport_common.h"
26
27#define MAX_TEST_NAME 80
28#define MIN_TCPHDR_LEN 20
29#define UDPHDR_LEN 8
30
31#define TCP_SYNCOOKIE_SYSCTL "/proc/sys/net/ipv4/tcp_syncookies"
32#define TCP_FO_SYSCTL "/proc/sys/net/ipv4/tcp_fastopen"
33#define REUSEPORT_ARRAY_SIZE 32
34
35static int result_map, tmp_index_ovr_map, linum_map, data_check_map;
36static __u32 expected_results[NR_RESULTS];
37static int sk_fds[REUSEPORT_ARRAY_SIZE];
38static int reuseport_array = -1, outer_map = -1;
39static enum bpf_map_type inner_map_type;
40static int select_by_skb_data_prog;
41static int saved_tcp_syncookie = -1;
42static struct bpf_object *obj;
43static int saved_tcp_fo = -1;
44static __u32 index_zero;
45static int epfd;
46
47static union sa46 {
48 struct sockaddr_in6 v6;
49 struct sockaddr_in v4;
50 sa_family_t family;
51} srv_sa;
52
/*
 * Bail out of a void function when @condition holds.
 *
 * The condition is passed through CHECK_FAIL(); when it is true,
 * "<tag> <format>" is printed with the remaining printf-style arguments
 * and the enclosing function returns.
 */
#define RET_IF(condition, tag, format...) do {				\
	if (CHECK_FAIL(condition)) {					\
		printf(tag " " format);					\
		return;							\
	}								\
} while (0)
59
/*
 * Same as RET_IF() but for functions returning an int: when @condition
 * holds, print "<tag> <format>" and make the enclosing function
 * return -1.
 */
#define RET_ERR(condition, tag, format...) do {				\
	if (CHECK_FAIL(condition)) {					\
		printf(tag " " format);					\
		return -1;						\
	}								\
} while (0)
66
67static int create_maps(enum bpf_map_type inner_type)
68{
69 struct bpf_create_map_attr attr = {};
70
71 inner_map_type = inner_type;
72
73 /* Creating reuseport_array */
74 attr.name = "reuseport_array";
75 attr.map_type = inner_type;
76 attr.key_size = sizeof(__u32);
77 attr.value_size = sizeof(__u32);
78 attr.max_entries = REUSEPORT_ARRAY_SIZE;
79
80 reuseport_array = bpf_create_map_xattr(&attr);
81 RET_ERR(reuseport_array < 0, "creating reuseport_array",
82 "reuseport_array:%d errno:%d\n", reuseport_array, errno);
83
84 /* Creating outer_map */
85 attr.name = "outer_map";
86 attr.map_type = BPF_MAP_TYPE_ARRAY_OF_MAPS;
87 attr.key_size = sizeof(__u32);
88 attr.value_size = sizeof(__u32);
89 attr.max_entries = 1;
90 attr.inner_map_fd = reuseport_array;
91 outer_map = bpf_create_map_xattr(&attr);
92 RET_ERR(outer_map < 0, "creating outer_map",
93 "outer_map:%d errno:%d\n", outer_map, errno);
94
95 return 0;
96}
97
98static int prepare_bpf_obj(void)
99{
100 struct bpf_program *prog;
101 struct bpf_map *map;
102 int err;
103
104 obj = bpf_object__open("test_select_reuseport_kern.o");
105 err = libbpf_get_error(obj);
106 RET_ERR(err, "open test_select_reuseport_kern.o",
107 "obj:%p PTR_ERR(obj):%d\n", obj, err);
108
109 map = bpf_object__find_map_by_name(obj, "outer_map");
110 RET_ERR(!map, "find outer_map", "!map\n");
111 err = bpf_map__reuse_fd(map, outer_map);
112 RET_ERR(err, "reuse outer_map", "err:%d\n", err);
113
114 err = bpf_object__load(obj);
115 RET_ERR(err, "load bpf_object", "err:%d\n", err);
116
117 prog = bpf_program__next(NULL, obj);
118 RET_ERR(!prog, "get first bpf_program", "!prog\n");
119 select_by_skb_data_prog = bpf_program__fd(prog);
120 RET_ERR(select_by_skb_data_prog < 0, "get prog fd",
121 "select_by_skb_data_prog:%d\n", select_by_skb_data_prog);
122
123 map = bpf_object__find_map_by_name(obj, "result_map");
124 RET_ERR(!map, "find result_map", "!map\n");
125 result_map = bpf_map__fd(map);
126 RET_ERR(result_map < 0, "get result_map fd",
127 "result_map:%d\n", result_map);
128
129 map = bpf_object__find_map_by_name(obj, "tmp_index_ovr_map");
130 RET_ERR(!map, "find tmp_index_ovr_map\n", "!map");
131 tmp_index_ovr_map = bpf_map__fd(map);
132 RET_ERR(tmp_index_ovr_map < 0, "get tmp_index_ovr_map fd",
133 "tmp_index_ovr_map:%d\n", tmp_index_ovr_map);
134
135 map = bpf_object__find_map_by_name(obj, "linum_map");
136 RET_ERR(!map, "find linum_map", "!map\n");
137 linum_map = bpf_map__fd(map);
138 RET_ERR(linum_map < 0, "get linum_map fd",
139 "linum_map:%d\n", linum_map);
140
141 map = bpf_object__find_map_by_name(obj, "data_check_map");
142 RET_ERR(!map, "find data_check_map", "!map\n");
143 data_check_map = bpf_map__fd(map);
144 RET_ERR(data_check_map < 0, "get data_check_map fd",
145 "data_check_map:%d\n", data_check_map);
146
147 return 0;
148}
149
150static void sa46_init_loopback(union sa46 *sa, sa_family_t family)
151{
152 memset(sa, 0, sizeof(*sa));
153 sa->family = family;
154 if (sa->family == AF_INET6)
155 sa->v6.sin6_addr = in6addr_loopback;
156 else
157 sa->v4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
158}
159
160static void sa46_init_inany(union sa46 *sa, sa_family_t family)
161{
162 memset(sa, 0, sizeof(*sa));
163 sa->family = family;
164 if (sa->family == AF_INET6)
165 sa->v6.sin6_addr = in6addr_any;
166 else
167 sa->v4.sin_addr.s_addr = INADDR_ANY;
168}
169
/*
 * Read the integer stored in the sysctl file @sysctl.
 *
 * Returns the value parsed by atoi(), or -1 after recording a test
 * failure when the file cannot be opened or read.
 */
static int read_int_sysctl(const char *sysctl)
{
	int fd, ret, saved_errno;
	char buf[16];

	fd = open(sysctl, O_RDONLY);
	RET_ERR(fd == -1, "open(sysctl)",
		"sysctl:%s fd:%d errno:%d\n", sysctl, fd, errno);

	/* Keep one byte free for the NUL terminator atoi() depends on. */
	ret = read(fd, buf, sizeof(buf) - 1);
	saved_errno = errno;
	/* Close before the check so the fd is released on failure too. */
	close(fd);
	RET_ERR(ret <= 0, "read(sysctl)",
		"sysctl:%s ret:%d errno:%d\n", sysctl, ret, saved_errno);

	buf[ret] = '\0';
	return atoi(buf);
}
186
/*
 * Write @v, formatted as a decimal string, to the sysctl file @sysctl.
 *
 * Returns 0 on success, -1 after recording a test failure when the file
 * cannot be opened or the value is not written in full.
 */
static int write_int_sysctl(const char *sysctl, int v)
{
	int fd, ret, size, saved_errno;
	char buf[16];

	fd = open(sysctl, O_RDWR);
	RET_ERR(fd == -1, "open(sysctl)",
		"sysctl:%s fd:%d errno:%d\n", sysctl, fd, errno);

	size = snprintf(buf, sizeof(buf), "%d", v);
	ret = write(fd, buf, size);
	saved_errno = errno;
	/* Close before the check so the fd is released on failure too. */
	close(fd);
	RET_ERR(ret != size, "write(sysctl)",
		"sysctl:%s ret:%d size:%d errno:%d\n",
		sysctl, ret, size, saved_errno);

	return 0;
}
205
206static void restore_sysctls(void)
207{
208 if (saved_tcp_fo != -1)
209 write_int_sysctl(TCP_FO_SYSCTL, saved_tcp_fo);
210 if (saved_tcp_syncookie != -1)
211 write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, saved_tcp_syncookie);
212}
213
214static int enable_fastopen(void)
215{
216 int fo;
217
218 fo = read_int_sysctl(TCP_FO_SYSCTL);
219 if (fo < 0)
220 return -1;
221
222 return write_int_sysctl(TCP_FO_SYSCTL, fo | 7);
223}
224
225static int enable_syncookie(void)
226{
227 return write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 2);
228}
229
230static int disable_syncookie(void)
231{
232 return write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 0);
233}
234
235static long get_linum(void)
236{
237 __u32 linum;
238 int err;
239
240 err = bpf_map_lookup_elem(linum_map, &index_zero, &linum);
241 RET_ERR(err < 0, "lookup_elem(linum_map)", "err:%d errno:%d\n",
242 err, errno);
243
244 return linum;
245}
246
247static void check_data(int type, sa_family_t family, const struct cmd *cmd,
248 int cli_fd)
249{
250 struct data_check expected = {}, result;
251 union sa46 cli_sa;
252 socklen_t addrlen;
253 int err;
254
255 addrlen = sizeof(cli_sa);
256 err = getsockname(cli_fd, (struct sockaddr *)&cli_sa,
257 &addrlen);
258 RET_IF(err < 0, "getsockname(cli_fd)", "err:%d errno:%d\n",
259 err, errno);
260
261 err = bpf_map_lookup_elem(data_check_map, &index_zero, &result);
262 RET_IF(err < 0, "lookup_elem(data_check_map)", "err:%d errno:%d\n",
263 err, errno);
264
265 if (type == SOCK_STREAM) {
266 expected.len = MIN_TCPHDR_LEN;
267 expected.ip_protocol = IPPROTO_TCP;
268 } else {
269 expected.len = UDPHDR_LEN;
270 expected.ip_protocol = IPPROTO_UDP;
271 }
272
273 if (family == AF_INET6) {
274 expected.eth_protocol = htons(ETH_P_IPV6);
275 expected.bind_inany = !srv_sa.v6.sin6_addr.s6_addr32[3] &&
276 !srv_sa.v6.sin6_addr.s6_addr32[2] &&
277 !srv_sa.v6.sin6_addr.s6_addr32[1] &&
278 !srv_sa.v6.sin6_addr.s6_addr32[0];
279
280 memcpy(&expected.skb_addrs[0], cli_sa.v6.sin6_addr.s6_addr32,
281 sizeof(cli_sa.v6.sin6_addr));
282 memcpy(&expected.skb_addrs[4], &in6addr_loopback,
283 sizeof(in6addr_loopback));
284 expected.skb_ports[0] = cli_sa.v6.sin6_port;
285 expected.skb_ports[1] = srv_sa.v6.sin6_port;
286 } else {
287 expected.eth_protocol = htons(ETH_P_IP);
288 expected.bind_inany = !srv_sa.v4.sin_addr.s_addr;
289
290 expected.skb_addrs[0] = cli_sa.v4.sin_addr.s_addr;
291 expected.skb_addrs[1] = htonl(INADDR_LOOPBACK);
292 expected.skb_ports[0] = cli_sa.v4.sin_port;
293 expected.skb_ports[1] = srv_sa.v4.sin_port;
294 }
295
296 if (memcmp(&result, &expected, offsetof(struct data_check,
297 equal_check_end))) {
298 printf("unexpected data_check\n");
299 printf(" result: (0x%x, %u, %u)\n",
300 result.eth_protocol, result.ip_protocol,
301 result.bind_inany);
302 printf("expected: (0x%x, %u, %u)\n",
303 expected.eth_protocol, expected.ip_protocol,
304 expected.bind_inany);
305 RET_IF(1, "data_check result != expected",
306 "bpf_prog_linum:%ld\n", get_linum());
307 }
308
309 RET_IF(!result.hash, "data_check result.hash empty",
310 "result.hash:%u", result.hash);
311
312 expected.len += cmd ? sizeof(*cmd) : 0;
313 if (type == SOCK_STREAM)
314 RET_IF(expected.len > result.len, "expected.len > result.len",
315 "expected.len:%u result.len:%u bpf_prog_linum:%ld\n",
316 expected.len, result.len, get_linum());
317 else
318 RET_IF(expected.len != result.len, "expected.len != result.len",
319 "expected.len:%u result.len:%u bpf_prog_linum:%ld\n",
320 expected.len, result.len, get_linum());
321}
322
323static const char *result_to_str(enum result res)
324{
325 switch (res) {
326 case DROP_ERR_INNER_MAP:
327 return "DROP_ERR_INNER_MAP";
328 case DROP_ERR_SKB_DATA:
329 return "DROP_ERR_SKB_DATA";
330 case DROP_ERR_SK_SELECT_REUSEPORT:
331 return "DROP_ERR_SK_SELECT_REUSEPORT";
332 case DROP_MISC:
333 return "DROP_MISC";
334 case PASS:
335 return "PASS";
336 case PASS_ERR_SK_SELECT_REUSEPORT:
337 return "PASS_ERR_SK_SELECT_REUSEPORT";
338 default:
339 return "UNKNOWN";
340 }
341}
342
343static void check_results(void)
344{
345 __u32 results[NR_RESULTS];
346 __u32 i, broken = 0;
347 int err;
348
349 for (i = 0; i < NR_RESULTS; i++) {
350 err = bpf_map_lookup_elem(result_map, &i, &results[i]);
351 RET_IF(err < 0, "lookup_elem(result_map)",
352 "i:%u err:%d errno:%d\n", i, err, errno);
353 }
354
355 for (i = 0; i < NR_RESULTS; i++) {
356 if (results[i] != expected_results[i]) {
357 broken = i;
358 break;
359 }
360 }
361
362 if (i == NR_RESULTS)
363 return;
364
365 printf("unexpected result\n");
366 printf(" result: [");
367 printf("%u", results[0]);
368 for (i = 1; i < NR_RESULTS; i++)
369 printf(", %u", results[i]);
370 printf("]\n");
371
372 printf("expected: [");
373 printf("%u", expected_results[0]);
374 for (i = 1; i < NR_RESULTS; i++)
375 printf(", %u", expected_results[i]);
376 printf("]\n");
377
378 printf("mismatch on %s (bpf_prog_linum:%ld)\n", result_to_str(broken),
379 get_linum());
380
381 CHECK_FAIL(true);
382}
383
384static int send_data(int type, sa_family_t family, void *data, size_t len,
385 enum result expected)
386{
387 union sa46 cli_sa;
388 int fd, err;
389
390 fd = socket(family, type, 0);
391 RET_ERR(fd == -1, "socket()", "fd:%d errno:%d\n", fd, errno);
392
393 sa46_init_loopback(&cli_sa, family);
394 err = bind(fd, (struct sockaddr *)&cli_sa, sizeof(cli_sa));
395 RET_ERR(fd == -1, "bind(cli_sa)", "err:%d errno:%d\n", err, errno);
396
397 err = sendto(fd, data, len, MSG_FASTOPEN, (struct sockaddr *)&srv_sa,
398 sizeof(srv_sa));
399 RET_ERR(err != len && expected >= PASS,
400 "sendto()", "family:%u err:%d errno:%d expected:%d\n",
401 family, err, errno, expected);
402
403 return fd;
404}
405
406static void do_test(int type, sa_family_t family, struct cmd *cmd,
407 enum result expected)
408{
409 int nev, srv_fd, cli_fd;
410 struct epoll_event ev;
411 struct cmd rcv_cmd;
412 ssize_t nread;
413
414 cli_fd = send_data(type, family, cmd, cmd ? sizeof(*cmd) : 0,
415 expected);
416 if (cli_fd < 0)
417 return;
418 nev = epoll_wait(epfd, &ev, 1, expected >= PASS ? 5 : 0);
419 RET_IF((nev <= 0 && expected >= PASS) ||
420 (nev > 0 && expected < PASS),
421 "nev <> expected",
422 "nev:%d expected:%d type:%d family:%d data:(%d, %d)\n",
423 nev, expected, type, family,
424 cmd ? cmd->reuseport_index : -1,
425 cmd ? cmd->pass_on_failure : -1);
426 check_results();
427 check_data(type, family, cmd, cli_fd);
428
429 if (expected < PASS)
430 return;
431
432 RET_IF(expected != PASS_ERR_SK_SELECT_REUSEPORT &&
433 cmd->reuseport_index != ev.data.u32,
434 "check cmd->reuseport_index",
435 "cmd:(%u, %u) ev.data.u32:%u\n",
436 cmd->pass_on_failure, cmd->reuseport_index, ev.data.u32);
437
438 srv_fd = sk_fds[ev.data.u32];
439 if (type == SOCK_STREAM) {
440 int new_fd = accept(srv_fd, NULL, 0);
441
442 RET_IF(new_fd == -1, "accept(srv_fd)",
443 "ev.data.u32:%u new_fd:%d errno:%d\n",
444 ev.data.u32, new_fd, errno);
445
446 nread = recv(new_fd, &rcv_cmd, sizeof(rcv_cmd), MSG_DONTWAIT);
447 RET_IF(nread != sizeof(rcv_cmd),
448 "recv(new_fd)",
449 "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n",
450 ev.data.u32, nread, sizeof(rcv_cmd), errno);
451
452 close(new_fd);
453 } else {
454 nread = recv(srv_fd, &rcv_cmd, sizeof(rcv_cmd), MSG_DONTWAIT);
455 RET_IF(nread != sizeof(rcv_cmd),
456 "recv(sk_fds)",
457 "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n",
458 ev.data.u32, nread, sizeof(rcv_cmd), errno);
459 }
460
461 close(cli_fd);
462}
463
464static void test_err_inner_map(int type, sa_family_t family)
465{
466 struct cmd cmd = {
467 .reuseport_index = 0,
468 .pass_on_failure = 0,
469 };
470
471 expected_results[DROP_ERR_INNER_MAP]++;
472 do_test(type, family, &cmd, DROP_ERR_INNER_MAP);
473}
474
475static void test_err_skb_data(int type, sa_family_t family)
476{
477 expected_results[DROP_ERR_SKB_DATA]++;
478 do_test(type, family, NULL, DROP_ERR_SKB_DATA);
479}
480
481static void test_err_sk_select_port(int type, sa_family_t family)
482{
483 struct cmd cmd = {
484 .reuseport_index = REUSEPORT_ARRAY_SIZE,
485 .pass_on_failure = 0,
486 };
487
488 expected_results[DROP_ERR_SK_SELECT_REUSEPORT]++;
489 do_test(type, family, &cmd, DROP_ERR_SK_SELECT_REUSEPORT);
490}
491
492static void test_pass(int type, sa_family_t family)
493{
494 struct cmd cmd;
495 int i;
496
497 cmd.pass_on_failure = 0;
498 for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++) {
499 expected_results[PASS]++;
500 cmd.reuseport_index = i;
501 do_test(type, family, &cmd, PASS);
502 }
503}
504
505static void test_syncookie(int type, sa_family_t family)
506{
507 int err, tmp_index = 1;
508 struct cmd cmd = {
509 .reuseport_index = 0,
510 .pass_on_failure = 0,
511 };
512
513 /*
514 * +1 for TCP-SYN and
515 * +1 for the TCP-ACK (ack the syncookie)
516 */
517 expected_results[PASS] += 2;
518 enable_syncookie();
519 /*
520 * Simulate TCP-SYN and TCP-ACK are handled by two different sk:
521 * TCP-SYN: select sk_fds[tmp_index = 1] tmp_index is from the
522 * tmp_index_ovr_map
523 * TCP-ACK: select sk_fds[reuseport_index = 0] reuseport_index
524 * is from the cmd.reuseport_index
525 */
526 err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero,
527 &tmp_index, BPF_ANY);
528 RET_IF(err < 0, "update_elem(tmp_index_ovr_map, 0, 1)",
529 "err:%d errno:%d\n", err, errno);
530 do_test(type, family, &cmd, PASS);
531 err = bpf_map_lookup_elem(tmp_index_ovr_map, &index_zero,
532 &tmp_index);
533 RET_IF(err < 0 || tmp_index >= 0,
534 "lookup_elem(tmp_index_ovr_map)",
535 "err:%d errno:%d tmp_index:%d\n",
536 err, errno, tmp_index);
537 disable_syncookie();
538}
539
540static void test_pass_on_err(int type, sa_family_t family)
541{
542 struct cmd cmd = {
543 .reuseport_index = REUSEPORT_ARRAY_SIZE,
544 .pass_on_failure = 1,
545 };
546
547 expected_results[PASS_ERR_SK_SELECT_REUSEPORT] += 1;
548 do_test(type, family, &cmd, PASS_ERR_SK_SELECT_REUSEPORT);
549}
550
#ifdef SO_DETACH_REUSEPORT_BPF
/*
 * After a packet has been sent: wait for a server socket to become
 * readable, then verify that the sum of all result_map counters still
 * equals @nr_run_before, i.e. that no counter changed.
 */
static void check_prog_not_run(int type, sa_family_t family,
			       __u32 nr_run_before)
{
	__u32 nr_run_after = 0, tmp, i;
	struct epoll_event ev;
	int err, nev;

	nev = epoll_wait(epfd, &ev, 1, 5);
	RET_IF(nev <= 0, "nev <= 0",
	       "nev:%d expected:1 type:%d family:%d data:(0, 0)\n",
	       nev, type, family);

	for (i = 0; i < NR_RESULTS; i++) {
		err = bpf_map_lookup_elem(result_map, &i, &tmp);
		RET_IF(err < 0, "lookup_elem(result_map)",
		       "i:%u err:%d errno:%d\n", i, err, errno);
		nr_run_after += tmp;
	}

	RET_IF(nr_run_before != nr_run_after,
	       "nr_run_before != nr_run_after",
	       "nr_run_before:%u nr_run_after:%u\n",
	       nr_run_before, nr_run_after);
}
#endif

/*
 * Detach the reuseport BPF program via sk_fds[0], check that a second
 * detach via sk_fds[1] fails with ENOENT, then send a packet and verify
 * that it is still delivered while the result counters stay unchanged.
 *
 * The client socket is closed on every path after it has been created.
 * Skipped when SO_DETACH_REUSEPORT_BPF is not defined at build time.
 */
static void test_detach_bpf(int type, sa_family_t family)
{
#ifdef SO_DETACH_REUSEPORT_BPF
	__u32 nr_run_before = 0, tmp, i;
	struct cmd cmd = {};
	int optvalue = 0;
	int cli_fd, err;

	err = setsockopt(sk_fds[0], SOL_SOCKET, SO_DETACH_REUSEPORT_BPF,
			 &optvalue, sizeof(optvalue));
	RET_IF(err == -1, "setsockopt(SO_DETACH_REUSEPORT_BPF)",
	       "err:%d errno:%d\n", err, errno);

	/* The program is already gone, so a second detach must fail. */
	err = setsockopt(sk_fds[1], SOL_SOCKET, SO_DETACH_REUSEPORT_BPF,
			 &optvalue, sizeof(optvalue));
	RET_IF(err == 0 || errno != ENOENT,
	       "setsockopt(SO_DETACH_REUSEPORT_BPF)",
	       "err:%d errno:%d\n", err, errno);

	for (i = 0; i < NR_RESULTS; i++) {
		err = bpf_map_lookup_elem(result_map, &i, &tmp);
		RET_IF(err < 0, "lookup_elem(result_map)",
		       "i:%u err:%d errno:%d\n", i, err, errno);
		nr_run_before += tmp;
	}

	cli_fd = send_data(type, family, &cmd, sizeof(cmd), PASS);
	if (cli_fd < 0)
		return;

	check_prog_not_run(type, family, nr_run_before);

	close(cli_fd);
#else
	test__skip();
#endif
}
603
604static void prepare_sk_fds(int type, sa_family_t family, bool inany)
605{
606 const int first = REUSEPORT_ARRAY_SIZE - 1;
607 int i, err, optval = 1;
608 struct epoll_event ev;
609 socklen_t addrlen;
610
611 if (inany)
612 sa46_init_inany(&srv_sa, family);
613 else
614 sa46_init_loopback(&srv_sa, family);
615 addrlen = sizeof(srv_sa);
616
617 /*
618 * The sk_fds[] is filled from the back such that the order
619 * is exactly opposite to the (struct sock_reuseport *)reuse->socks[].
620 */
621 for (i = first; i >= 0; i--) {
622 sk_fds[i] = socket(family, type, 0);
623 RET_IF(sk_fds[i] == -1, "socket()", "sk_fds[%d]:%d errno:%d\n",
624 i, sk_fds[i], errno);
625 err = setsockopt(sk_fds[i], SOL_SOCKET, SO_REUSEPORT,
626 &optval, sizeof(optval));
627 RET_IF(err == -1, "setsockopt(SO_REUSEPORT)",
628 "sk_fds[%d] err:%d errno:%d\n",
629 i, err, errno);
630
631 if (i == first) {
632 err = setsockopt(sk_fds[i], SOL_SOCKET,
633 SO_ATTACH_REUSEPORT_EBPF,
634 &select_by_skb_data_prog,
635 sizeof(select_by_skb_data_prog));
636 RET_IF(err < 0, "setsockopt(SO_ATTACH_REUEPORT_EBPF)",
637 "err:%d errno:%d\n", err, errno);
638 }
639
640 err = bind(sk_fds[i], (struct sockaddr *)&srv_sa, addrlen);
641 RET_IF(err < 0, "bind()", "sk_fds[%d] err:%d errno:%d\n",
642 i, err, errno);
643
644 if (type == SOCK_STREAM) {
645 err = listen(sk_fds[i], 10);
646 RET_IF(err < 0, "listen()",
647 "sk_fds[%d] err:%d errno:%d\n",
648 i, err, errno);
649 }
650
651 err = bpf_map_update_elem(reuseport_array, &i, &sk_fds[i],
652 BPF_NOEXIST);
653 RET_IF(err < 0, "update_elem(reuseport_array)",
654 "sk_fds[%d] err:%d errno:%d\n", i, err, errno);
655
656 if (i == first) {
657 socklen_t addrlen = sizeof(srv_sa);
658
659 err = getsockname(sk_fds[i], (struct sockaddr *)&srv_sa,
660 &addrlen);
661 RET_IF(err == -1, "getsockname()",
662 "sk_fds[%d] err:%d errno:%d\n", i, err, errno);
663 }
664 }
665
666 epfd = epoll_create(1);
667 RET_IF(epfd == -1, "epoll_create(1)",
668 "epfd:%d errno:%d\n", epfd, errno);
669
670 ev.events = EPOLLIN;
671 for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++) {
672 ev.data.u32 = i;
673 err = epoll_ctl(epfd, EPOLL_CTL_ADD, sk_fds[i], &ev);
674 RET_IF(err, "epoll_ctl(EPOLL_CTL_ADD)", "sk_fds[%d]\n", i);
675 }
676}
677
678static void setup_per_test(int type, sa_family_t family, bool inany,
679 bool no_inner_map)
680{
681 int ovr = -1, err;
682
683 prepare_sk_fds(type, family, inany);
684 err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero, &ovr,
685 BPF_ANY);
686 RET_IF(err < 0, "update_elem(tmp_index_ovr_map, 0, -1)",
687 "err:%d errno:%d\n", err, errno);
688
689 /* Install reuseport_array to outer_map? */
690 if (no_inner_map)
691 return;
692
693 err = bpf_map_update_elem(outer_map, &index_zero, &reuseport_array,
694 BPF_ANY);
695 RET_IF(err < 0, "update_elem(outer_map, 0, reuseport_array)",
696 "err:%d errno:%d\n", err, errno);
697}
698
699static void cleanup_per_test(bool no_inner_map)
700{
701 int i, err, zero = 0;
702
703 memset(expected_results, 0, sizeof(expected_results));
704
705 for (i = 0; i < NR_RESULTS; i++) {
706 err = bpf_map_update_elem(result_map, &i, &zero, BPF_ANY);
707 RET_IF(err, "reset elem in result_map",
708 "i:%u err:%d errno:%d\n", i, err, errno);
709 }
710
711 err = bpf_map_update_elem(linum_map, &zero, &zero, BPF_ANY);
712 RET_IF(err, "reset line number in linum_map", "err:%d errno:%d\n",
713 err, errno);
714
715 for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++)
716 close(sk_fds[i]);
717 close(epfd);
718
719 /* Delete reuseport_array from outer_map? */
720 if (no_inner_map)
721 return;
722
723 err = bpf_map_delete_elem(outer_map, &index_zero);
724 RET_IF(err < 0, "delete_elem(outer_map)",
725 "err:%d errno:%d\n", err, errno);
726}
727
728static void cleanup(void)
729{
730 if (outer_map >= 0) {
731 close(outer_map);
732 outer_map = -1;
733 }
734
735 if (reuseport_array >= 0) {
736 close(reuseport_array);
737 reuseport_array = -1;
738 }
739
740 if (obj) {
741 bpf_object__close(obj);
742 obj = NULL;
743 }
744
745 memset(expected_results, 0, sizeof(expected_results));
746}
747
748static const char *maptype_str(enum bpf_map_type type)
749{
750 switch (type) {
751 case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
752 return "reuseport_sockarray";
753 case BPF_MAP_TYPE_SOCKMAP:
754 return "sockmap";
755 case BPF_MAP_TYPE_SOCKHASH:
756 return "sockhash";
757 default:
758 return "unknown";
759 }
760}
761
/*
 * Human-readable name of an address family for subtest names:
 * "IPv4" for AF_INET, "IPv6" for AF_INET6, "unknown" otherwise.
 */
static const char *family_str(sa_family_t family)
{
	if (family == AF_INET)
		return "IPv4";
	if (family == AF_INET6)
		return "IPv6";

	return "unknown";
}
773
/*
 * Human-readable name of a socket type for subtest names:
 * "TCP" for SOCK_STREAM, "UDP" for SOCK_DGRAM, "unknown" otherwise.
 */
static const char *sotype_str(int sotype)
{
	if (sotype == SOCK_STREAM)
		return "TCP";
	if (sotype == SOCK_DGRAM)
		return "UDP";

	return "unknown";
}
785
786#define TEST_INIT(fn_, ...) { .fn = fn_, .name = #fn_, __VA_ARGS__ }
787
788static void test_config(int sotype, sa_family_t family, bool inany)
789{
790 const struct test {
791 void (*fn)(int sotype, sa_family_t family);
792 const char *name;
793 bool no_inner_map;
794 int need_sotype;
795 } tests[] = {
796 TEST_INIT(test_err_inner_map,
797 .no_inner_map = true),
798 TEST_INIT(test_err_skb_data),
799 TEST_INIT(test_err_sk_select_port),
800 TEST_INIT(test_pass),
801 TEST_INIT(test_syncookie,
802 .need_sotype = SOCK_STREAM),
803 TEST_INIT(test_pass_on_err),
804 TEST_INIT(test_detach_bpf),
805 };
806 char s[MAX_TEST_NAME];
807 const struct test *t;
808
809 for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
810 if (t->need_sotype && t->need_sotype != sotype)
811 continue; /* test not compatible with socket type */
812
813 snprintf(s, sizeof(s), "%s %s/%s %s %s",
814 maptype_str(inner_map_type),
815 family_str(family), sotype_str(sotype),
816 inany ? "INANY" : "LOOPBACK", t->name);
817
818 if (!test__start_subtest(s))
819 continue;
820
821 setup_per_test(sotype, family, inany, t->no_inner_map);
822 t->fn(sotype, family);
823 cleanup_per_test(t->no_inner_map);
824 }
825}
826
#define BIND_INANY true

/*
 * Run test_config() for every supported combination of socket type,
 * address family and bind address.  Entries that do not set .inany
 * leave it false.
 */
static void test_all(void)
{
	const struct config {
		int sotype;
		sa_family_t family;
		bool inany;
	} configs[] = {
		{ .sotype = SOCK_STREAM, .family = AF_INET },
		{ .sotype = SOCK_STREAM, .family = AF_INET, .inany = BIND_INANY },
		{ .sotype = SOCK_STREAM, .family = AF_INET6 },
		{ .sotype = SOCK_STREAM, .family = AF_INET6, .inany = BIND_INANY },
		{ .sotype = SOCK_DGRAM, .family = AF_INET },
		{ .sotype = SOCK_DGRAM, .family = AF_INET6 },
	};
	size_t n;

	for (n = 0; n < ARRAY_SIZE(configs); n++)
		test_config(configs[n].sotype, configs[n].family,
			    configs[n].inany);
}
848
849void test_map_type(enum bpf_map_type mt)
850{
851 if (create_maps(mt))
852 goto out;
853 if (prepare_bpf_obj())
854 goto out;
855
856 test_all();
857out:
858 cleanup();
859}
860
861void test_select_reuseport(void)
862{
863 saved_tcp_fo = read_int_sysctl(TCP_FO_SYSCTL);
864 if (saved_tcp_fo < 0)
865 goto out;
866 saved_tcp_syncookie = read_int_sysctl(TCP_SYNCOOKIE_SYSCTL);
867 if (saved_tcp_syncookie < 0)
868 goto out;
869
870 if (enable_fastopen())
871 goto out;
872 if (disable_syncookie())
873 goto out;
874
875 test_map_type(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
876 test_map_type(BPF_MAP_TYPE_SOCKMAP);
877 test_map_type(BPF_MAP_TYPE_SOCKHASH);
878out:
879 restore_sysctls();
880}