Linux Audio

Check our new training course

Real-Time Linux with PREEMPT_RT training

Feb 18-20, 2025
Register
Loading...
v6.13.7
  1// SPDX-License-Identifier: GPL-2.0
  2/* Copyright (c) 2018 Facebook */
  3
  4#include <stdlib.h>
  5#include <unistd.h>
  6#include <stdbool.h>
  7#include <string.h>
  8#include <errno.h>
  9#include <assert.h>
 10#include <fcntl.h>
 11#include <linux/bpf.h>
 12#include <linux/err.h>
 13#include <linux/types.h>
 14#include <linux/if_ether.h>
 15#include <sys/types.h>
 16#include <sys/epoll.h>
 17#include <sys/socket.h>
 18#include <netinet/in.h>
 19#include <bpf/bpf.h>
 20#include <bpf/libbpf.h>
 
 21#include "bpf_util.h"
 22
 23#include "test_progs.h"
 24#include "test_select_reuseport_common.h"
 25
 26#define MAX_TEST_NAME 80
 27#define MIN_TCPHDR_LEN 20
 28#define UDPHDR_LEN 8
 29
 30#define TCP_SYNCOOKIE_SYSCTL "/proc/sys/net/ipv4/tcp_syncookies"
 31#define TCP_FO_SYSCTL "/proc/sys/net/ipv4/tcp_fastopen"
 32#define REUSEPORT_ARRAY_SIZE 32
 33
 34static int result_map, tmp_index_ovr_map, linum_map, data_check_map;
 35static __u32 expected_results[NR_RESULTS];
 36static int sk_fds[REUSEPORT_ARRAY_SIZE];
 37static int reuseport_array = -1, outer_map = -1;
 38static enum bpf_map_type inner_map_type;
 39static int select_by_skb_data_prog;
 
 40static struct bpf_object *obj;
 
 41static __u32 index_zero;
 42static int epfd;
 43
 44static union sa46 {
 45	struct sockaddr_in6 v6;
 46	struct sockaddr_in v4;
 47	sa_family_t family;
 48} srv_sa;
 49
 50#define RET_IF(condition, tag, format...) ({				\
 51	if (CHECK_FAIL(condition)) {					\
 52		printf(tag " " format);					\
 53		return;							\
 54	}								\
 55})
 56
 57#define RET_ERR(condition, tag, format...) ({				\
 58	if (CHECK_FAIL(condition)) {					\
 59		printf(tag " " format);					\
 60		return -1;						\
 61	}								\
 62})
 63
 64static int create_maps(enum bpf_map_type inner_type)
 65{
 66	LIBBPF_OPTS(bpf_map_create_opts, opts);
 67
 68	inner_map_type = inner_type;
 69
 70	/* Creating reuseport_array */
 71	reuseport_array = bpf_map_create(inner_type, "reuseport_array",
 72					 sizeof(__u32), sizeof(__u32), REUSEPORT_ARRAY_SIZE, NULL);
 73	RET_ERR(reuseport_array < 0, "creating reuseport_array",
 
 
 
 
 
 74		"reuseport_array:%d errno:%d\n", reuseport_array, errno);
 75
 76	/* Creating outer_map */
 77	opts.inner_map_fd = reuseport_array;
 78	outer_map = bpf_map_create(BPF_MAP_TYPE_ARRAY_OF_MAPS, "outer_map",
 79				   sizeof(__u32), sizeof(__u32), 1, &opts);
 80	RET_ERR(outer_map < 0, "creating outer_map",
 
 
 
 
 81		"outer_map:%d errno:%d\n", outer_map, errno);
 82
 83	return 0;
 84}
 85
 86static int prepare_bpf_obj(void)
 87{
 88	struct bpf_program *prog;
 89	struct bpf_map *map;
 90	int err;
 91
 92	obj = bpf_object__open("test_select_reuseport_kern.bpf.o");
 93	err = libbpf_get_error(obj);
 94	RET_ERR(err, "open test_select_reuseport_kern.bpf.o",
 95		"obj:%p PTR_ERR(obj):%d\n", obj, err);
 96
 97	map = bpf_object__find_map_by_name(obj, "outer_map");
 98	RET_ERR(!map, "find outer_map", "!map\n");
 99	err = bpf_map__reuse_fd(map, outer_map);
100	RET_ERR(err, "reuse outer_map", "err:%d\n", err);
101
102	err = bpf_object__load(obj);
103	RET_ERR(err, "load bpf_object", "err:%d\n", err);
104
105	prog = bpf_object__next_program(obj, NULL);
106	RET_ERR(!prog, "get first bpf_program", "!prog\n");
107	select_by_skb_data_prog = bpf_program__fd(prog);
108	RET_ERR(select_by_skb_data_prog < 0, "get prog fd",
109		"select_by_skb_data_prog:%d\n", select_by_skb_data_prog);
110
111	map = bpf_object__find_map_by_name(obj, "result_map");
112	RET_ERR(!map, "find result_map", "!map\n");
113	result_map = bpf_map__fd(map);
114	RET_ERR(result_map < 0, "get result_map fd",
115		"result_map:%d\n", result_map);
116
117	map = bpf_object__find_map_by_name(obj, "tmp_index_ovr_map");
118	RET_ERR(!map, "find tmp_index_ovr_map\n", "!map");
119	tmp_index_ovr_map = bpf_map__fd(map);
120	RET_ERR(tmp_index_ovr_map < 0, "get tmp_index_ovr_map fd",
121		"tmp_index_ovr_map:%d\n", tmp_index_ovr_map);
122
123	map = bpf_object__find_map_by_name(obj, "linum_map");
124	RET_ERR(!map, "find linum_map", "!map\n");
125	linum_map = bpf_map__fd(map);
126	RET_ERR(linum_map < 0, "get linum_map fd",
127		"linum_map:%d\n", linum_map);
128
129	map = bpf_object__find_map_by_name(obj, "data_check_map");
130	RET_ERR(!map, "find data_check_map", "!map\n");
131	data_check_map = bpf_map__fd(map);
132	RET_ERR(data_check_map < 0, "get data_check_map fd",
133		"data_check_map:%d\n", data_check_map);
134
135	return 0;
136}
137
138static void sa46_init_loopback(union sa46 *sa, sa_family_t family)
139{
140	memset(sa, 0, sizeof(*sa));
141	sa->family = family;
142	if (sa->family == AF_INET6)
143		sa->v6.sin6_addr = in6addr_loopback;
144	else
145		sa->v4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
146}
147
148static void sa46_init_inany(union sa46 *sa, sa_family_t family)
149{
150	memset(sa, 0, sizeof(*sa));
151	sa->family = family;
152	if (sa->family == AF_INET6)
153		sa->v6.sin6_addr = in6addr_any;
154	else
155		sa->v4.sin_addr.s_addr = INADDR_ANY;
156}
157
/*
 * Read an integer sysctl.
 *
 * @sysctl: path of the sysctl file to read.
 *
 * Returns atoi() of the file content, or -1 (after reporting the failure)
 * when the file cannot be opened or read.  A sysctl whose value is itself
 * negative is therefore indistinguishable from an error.
 */
static int read_int_sysctl(const char *sysctl)
{
	int fd, ret, saved_errno;
	char buf[16];

	fd = open(sysctl, O_RDONLY);
	RET_ERR(fd == -1, "open(sysctl)",
		"sysctl:%s fd:%d errno:%d\n", sysctl, fd, errno);

	/* read() does not terminate the buffer; keep one byte for the NUL. */
	ret = read(fd, buf, sizeof(buf) - 1);

	/* Close before checking so the fd is not leaked on a failed read. */
	saved_errno = errno;
	close(fd);
	errno = saved_errno;

	RET_ERR(ret <= 0, "read(sysctl)",
		"sysctl:%s ret:%d errno:%d\n", sysctl, ret, errno);

	buf[ret] = '\0';
	return atoi(buf);
}
174
/*
 * Write integer @v, formatted in decimal, to the sysctl file @sysctl.
 *
 * Returns 0 on success, -1 (after reporting the failure) when the file
 * cannot be opened or the value is not written completely.
 */
static int write_int_sysctl(const char *sysctl, int v)
{
	int fd, ret, size, saved_errno;
	char buf[16];

	fd = open(sysctl, O_RDWR);
	RET_ERR(fd == -1, "open(sysctl)",
		"sysctl:%s fd:%d errno:%d\n", sysctl, fd, errno);

	size = snprintf(buf, sizeof(buf), "%d", v);
	ret = write(fd, buf, size);

	/* Close before checking so the fd is not leaked on a failed write. */
	saved_errno = errno;
	close(fd);
	errno = saved_errno;

	RET_ERR(ret != size, "write(sysctl)",
		"sysctl:%s ret:%d size:%d errno:%d\n",
		sysctl, ret, size, errno);

	return 0;
}
193
 
 
 
 
 
 
 
 
194static int enable_fastopen(void)
195{
196	int fo;
197
198	fo = read_int_sysctl(TCP_FO_SYSCTL);
199	if (fo < 0)
200		return -1;
201
202	return write_int_sysctl(TCP_FO_SYSCTL, fo | 7);
203}
204
205static int enable_syncookie(void)
206{
207	return write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 2);
208}
209
210static int disable_syncookie(void)
211{
212	return write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 0);
213}
214
215static long get_linum(void)
216{
217	__u32 linum;
218	int err;
219
220	err = bpf_map_lookup_elem(linum_map, &index_zero, &linum);
221	RET_ERR(err < 0, "lookup_elem(linum_map)", "err:%d errno:%d\n",
222		err, errno);
223
224	return linum;
225}
226
227static void check_data(int type, sa_family_t family, const struct cmd *cmd,
228		       int cli_fd)
229{
230	struct data_check expected = {}, result;
231	union sa46 cli_sa;
232	socklen_t addrlen;
233	int err;
234
235	addrlen = sizeof(cli_sa);
236	err = getsockname(cli_fd, (struct sockaddr *)&cli_sa,
237			  &addrlen);
238	RET_IF(err < 0, "getsockname(cli_fd)", "err:%d errno:%d\n",
239	       err, errno);
240
241	err = bpf_map_lookup_elem(data_check_map, &index_zero, &result);
242	RET_IF(err < 0, "lookup_elem(data_check_map)", "err:%d errno:%d\n",
243	       err, errno);
244
245	if (type == SOCK_STREAM) {
246		expected.len = MIN_TCPHDR_LEN;
247		expected.ip_protocol = IPPROTO_TCP;
248	} else {
249		expected.len = UDPHDR_LEN;
250		expected.ip_protocol = IPPROTO_UDP;
251	}
252
253	if (family == AF_INET6) {
254		expected.eth_protocol = htons(ETH_P_IPV6);
255		expected.bind_inany = !srv_sa.v6.sin6_addr.s6_addr32[3] &&
256			!srv_sa.v6.sin6_addr.s6_addr32[2] &&
257			!srv_sa.v6.sin6_addr.s6_addr32[1] &&
258			!srv_sa.v6.sin6_addr.s6_addr32[0];
259
260		memcpy(&expected.skb_addrs[0], cli_sa.v6.sin6_addr.s6_addr32,
261		       sizeof(cli_sa.v6.sin6_addr));
262		memcpy(&expected.skb_addrs[4], &in6addr_loopback,
263		       sizeof(in6addr_loopback));
264		expected.skb_ports[0] = cli_sa.v6.sin6_port;
265		expected.skb_ports[1] = srv_sa.v6.sin6_port;
266	} else {
267		expected.eth_protocol = htons(ETH_P_IP);
268		expected.bind_inany = !srv_sa.v4.sin_addr.s_addr;
269
270		expected.skb_addrs[0] = cli_sa.v4.sin_addr.s_addr;
271		expected.skb_addrs[1] = htonl(INADDR_LOOPBACK);
272		expected.skb_ports[0] = cli_sa.v4.sin_port;
273		expected.skb_ports[1] = srv_sa.v4.sin_port;
274	}
275
276	if (memcmp(&result, &expected, offsetof(struct data_check,
277						equal_check_end))) {
278		printf("unexpected data_check\n");
279		printf("  result: (0x%x, %u, %u)\n",
280		       result.eth_protocol, result.ip_protocol,
281		       result.bind_inany);
282		printf("expected: (0x%x, %u, %u)\n",
283		       expected.eth_protocol, expected.ip_protocol,
284		       expected.bind_inany);
285		RET_IF(1, "data_check result != expected",
286		       "bpf_prog_linum:%ld\n", get_linum());
287	}
288
289	RET_IF(!result.hash, "data_check result.hash empty",
290	       "result.hash:%u", result.hash);
291
292	expected.len += cmd ? sizeof(*cmd) : 0;
293	if (type == SOCK_STREAM)
294		RET_IF(expected.len > result.len, "expected.len > result.len",
295		       "expected.len:%u result.len:%u bpf_prog_linum:%ld\n",
296		       expected.len, result.len, get_linum());
297	else
298		RET_IF(expected.len != result.len, "expected.len != result.len",
299		       "expected.len:%u result.len:%u bpf_prog_linum:%ld\n",
300		       expected.len, result.len, get_linum());
301}
302
303static const char *result_to_str(enum result res)
304{
305	switch (res) {
306	case DROP_ERR_INNER_MAP:
307		return "DROP_ERR_INNER_MAP";
308	case DROP_ERR_SKB_DATA:
309		return "DROP_ERR_SKB_DATA";
310	case DROP_ERR_SK_SELECT_REUSEPORT:
311		return "DROP_ERR_SK_SELECT_REUSEPORT";
312	case DROP_MISC:
313		return "DROP_MISC";
314	case PASS:
315		return "PASS";
316	case PASS_ERR_SK_SELECT_REUSEPORT:
317		return "PASS_ERR_SK_SELECT_REUSEPORT";
318	default:
319		return "UNKNOWN";
320	}
321}
322
323static void check_results(void)
324{
325	__u32 results[NR_RESULTS];
326	__u32 i, broken = 0;
327	int err;
328
329	for (i = 0; i < NR_RESULTS; i++) {
330		err = bpf_map_lookup_elem(result_map, &i, &results[i]);
331		RET_IF(err < 0, "lookup_elem(result_map)",
332		       "i:%u err:%d errno:%d\n", i, err, errno);
333	}
334
335	for (i = 0; i < NR_RESULTS; i++) {
336		if (results[i] != expected_results[i]) {
337			broken = i;
338			break;
339		}
340	}
341
342	if (i == NR_RESULTS)
343		return;
344
345	printf("unexpected result\n");
346	printf(" result: [");
347	printf("%u", results[0]);
348	for (i = 1; i < NR_RESULTS; i++)
349		printf(", %u", results[i]);
350	printf("]\n");
351
352	printf("expected: [");
353	printf("%u", expected_results[0]);
354	for (i = 1; i < NR_RESULTS; i++)
355		printf(", %u", expected_results[i]);
356	printf("]\n");
357
358	printf("mismatch on %s (bpf_prog_linum:%ld)\n", result_to_str(broken),
359	       get_linum());
360
361	CHECK_FAIL(true);
362}
363
364static int send_data(int type, sa_family_t family, void *data, size_t len,
365		     enum result expected)
366{
367	union sa46 cli_sa;
368	int fd, err;
369
370	fd = socket(family, type, 0);
371	RET_ERR(fd == -1, "socket()", "fd:%d errno:%d\n", fd, errno);
372
373	sa46_init_loopback(&cli_sa, family);
374	err = bind(fd, (struct sockaddr *)&cli_sa, sizeof(cli_sa));
375	RET_ERR(fd == -1, "bind(cli_sa)", "err:%d errno:%d\n", err, errno);
376
377	err = sendto(fd, data, len, MSG_FASTOPEN, (struct sockaddr *)&srv_sa,
378		     sizeof(srv_sa));
379	RET_ERR(err != len && expected >= PASS,
380		"sendto()", "family:%u err:%d errno:%d expected:%d\n",
381		family, err, errno, expected);
382
383	return fd;
384}
385
386static void do_test(int type, sa_family_t family, struct cmd *cmd,
387		    enum result expected)
388{
389	int nev, srv_fd, cli_fd;
390	struct epoll_event ev;
391	struct cmd rcv_cmd;
392	ssize_t nread;
393
394	cli_fd = send_data(type, family, cmd, cmd ? sizeof(*cmd) : 0,
395			   expected);
396	if (cli_fd < 0)
397		return;
398	nev = epoll_wait(epfd, &ev, 1, expected >= PASS ? 5 : 0);
399	RET_IF((nev <= 0 && expected >= PASS) ||
400	       (nev > 0 && expected < PASS),
401	       "nev <> expected",
402	       "nev:%d expected:%d type:%d family:%d data:(%d, %d)\n",
403	       nev, expected, type, family,
404	       cmd ? cmd->reuseport_index : -1,
405	       cmd ? cmd->pass_on_failure : -1);
406	check_results();
407	check_data(type, family, cmd, cli_fd);
408
409	if (expected < PASS)
410		return;
411
412	RET_IF(expected != PASS_ERR_SK_SELECT_REUSEPORT &&
413	       cmd->reuseport_index != ev.data.u32,
414	       "check cmd->reuseport_index",
415	       "cmd:(%u, %u) ev.data.u32:%u\n",
416	       cmd->pass_on_failure, cmd->reuseport_index, ev.data.u32);
417
418	srv_fd = sk_fds[ev.data.u32];
419	if (type == SOCK_STREAM) {
420		int new_fd = accept(srv_fd, NULL, 0);
421
422		RET_IF(new_fd == -1, "accept(srv_fd)",
423		       "ev.data.u32:%u new_fd:%d errno:%d\n",
424		       ev.data.u32, new_fd, errno);
425
426		nread = recv(new_fd, &rcv_cmd, sizeof(rcv_cmd), MSG_DONTWAIT);
427		RET_IF(nread != sizeof(rcv_cmd),
428		       "recv(new_fd)",
429		       "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n",
430		       ev.data.u32, nread, sizeof(rcv_cmd), errno);
431
432		close(new_fd);
433	} else {
434		nread = recv(srv_fd, &rcv_cmd, sizeof(rcv_cmd), MSG_DONTWAIT);
435		RET_IF(nread != sizeof(rcv_cmd),
436		       "recv(sk_fds)",
437		       "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n",
438		       ev.data.u32, nread, sizeof(rcv_cmd), errno);
439	}
440
441	close(cli_fd);
442}
443
444static void test_err_inner_map(int type, sa_family_t family)
445{
446	struct cmd cmd = {
447		.reuseport_index = 0,
448		.pass_on_failure = 0,
449	};
450
451	expected_results[DROP_ERR_INNER_MAP]++;
452	do_test(type, family, &cmd, DROP_ERR_INNER_MAP);
453}
454
455static void test_err_skb_data(int type, sa_family_t family)
456{
457	expected_results[DROP_ERR_SKB_DATA]++;
458	do_test(type, family, NULL, DROP_ERR_SKB_DATA);
459}
460
461static void test_err_sk_select_port(int type, sa_family_t family)
462{
463	struct cmd cmd = {
464		.reuseport_index = REUSEPORT_ARRAY_SIZE,
465		.pass_on_failure = 0,
466	};
467
468	expected_results[DROP_ERR_SK_SELECT_REUSEPORT]++;
469	do_test(type, family, &cmd, DROP_ERR_SK_SELECT_REUSEPORT);
470}
471
472static void test_pass(int type, sa_family_t family)
473{
474	struct cmd cmd;
475	int i;
476
477	cmd.pass_on_failure = 0;
478	for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++) {
479		expected_results[PASS]++;
480		cmd.reuseport_index = i;
481		do_test(type, family, &cmd, PASS);
482	}
483}
484
485static void test_syncookie(int type, sa_family_t family)
486{
487	int err, tmp_index = 1;
488	struct cmd cmd = {
489		.reuseport_index = 0,
490		.pass_on_failure = 0,
491	};
492
493	/*
494	 * +1 for TCP-SYN and
495	 * +1 for the TCP-ACK (ack the syncookie)
496	 */
497	expected_results[PASS] += 2;
498	enable_syncookie();
499	/*
500	 * Simulate TCP-SYN and TCP-ACK are handled by two different sk:
501	 * TCP-SYN: select sk_fds[tmp_index = 1] tmp_index is from the
502	 *          tmp_index_ovr_map
503	 * TCP-ACK: select sk_fds[reuseport_index = 0] reuseport_index
504	 *          is from the cmd.reuseport_index
505	 */
506	err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero,
507				  &tmp_index, BPF_ANY);
508	RET_IF(err < 0, "update_elem(tmp_index_ovr_map, 0, 1)",
509	       "err:%d errno:%d\n", err, errno);
510	do_test(type, family, &cmd, PASS);
511	err = bpf_map_lookup_elem(tmp_index_ovr_map, &index_zero,
512				  &tmp_index);
513	RET_IF(err < 0 || tmp_index >= 0,
514	       "lookup_elem(tmp_index_ovr_map)",
515	       "err:%d errno:%d tmp_index:%d\n",
516	       err, errno, tmp_index);
517	disable_syncookie();
518}
519
520static void test_pass_on_err(int type, sa_family_t family)
521{
522	struct cmd cmd = {
523		.reuseport_index = REUSEPORT_ARRAY_SIZE,
524		.pass_on_failure = 1,
525	};
526
527	expected_results[PASS_ERR_SK_SELECT_REUSEPORT] += 1;
528	do_test(type, family, &cmd, PASS_ERR_SK_SELECT_REUSEPORT);
529}
530
/*
 * Check SO_DETACH_REUSEPORT_BPF.
 *
 * Detaching through sk_fds[0] must succeed, and a second detach through
 * sk_fds[1] must fail with ENOENT.  A packet sent afterwards must still
 * wake up a server socket, while the sum of all result_map counters must
 * not change, i.e. the BPF program no longer runs.
 *
 * Skipped when SO_DETACH_REUSEPORT_BPF is not defined at build time.  The
 * client socket is closed on every path once it has been created.
 */
static void test_detach_bpf(int type, sa_family_t family)
{
#ifdef SO_DETACH_REUSEPORT_BPF
	__u32 nr_run_before = 0, nr_run_after = 0, tmp, i;
	struct epoll_event ev;
	int cli_fd, err, nev;
	struct cmd cmd = {};
	int optvalue = 0;

	err = setsockopt(sk_fds[0], SOL_SOCKET, SO_DETACH_REUSEPORT_BPF,
			 &optvalue, sizeof(optvalue));
	RET_IF(err == -1, "setsockopt(SO_DETACH_REUSEPORT_BPF)",
	       "err:%d errno:%d\n", err, errno);

	/* Already detached above, so this one has to fail with ENOENT. */
	err = setsockopt(sk_fds[1], SOL_SOCKET, SO_DETACH_REUSEPORT_BPF,
			 &optvalue, sizeof(optvalue));
	RET_IF(err == 0 || errno != ENOENT,
	       "setsockopt(SO_DETACH_REUSEPORT_BPF)",
	       "err:%d errno:%d\n", err, errno);

	for (i = 0; i < NR_RESULTS; i++) {
		err = bpf_map_lookup_elem(result_map, &i, &tmp);
		RET_IF(err < 0, "lookup_elem(result_map)",
		       "i:%u err:%d errno:%d\n", i, err, errno);
		nr_run_before += tmp;
	}

	cli_fd = send_data(type, family, &cmd, sizeof(cmd), PASS);
	if (cli_fd < 0)
		return;

	/* From here on every exit must go through out_close. */
	nev = epoll_wait(epfd, &ev, 1, 5);
	if (CHECK_FAIL(nev <= 0)) {
		printf("nev <= 0" " "
		       "nev:%d expected:1 type:%d family:%d data:(0, 0)\n",
		       nev,  type, family);
		goto out_close;
	}

	for (i = 0; i < NR_RESULTS; i++) {
		err = bpf_map_lookup_elem(result_map, &i, &tmp);
		if (CHECK_FAIL(err < 0)) {
			printf("lookup_elem(result_map)" " "
			       "i:%u err:%d errno:%d\n", i, err, errno);
			goto out_close;
		}
		nr_run_after += tmp;
	}

	if (CHECK_FAIL(nr_run_before != nr_run_after))
		printf("nr_run_before != nr_run_after" " "
		       "nr_run_before:%u nr_run_after:%u\n",
		       nr_run_before, nr_run_after);

out_close:
	close(cli_fd);
#else
	test__skip();
#endif
}
583
584static void prepare_sk_fds(int type, sa_family_t family, bool inany)
585{
586	const int first = REUSEPORT_ARRAY_SIZE - 1;
587	int i, err, optval = 1;
588	struct epoll_event ev;
589	socklen_t addrlen;
590
591	if (inany)
592		sa46_init_inany(&srv_sa, family);
593	else
594		sa46_init_loopback(&srv_sa, family);
595	addrlen = sizeof(srv_sa);
596
597	/*
598	 * The sk_fds[] is filled from the back such that the order
599	 * is exactly opposite to the (struct sock_reuseport *)reuse->socks[].
600	 */
601	for (i = first; i >= 0; i--) {
602		sk_fds[i] = socket(family, type, 0);
603		RET_IF(sk_fds[i] == -1, "socket()", "sk_fds[%d]:%d errno:%d\n",
604		       i, sk_fds[i], errno);
605		err = setsockopt(sk_fds[i], SOL_SOCKET, SO_REUSEPORT,
606				 &optval, sizeof(optval));
607		RET_IF(err == -1, "setsockopt(SO_REUSEPORT)",
608		       "sk_fds[%d] err:%d errno:%d\n",
609		       i, err, errno);
610
611		if (i == first) {
612			err = setsockopt(sk_fds[i], SOL_SOCKET,
613					 SO_ATTACH_REUSEPORT_EBPF,
614					 &select_by_skb_data_prog,
615					 sizeof(select_by_skb_data_prog));
616			RET_IF(err < 0, "setsockopt(SO_ATTACH_REUEPORT_EBPF)",
617			       "err:%d errno:%d\n", err, errno);
618		}
619
620		err = bind(sk_fds[i], (struct sockaddr *)&srv_sa, addrlen);
621		RET_IF(err < 0, "bind()", "sk_fds[%d] err:%d errno:%d\n",
622		       i, err, errno);
623
624		if (type == SOCK_STREAM) {
625			err = listen(sk_fds[i], 10);
626			RET_IF(err < 0, "listen()",
627			       "sk_fds[%d] err:%d errno:%d\n",
628			       i, err, errno);
629		}
630
631		err = bpf_map_update_elem(reuseport_array, &i, &sk_fds[i],
632					  BPF_NOEXIST);
633		RET_IF(err < 0, "update_elem(reuseport_array)",
634		       "sk_fds[%d] err:%d errno:%d\n", i, err, errno);
635
636		if (i == first) {
637			socklen_t addrlen = sizeof(srv_sa);
638
639			err = getsockname(sk_fds[i], (struct sockaddr *)&srv_sa,
640					  &addrlen);
641			RET_IF(err == -1, "getsockname()",
642			       "sk_fds[%d] err:%d errno:%d\n", i, err, errno);
643		}
644	}
645
646	epfd = epoll_create(1);
647	RET_IF(epfd == -1, "epoll_create(1)",
648	       "epfd:%d errno:%d\n", epfd, errno);
649
650	ev.events = EPOLLIN;
651	for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++) {
652		ev.data.u32 = i;
653		err = epoll_ctl(epfd, EPOLL_CTL_ADD, sk_fds[i], &ev);
654		RET_IF(err, "epoll_ctl(EPOLL_CTL_ADD)", "sk_fds[%d]\n", i);
655	}
656}
657
658static void setup_per_test(int type, sa_family_t family, bool inany,
659			   bool no_inner_map)
660{
661	int ovr = -1, err;
662
663	prepare_sk_fds(type, family, inany);
664	err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero, &ovr,
665				  BPF_ANY);
666	RET_IF(err < 0, "update_elem(tmp_index_ovr_map, 0, -1)",
667	       "err:%d errno:%d\n", err, errno);
668
669	/* Install reuseport_array to outer_map? */
670	if (no_inner_map)
671		return;
672
673	err = bpf_map_update_elem(outer_map, &index_zero, &reuseport_array,
674				  BPF_ANY);
675	RET_IF(err < 0, "update_elem(outer_map, 0, reuseport_array)",
676	       "err:%d errno:%d\n", err, errno);
677}
678
679static void cleanup_per_test(bool no_inner_map)
680{
681	int i, err, zero = 0;
682
683	memset(expected_results, 0, sizeof(expected_results));
684
685	for (i = 0; i < NR_RESULTS; i++) {
686		err = bpf_map_update_elem(result_map, &i, &zero, BPF_ANY);
687		RET_IF(err, "reset elem in result_map",
688		       "i:%u err:%d errno:%d\n", i, err, errno);
689	}
690
691	err = bpf_map_update_elem(linum_map, &zero, &zero, BPF_ANY);
692	RET_IF(err, "reset line number in linum_map", "err:%d errno:%d\n",
693	       err, errno);
694
695	for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++)
696		close(sk_fds[i]);
697	close(epfd);
698
699	/* Delete reuseport_array from outer_map? */
700	if (no_inner_map)
701		return;
702
703	err = bpf_map_delete_elem(outer_map, &index_zero);
704	RET_IF(err < 0, "delete_elem(outer_map)",
705	       "err:%d errno:%d\n", err, errno);
706}
707
708static void cleanup(void)
709{
710	if (outer_map >= 0) {
711		close(outer_map);
712		outer_map = -1;
713	}
714
715	if (reuseport_array >= 0) {
716		close(reuseport_array);
717		reuseport_array = -1;
718	}
719
720	if (obj) {
721		bpf_object__close(obj);
722		obj = NULL;
723	}
724
725	memset(expected_results, 0, sizeof(expected_results));
726}
727
/* Short name of an inner map type for subtest names; "unknown" otherwise. */
static const char *maptype_str(enum bpf_map_type type)
{
	if (type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
		return "reuseport_sockarray";
	if (type == BPF_MAP_TYPE_SOCKMAP)
		return "sockmap";
	if (type == BPF_MAP_TYPE_SOCKHASH)
		return "sockhash";

	return "unknown";
}
741
/* "IPv4" / "IPv6" for AF_INET / AF_INET6, "unknown" for anything else. */
static const char *family_str(sa_family_t family)
{
	if (family == AF_INET)
		return "IPv4";
	if (family == AF_INET6)
		return "IPv6";

	return "unknown";
}
753
/* "TCP" / "UDP" for SOCK_STREAM / SOCK_DGRAM, "unknown" for anything else. */
static const char *sotype_str(int sotype)
{
	if (sotype == SOCK_STREAM)
		return "TCP";
	if (sotype == SOCK_DGRAM)
		return "UDP";

	return "unknown";
}
765
766#define TEST_INIT(fn_, ...) { .fn = fn_, .name = #fn_, __VA_ARGS__ }
767
768static void test_config(int sotype, sa_family_t family, bool inany)
769{
770	const struct test {
771		void (*fn)(int sotype, sa_family_t family);
772		const char *name;
773		bool no_inner_map;
774		int need_sotype;
775	} tests[] = {
776		TEST_INIT(test_err_inner_map,
777			  .no_inner_map = true),
778		TEST_INIT(test_err_skb_data),
779		TEST_INIT(test_err_sk_select_port),
780		TEST_INIT(test_pass),
781		TEST_INIT(test_syncookie,
782			  .need_sotype = SOCK_STREAM),
783		TEST_INIT(test_pass_on_err),
784		TEST_INIT(test_detach_bpf),
785	};
786	struct netns_obj *netns;
787	char s[MAX_TEST_NAME];
788	const struct test *t;
789
790	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
791		if (t->need_sotype && t->need_sotype != sotype)
792			continue; /* test not compatible with socket type */
793
794		snprintf(s, sizeof(s), "%s %s/%s %s %s",
795			 maptype_str(inner_map_type),
796			 family_str(family), sotype_str(sotype),
797			 inany ? "INANY" : "LOOPBACK", t->name);
798
799		if (!test__start_subtest(s))
800			continue;
801
802		netns = netns_new("select_reuseport", true);
803		if (!ASSERT_OK_PTR(netns, "netns_new"))
804			continue;
805
806		if (CHECK_FAIL(enable_fastopen()))
807			goto out;
808		if (CHECK_FAIL(disable_syncookie()))
809			goto out;
810
811		setup_per_test(sotype, family, inany, t->no_inner_map);
812		t->fn(sotype, family);
813		cleanup_per_test(t->no_inner_map);
814
815out:
816		netns_free(netns);
817	}
818}
819
/* Readable spelling of `.inany = true` in the configs[] table below. */
#define BIND_INANY true

/*
 * Run test_config() for every supported combination: TCP over IPv4 and
 * IPv6, each bound to loopback and to the wildcard address, and UDP over
 * IPv4 and IPv6 bound to loopback only.
 */
static void test_all(void)
{
	const struct config {
		int sotype;
		sa_family_t family;
		bool inany;
	} configs[] = {
		{ SOCK_STREAM, AF_INET },
		{ SOCK_STREAM, AF_INET, BIND_INANY },
		{ SOCK_STREAM, AF_INET6 },
		{ SOCK_STREAM, AF_INET6, BIND_INANY },
		{ SOCK_DGRAM, AF_INET },
		{ SOCK_DGRAM, AF_INET6 },
	};
	size_t idx;

	for (idx = 0; idx < ARRAY_SIZE(configs); idx++)
		test_config(configs[idx].sotype, configs[idx].family,
			    configs[idx].inany);
}
841
842void test_map_type(enum bpf_map_type mt)
843{
844	if (create_maps(mt))
845		goto out;
846	if (prepare_bpf_obj())
847		goto out;
848
849	test_all();
850out:
851	cleanup();
852}
853
854void serial_test_select_reuseport(void)
855{
 
 
 
 
 
 
 
 
 
 
 
 
856	test_map_type(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
857	test_map_type(BPF_MAP_TYPE_SOCKMAP);
858	test_map_type(BPF_MAP_TYPE_SOCKHASH);
 
 
859}
v5.9
  1// SPDX-License-Identifier: GPL-2.0
  2/* Copyright (c) 2018 Facebook */
  3
  4#include <stdlib.h>
  5#include <unistd.h>
  6#include <stdbool.h>
  7#include <string.h>
  8#include <errno.h>
  9#include <assert.h>
 10#include <fcntl.h>
 11#include <linux/bpf.h>
 12#include <linux/err.h>
 13#include <linux/types.h>
 14#include <linux/if_ether.h>
 15#include <sys/types.h>
 16#include <sys/epoll.h>
 17#include <sys/socket.h>
 18#include <netinet/in.h>
 19#include <bpf/bpf.h>
 20#include <bpf/libbpf.h>
 21#include "bpf_rlimit.h"
 22#include "bpf_util.h"
 23
 24#include "test_progs.h"
 25#include "test_select_reuseport_common.h"
 26
 27#define MAX_TEST_NAME 80
 28#define MIN_TCPHDR_LEN 20
 29#define UDPHDR_LEN 8
 30
 31#define TCP_SYNCOOKIE_SYSCTL "/proc/sys/net/ipv4/tcp_syncookies"
 32#define TCP_FO_SYSCTL "/proc/sys/net/ipv4/tcp_fastopen"
 33#define REUSEPORT_ARRAY_SIZE 32
 34
 35static int result_map, tmp_index_ovr_map, linum_map, data_check_map;
 36static __u32 expected_results[NR_RESULTS];
 37static int sk_fds[REUSEPORT_ARRAY_SIZE];
 38static int reuseport_array = -1, outer_map = -1;
 39static enum bpf_map_type inner_map_type;
 40static int select_by_skb_data_prog;
 41static int saved_tcp_syncookie = -1;
 42static struct bpf_object *obj;
 43static int saved_tcp_fo = -1;
 44static __u32 index_zero;
 45static int epfd;
 46
 47static union sa46 {
 48	struct sockaddr_in6 v6;
 49	struct sockaddr_in v4;
 50	sa_family_t family;
 51} srv_sa;
 52
 53#define RET_IF(condition, tag, format...) ({				\
 54	if (CHECK_FAIL(condition)) {					\
 55		printf(tag " " format);					\
 56		return;							\
 57	}								\
 58})
 59
 60#define RET_ERR(condition, tag, format...) ({				\
 61	if (CHECK_FAIL(condition)) {					\
 62		printf(tag " " format);					\
 63		return -1;						\
 64	}								\
 65})
 66
 67static int create_maps(enum bpf_map_type inner_type)
 68{
 69	struct bpf_create_map_attr attr = {};
 70
 71	inner_map_type = inner_type;
 72
 73	/* Creating reuseport_array */
 74	attr.name = "reuseport_array";
 75	attr.map_type = inner_type;
 76	attr.key_size = sizeof(__u32);
 77	attr.value_size = sizeof(__u32);
 78	attr.max_entries = REUSEPORT_ARRAY_SIZE;
 79
 80	reuseport_array = bpf_create_map_xattr(&attr);
 81	RET_ERR(reuseport_array == -1, "creating reuseport_array",
 82		"reuseport_array:%d errno:%d\n", reuseport_array, errno);
 83
 84	/* Creating outer_map */
 85	attr.name = "outer_map";
 86	attr.map_type = BPF_MAP_TYPE_ARRAY_OF_MAPS;
 87	attr.key_size = sizeof(__u32);
 88	attr.value_size = sizeof(__u32);
 89	attr.max_entries = 1;
 90	attr.inner_map_fd = reuseport_array;
 91	outer_map = bpf_create_map_xattr(&attr);
 92	RET_ERR(outer_map == -1, "creating outer_map",
 93		"outer_map:%d errno:%d\n", outer_map, errno);
 94
 95	return 0;
 96}
 97
 98static int prepare_bpf_obj(void)
 99{
100	struct bpf_program *prog;
101	struct bpf_map *map;
102	int err;
103
104	obj = bpf_object__open("test_select_reuseport_kern.o");
105	RET_ERR(IS_ERR_OR_NULL(obj), "open test_select_reuseport_kern.o",
106		"obj:%p PTR_ERR(obj):%ld\n", obj, PTR_ERR(obj));
 
107
108	map = bpf_object__find_map_by_name(obj, "outer_map");
109	RET_ERR(!map, "find outer_map", "!map\n");
110	err = bpf_map__reuse_fd(map, outer_map);
111	RET_ERR(err, "reuse outer_map", "err:%d\n", err);
112
113	err = bpf_object__load(obj);
114	RET_ERR(err, "load bpf_object", "err:%d\n", err);
115
116	prog = bpf_program__next(NULL, obj);
117	RET_ERR(!prog, "get first bpf_program", "!prog\n");
118	select_by_skb_data_prog = bpf_program__fd(prog);
119	RET_ERR(select_by_skb_data_prog == -1, "get prog fd",
120		"select_by_skb_data_prog:%d\n", select_by_skb_data_prog);
121
122	map = bpf_object__find_map_by_name(obj, "result_map");
123	RET_ERR(!map, "find result_map", "!map\n");
124	result_map = bpf_map__fd(map);
125	RET_ERR(result_map == -1, "get result_map fd",
126		"result_map:%d\n", result_map);
127
128	map = bpf_object__find_map_by_name(obj, "tmp_index_ovr_map");
129	RET_ERR(!map, "find tmp_index_ovr_map\n", "!map");
130	tmp_index_ovr_map = bpf_map__fd(map);
131	RET_ERR(tmp_index_ovr_map == -1, "get tmp_index_ovr_map fd",
132		"tmp_index_ovr_map:%d\n", tmp_index_ovr_map);
133
134	map = bpf_object__find_map_by_name(obj, "linum_map");
135	RET_ERR(!map, "find linum_map", "!map\n");
136	linum_map = bpf_map__fd(map);
137	RET_ERR(linum_map == -1, "get linum_map fd",
138		"linum_map:%d\n", linum_map);
139
140	map = bpf_object__find_map_by_name(obj, "data_check_map");
141	RET_ERR(!map, "find data_check_map", "!map\n");
142	data_check_map = bpf_map__fd(map);
143	RET_ERR(data_check_map == -1, "get data_check_map fd",
144		"data_check_map:%d\n", data_check_map);
145
146	return 0;
147}
148
149static void sa46_init_loopback(union sa46 *sa, sa_family_t family)
150{
151	memset(sa, 0, sizeof(*sa));
152	sa->family = family;
153	if (sa->family == AF_INET6)
154		sa->v6.sin6_addr = in6addr_loopback;
155	else
156		sa->v4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
157}
158
159static void sa46_init_inany(union sa46 *sa, sa_family_t family)
160{
161	memset(sa, 0, sizeof(*sa));
162	sa->family = family;
163	if (sa->family == AF_INET6)
164		sa->v6.sin6_addr = in6addr_any;
165	else
166		sa->v4.sin_addr.s_addr = INADDR_ANY;
167}
168
/*
 * Read an integer sysctl.
 *
 * @sysctl: path of the sysctl file to read.
 *
 * Returns atoi() of the file content, or -1 (after reporting the failure)
 * when the file cannot be opened or read.  A sysctl whose value is itself
 * negative is therefore indistinguishable from an error.
 */
static int read_int_sysctl(const char *sysctl)
{
	int fd, ret, saved_errno;
	char buf[16];

	fd = open(sysctl, O_RDONLY);
	RET_ERR(fd == -1, "open(sysctl)",
		"sysctl:%s fd:%d errno:%d\n", sysctl, fd, errno);

	/* read() does not terminate the buffer; keep one byte for the NUL. */
	ret = read(fd, buf, sizeof(buf) - 1);

	/* Close before checking so the fd is not leaked on a failed read. */
	saved_errno = errno;
	close(fd);
	errno = saved_errno;

	RET_ERR(ret <= 0, "read(sysctl)",
		"sysctl:%s ret:%d errno:%d\n", sysctl, ret, errno);

	buf[ret] = '\0';
	return atoi(buf);
}
185
/*
 * Write integer @v, formatted in decimal, to the sysctl file @sysctl.
 *
 * Returns 0 on success, -1 (after reporting the failure) when the file
 * cannot be opened or the value is not written completely.
 */
static int write_int_sysctl(const char *sysctl, int v)
{
	int fd, ret, size, saved_errno;
	char buf[16];

	fd = open(sysctl, O_RDWR);
	RET_ERR(fd == -1, "open(sysctl)",
		"sysctl:%s fd:%d errno:%d\n", sysctl, fd, errno);

	size = snprintf(buf, sizeof(buf), "%d", v);
	ret = write(fd, buf, size);

	/* Close before checking so the fd is not leaked on a failed write. */
	saved_errno = errno;
	close(fd);
	errno = saved_errno;

	RET_ERR(ret != size, "write(sysctl)",
		"sysctl:%s ret:%d size:%d errno:%d\n",
		sysctl, ret, size, errno);

	return 0;
}
204
205static void restore_sysctls(void)
206{
207	if (saved_tcp_fo != -1)
208		write_int_sysctl(TCP_FO_SYSCTL, saved_tcp_fo);
209	if (saved_tcp_syncookie != -1)
210		write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, saved_tcp_syncookie);
211}
212
213static int enable_fastopen(void)
214{
215	int fo;
216
217	fo = read_int_sysctl(TCP_FO_SYSCTL);
218	if (fo < 0)
219		return -1;
220
221	return write_int_sysctl(TCP_FO_SYSCTL, fo | 7);
222}
223
224static int enable_syncookie(void)
225{
226	return write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 2);
227}
228
229static int disable_syncookie(void)
230{
231	return write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 0);
232}
233
234static long get_linum(void)
235{
236	__u32 linum;
237	int err;
238
239	err = bpf_map_lookup_elem(linum_map, &index_zero, &linum);
240	RET_ERR(err == -1, "lookup_elem(linum_map)", "err:%d errno:%d\n",
241		err, errno);
242
243	return linum;
244}
245
246static void check_data(int type, sa_family_t family, const struct cmd *cmd,
247		       int cli_fd)
248{
249	struct data_check expected = {}, result;
250	union sa46 cli_sa;
251	socklen_t addrlen;
252	int err;
253
254	addrlen = sizeof(cli_sa);
255	err = getsockname(cli_fd, (struct sockaddr *)&cli_sa,
256			  &addrlen);
257	RET_IF(err == -1, "getsockname(cli_fd)", "err:%d errno:%d\n",
258	       err, errno);
259
260	err = bpf_map_lookup_elem(data_check_map, &index_zero, &result);
261	RET_IF(err == -1, "lookup_elem(data_check_map)", "err:%d errno:%d\n",
262	       err, errno);
263
264	if (type == SOCK_STREAM) {
265		expected.len = MIN_TCPHDR_LEN;
266		expected.ip_protocol = IPPROTO_TCP;
267	} else {
268		expected.len = UDPHDR_LEN;
269		expected.ip_protocol = IPPROTO_UDP;
270	}
271
272	if (family == AF_INET6) {
273		expected.eth_protocol = htons(ETH_P_IPV6);
274		expected.bind_inany = !srv_sa.v6.sin6_addr.s6_addr32[3] &&
275			!srv_sa.v6.sin6_addr.s6_addr32[2] &&
276			!srv_sa.v6.sin6_addr.s6_addr32[1] &&
277			!srv_sa.v6.sin6_addr.s6_addr32[0];
278
279		memcpy(&expected.skb_addrs[0], cli_sa.v6.sin6_addr.s6_addr32,
280		       sizeof(cli_sa.v6.sin6_addr));
281		memcpy(&expected.skb_addrs[4], &in6addr_loopback,
282		       sizeof(in6addr_loopback));
283		expected.skb_ports[0] = cli_sa.v6.sin6_port;
284		expected.skb_ports[1] = srv_sa.v6.sin6_port;
285	} else {
286		expected.eth_protocol = htons(ETH_P_IP);
287		expected.bind_inany = !srv_sa.v4.sin_addr.s_addr;
288
289		expected.skb_addrs[0] = cli_sa.v4.sin_addr.s_addr;
290		expected.skb_addrs[1] = htonl(INADDR_LOOPBACK);
291		expected.skb_ports[0] = cli_sa.v4.sin_port;
292		expected.skb_ports[1] = srv_sa.v4.sin_port;
293	}
294
295	if (memcmp(&result, &expected, offsetof(struct data_check,
296						equal_check_end))) {
297		printf("unexpected data_check\n");
298		printf("  result: (0x%x, %u, %u)\n",
299		       result.eth_protocol, result.ip_protocol,
300		       result.bind_inany);
301		printf("expected: (0x%x, %u, %u)\n",
302		       expected.eth_protocol, expected.ip_protocol,
303		       expected.bind_inany);
304		RET_IF(1, "data_check result != expected",
305		       "bpf_prog_linum:%ld\n", get_linum());
306	}
307
308	RET_IF(!result.hash, "data_check result.hash empty",
309	       "result.hash:%u", result.hash);
310
311	expected.len += cmd ? sizeof(*cmd) : 0;
312	if (type == SOCK_STREAM)
313		RET_IF(expected.len > result.len, "expected.len > result.len",
314		       "expected.len:%u result.len:%u bpf_prog_linum:%ld\n",
315		       expected.len, result.len, get_linum());
316	else
317		RET_IF(expected.len != result.len, "expected.len != result.len",
318		       "expected.len:%u result.len:%u bpf_prog_linum:%ld\n",
319		       expected.len, result.len, get_linum());
320}
321
322static const char *result_to_str(enum result res)
323{
324	switch (res) {
325	case DROP_ERR_INNER_MAP:
326		return "DROP_ERR_INNER_MAP";
327	case DROP_ERR_SKB_DATA:
328		return "DROP_ERR_SKB_DATA";
329	case DROP_ERR_SK_SELECT_REUSEPORT:
330		return "DROP_ERR_SK_SELECT_REUSEPORT";
331	case DROP_MISC:
332		return "DROP_MISC";
333	case PASS:
334		return "PASS";
335	case PASS_ERR_SK_SELECT_REUSEPORT:
336		return "PASS_ERR_SK_SELECT_REUSEPORT";
337	default:
338		return "UNKNOWN";
339	}
340}
341
342static void check_results(void)
343{
344	__u32 results[NR_RESULTS];
345	__u32 i, broken = 0;
346	int err;
347
348	for (i = 0; i < NR_RESULTS; i++) {
349		err = bpf_map_lookup_elem(result_map, &i, &results[i]);
350		RET_IF(err == -1, "lookup_elem(result_map)",
351		       "i:%u err:%d errno:%d\n", i, err, errno);
352	}
353
354	for (i = 0; i < NR_RESULTS; i++) {
355		if (results[i] != expected_results[i]) {
356			broken = i;
357			break;
358		}
359	}
360
361	if (i == NR_RESULTS)
362		return;
363
364	printf("unexpected result\n");
365	printf(" result: [");
366	printf("%u", results[0]);
367	for (i = 1; i < NR_RESULTS; i++)
368		printf(", %u", results[i]);
369	printf("]\n");
370
371	printf("expected: [");
372	printf("%u", expected_results[0]);
373	for (i = 1; i < NR_RESULTS; i++)
374		printf(", %u", expected_results[i]);
375	printf("]\n");
376
377	printf("mismatch on %s (bpf_prog_linum:%ld)\n", result_to_str(broken),
378	       get_linum());
379
380	CHECK_FAIL(true);
381}
382
383static int send_data(int type, sa_family_t family, void *data, size_t len,
384		     enum result expected)
385{
386	union sa46 cli_sa;
387	int fd, err;
388
389	fd = socket(family, type, 0);
390	RET_ERR(fd == -1, "socket()", "fd:%d errno:%d\n", fd, errno);
391
392	sa46_init_loopback(&cli_sa, family);
393	err = bind(fd, (struct sockaddr *)&cli_sa, sizeof(cli_sa));
394	RET_ERR(fd == -1, "bind(cli_sa)", "err:%d errno:%d\n", err, errno);
395
396	err = sendto(fd, data, len, MSG_FASTOPEN, (struct sockaddr *)&srv_sa,
397		     sizeof(srv_sa));
398	RET_ERR(err != len && expected >= PASS,
399		"sendto()", "family:%u err:%d errno:%d expected:%d\n",
400		family, err, errno, expected);
401
402	return fd;
403}
404
405static void do_test(int type, sa_family_t family, struct cmd *cmd,
406		    enum result expected)
407{
408	int nev, srv_fd, cli_fd;
409	struct epoll_event ev;
410	struct cmd rcv_cmd;
411	ssize_t nread;
412
413	cli_fd = send_data(type, family, cmd, cmd ? sizeof(*cmd) : 0,
414			   expected);
415	if (cli_fd < 0)
416		return;
417	nev = epoll_wait(epfd, &ev, 1, expected >= PASS ? 5 : 0);
418	RET_IF((nev <= 0 && expected >= PASS) ||
419	       (nev > 0 && expected < PASS),
420	       "nev <> expected",
421	       "nev:%d expected:%d type:%d family:%d data:(%d, %d)\n",
422	       nev, expected, type, family,
423	       cmd ? cmd->reuseport_index : -1,
424	       cmd ? cmd->pass_on_failure : -1);
425	check_results();
426	check_data(type, family, cmd, cli_fd);
427
428	if (expected < PASS)
429		return;
430
431	RET_IF(expected != PASS_ERR_SK_SELECT_REUSEPORT &&
432	       cmd->reuseport_index != ev.data.u32,
433	       "check cmd->reuseport_index",
434	       "cmd:(%u, %u) ev.data.u32:%u\n",
435	       cmd->pass_on_failure, cmd->reuseport_index, ev.data.u32);
436
437	srv_fd = sk_fds[ev.data.u32];
438	if (type == SOCK_STREAM) {
439		int new_fd = accept(srv_fd, NULL, 0);
440
441		RET_IF(new_fd == -1, "accept(srv_fd)",
442		       "ev.data.u32:%u new_fd:%d errno:%d\n",
443		       ev.data.u32, new_fd, errno);
444
445		nread = recv(new_fd, &rcv_cmd, sizeof(rcv_cmd), MSG_DONTWAIT);
446		RET_IF(nread != sizeof(rcv_cmd),
447		       "recv(new_fd)",
448		       "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n",
449		       ev.data.u32, nread, sizeof(rcv_cmd), errno);
450
451		close(new_fd);
452	} else {
453		nread = recv(srv_fd, &rcv_cmd, sizeof(rcv_cmd), MSG_DONTWAIT);
454		RET_IF(nread != sizeof(rcv_cmd),
455		       "recv(sk_fds)",
456		       "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n",
457		       ev.data.u32, nread, sizeof(rcv_cmd), errno);
458	}
459
460	close(cli_fd);
461}
462
463static void test_err_inner_map(int type, sa_family_t family)
464{
465	struct cmd cmd = {
466		.reuseport_index = 0,
467		.pass_on_failure = 0,
468	};
469
470	expected_results[DROP_ERR_INNER_MAP]++;
471	do_test(type, family, &cmd, DROP_ERR_INNER_MAP);
472}
473
474static void test_err_skb_data(int type, sa_family_t family)
475{
476	expected_results[DROP_ERR_SKB_DATA]++;
477	do_test(type, family, NULL, DROP_ERR_SKB_DATA);
478}
479
480static void test_err_sk_select_port(int type, sa_family_t family)
481{
482	struct cmd cmd = {
483		.reuseport_index = REUSEPORT_ARRAY_SIZE,
484		.pass_on_failure = 0,
485	};
486
487	expected_results[DROP_ERR_SK_SELECT_REUSEPORT]++;
488	do_test(type, family, &cmd, DROP_ERR_SK_SELECT_REUSEPORT);
489}
490
491static void test_pass(int type, sa_family_t family)
492{
493	struct cmd cmd;
494	int i;
495
496	cmd.pass_on_failure = 0;
497	for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++) {
498		expected_results[PASS]++;
499		cmd.reuseport_index = i;
500		do_test(type, family, &cmd, PASS);
501	}
502}
503
504static void test_syncookie(int type, sa_family_t family)
505{
506	int err, tmp_index = 1;
507	struct cmd cmd = {
508		.reuseport_index = 0,
509		.pass_on_failure = 0,
510	};
511
512	/*
513	 * +1 for TCP-SYN and
514	 * +1 for the TCP-ACK (ack the syncookie)
515	 */
516	expected_results[PASS] += 2;
517	enable_syncookie();
518	/*
519	 * Simulate TCP-SYN and TCP-ACK are handled by two different sk:
520	 * TCP-SYN: select sk_fds[tmp_index = 1] tmp_index is from the
521	 *          tmp_index_ovr_map
522	 * TCP-ACK: select sk_fds[reuseport_index = 0] reuseport_index
523	 *          is from the cmd.reuseport_index
524	 */
525	err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero,
526				  &tmp_index, BPF_ANY);
527	RET_IF(err == -1, "update_elem(tmp_index_ovr_map, 0, 1)",
528	       "err:%d errno:%d\n", err, errno);
529	do_test(type, family, &cmd, PASS);
530	err = bpf_map_lookup_elem(tmp_index_ovr_map, &index_zero,
531				  &tmp_index);
532	RET_IF(err == -1 || tmp_index != -1,
533	       "lookup_elem(tmp_index_ovr_map)",
534	       "err:%d errno:%d tmp_index:%d\n",
535	       err, errno, tmp_index);
536	disable_syncookie();
537}
538
539static void test_pass_on_err(int type, sa_family_t family)
540{
541	struct cmd cmd = {
542		.reuseport_index = REUSEPORT_ARRAY_SIZE,
543		.pass_on_failure = 1,
544	};
545
546	expected_results[PASS_ERR_SK_SELECT_REUSEPORT] += 1;
547	do_test(type, family, &cmd, PASS_ERR_SK_SELECT_REUSEPORT);
548}
549
#ifdef SO_DETACH_REUSEPORT_BPF
/*
 * Add up all NR_RESULTS counters of result_map into *total.
 * Returns 0 on success, -1 if a lookup fails.
 */
static int sum_result_map(__u32 *total)
{
	__u32 i, tmp, sum = 0;
	int err;

	for (i = 0; i < NR_RESULTS; i++) {
		err = bpf_map_lookup_elem(result_map, &i, &tmp);
		/* libbpf returns a negative errno on failure, not only -1. */
		RET_ERR(err < 0, "lookup_elem(result_map)",
			"i:%u err:%d errno:%d\n", i, err, errno);
		sum += tmp;
	}

	*total = sum;
	return 0;
}

/*
 * After the program has been detached: wait for the packet to show up
 * on some server socket and verify that the result counters did not
 * move compared to @nr_run_before.
 */
static void check_detached_run(int type, sa_family_t family,
			       __u32 nr_run_before)
{
	struct epoll_event ev;
	__u32 nr_run_after;
	int nev;

	nev = epoll_wait(epfd, &ev, 1, 5);
	RET_IF(nev <= 0, "nev <= 0",
	       "nev:%d expected:1 type:%d family:%d data:(0, 0)\n",
	       nev,  type, family);

	if (sum_result_map(&nr_run_after))
		return;

	RET_IF(nr_run_before != nr_run_after,
	       "nr_run_before != nr_run_after",
	       "nr_run_before:%u nr_run_after:%u\n",
	       nr_run_before, nr_run_after);
}
#endif

/*
 * Detach the reuseport BPF program via sk_fds[0], check that a second
 * detach via sk_fds[1] fails with ENOENT, then send one packet and
 * verify that it is delivered while the result counters stay unchanged.
 * The test is skipped when SO_DETACH_REUSEPORT_BPF is not defined.
 */
static void test_detach_bpf(int type, sa_family_t family)
{
#ifdef SO_DETACH_REUSEPORT_BPF
	__u32 nr_run_before = 0;
	struct cmd cmd = {};
	int optvalue = 0;
	int cli_fd, err;

	err = setsockopt(sk_fds[0], SOL_SOCKET, SO_DETACH_REUSEPORT_BPF,
			 &optvalue, sizeof(optvalue));
	RET_IF(err == -1, "setsockopt(SO_DETACH_REUSEPORT_BPF)",
	       "err:%d errno:%d\n", err, errno);

	/* Already detached above, so this one must fail with ENOENT. */
	err = setsockopt(sk_fds[1], SOL_SOCKET, SO_DETACH_REUSEPORT_BPF,
			 &optvalue, sizeof(optvalue));
	RET_IF(err == 0 || errno != ENOENT,
	       "setsockopt(SO_DETACH_REUSEPORT_BPF)",
	       "err:%d errno:%d\n", err, errno);

	if (sum_result_map(&nr_run_before))
		return;

	cli_fd = send_data(type, family, &cmd, sizeof(cmd), PASS);
	if (cli_fd < 0)
		return;

	check_detached_run(type, family, nr_run_before);

	/* Closed on every path, including failed checks above. */
	close(cli_fd);
#else
	test__skip();
#endif
}
602
603static void prepare_sk_fds(int type, sa_family_t family, bool inany)
604{
605	const int first = REUSEPORT_ARRAY_SIZE - 1;
606	int i, err, optval = 1;
607	struct epoll_event ev;
608	socklen_t addrlen;
609
610	if (inany)
611		sa46_init_inany(&srv_sa, family);
612	else
613		sa46_init_loopback(&srv_sa, family);
614	addrlen = sizeof(srv_sa);
615
616	/*
617	 * The sk_fds[] is filled from the back such that the order
618	 * is exactly opposite to the (struct sock_reuseport *)reuse->socks[].
619	 */
620	for (i = first; i >= 0; i--) {
621		sk_fds[i] = socket(family, type, 0);
622		RET_IF(sk_fds[i] == -1, "socket()", "sk_fds[%d]:%d errno:%d\n",
623		       i, sk_fds[i], errno);
624		err = setsockopt(sk_fds[i], SOL_SOCKET, SO_REUSEPORT,
625				 &optval, sizeof(optval));
626		RET_IF(err == -1, "setsockopt(SO_REUSEPORT)",
627		       "sk_fds[%d] err:%d errno:%d\n",
628		       i, err, errno);
629
630		if (i == first) {
631			err = setsockopt(sk_fds[i], SOL_SOCKET,
632					 SO_ATTACH_REUSEPORT_EBPF,
633					 &select_by_skb_data_prog,
634					 sizeof(select_by_skb_data_prog));
635			RET_IF(err == -1, "setsockopt(SO_ATTACH_REUEPORT_EBPF)",
636			       "err:%d errno:%d\n", err, errno);
637		}
638
639		err = bind(sk_fds[i], (struct sockaddr *)&srv_sa, addrlen);
640		RET_IF(err == -1, "bind()", "sk_fds[%d] err:%d errno:%d\n",
641		       i, err, errno);
642
643		if (type == SOCK_STREAM) {
644			err = listen(sk_fds[i], 10);
645			RET_IF(err == -1, "listen()",
646			       "sk_fds[%d] err:%d errno:%d\n",
647			       i, err, errno);
648		}
649
650		err = bpf_map_update_elem(reuseport_array, &i, &sk_fds[i],
651					  BPF_NOEXIST);
652		RET_IF(err == -1, "update_elem(reuseport_array)",
653		       "sk_fds[%d] err:%d errno:%d\n", i, err, errno);
654
655		if (i == first) {
656			socklen_t addrlen = sizeof(srv_sa);
657
658			err = getsockname(sk_fds[i], (struct sockaddr *)&srv_sa,
659					  &addrlen);
660			RET_IF(err == -1, "getsockname()",
661			       "sk_fds[%d] err:%d errno:%d\n", i, err, errno);
662		}
663	}
664
665	epfd = epoll_create(1);
666	RET_IF(epfd == -1, "epoll_create(1)",
667	       "epfd:%d errno:%d\n", epfd, errno);
668
669	ev.events = EPOLLIN;
670	for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++) {
671		ev.data.u32 = i;
672		err = epoll_ctl(epfd, EPOLL_CTL_ADD, sk_fds[i], &ev);
673		RET_IF(err, "epoll_ctl(EPOLL_CTL_ADD)", "sk_fds[%d]\n", i);
674	}
675}
676
677static void setup_per_test(int type, sa_family_t family, bool inany,
678			   bool no_inner_map)
679{
680	int ovr = -1, err;
681
682	prepare_sk_fds(type, family, inany);
683	err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero, &ovr,
684				  BPF_ANY);
685	RET_IF(err == -1, "update_elem(tmp_index_ovr_map, 0, -1)",
686	       "err:%d errno:%d\n", err, errno);
687
688	/* Install reuseport_array to outer_map? */
689	if (no_inner_map)
690		return;
691
692	err = bpf_map_update_elem(outer_map, &index_zero, &reuseport_array,
693				  BPF_ANY);
694	RET_IF(err == -1, "update_elem(outer_map, 0, reuseport_array)",
695	       "err:%d errno:%d\n", err, errno);
696}
697
698static void cleanup_per_test(bool no_inner_map)
699{
700	int i, err, zero = 0;
701
702	memset(expected_results, 0, sizeof(expected_results));
703
704	for (i = 0; i < NR_RESULTS; i++) {
705		err = bpf_map_update_elem(result_map, &i, &zero, BPF_ANY);
706		RET_IF(err, "reset elem in result_map",
707		       "i:%u err:%d errno:%d\n", i, err, errno);
708	}
709
710	err = bpf_map_update_elem(linum_map, &zero, &zero, BPF_ANY);
711	RET_IF(err, "reset line number in linum_map", "err:%d errno:%d\n",
712	       err, errno);
713
714	for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++)
715		close(sk_fds[i]);
716	close(epfd);
717
718	/* Delete reuseport_array from outer_map? */
719	if (no_inner_map)
720		return;
721
722	err = bpf_map_delete_elem(outer_map, &index_zero);
723	RET_IF(err == -1, "delete_elem(outer_map)",
724	       "err:%d errno:%d\n", err, errno);
725}
726
727static void cleanup(void)
728{
729	if (outer_map != -1) {
730		close(outer_map);
731		outer_map = -1;
732	}
733
734	if (reuseport_array != -1) {
735		close(reuseport_array);
736		reuseport_array = -1;
737	}
738
739	if (obj) {
740		bpf_object__close(obj);
741		obj = NULL;
742	}
743
744	memset(expected_results, 0, sizeof(expected_results));
745}
746
747static const char *maptype_str(enum bpf_map_type type)
748{
749	switch (type) {
750	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
751		return "reuseport_sockarray";
752	case BPF_MAP_TYPE_SOCKMAP:
753		return "sockmap";
754	case BPF_MAP_TYPE_SOCKHASH:
755		return "sockhash";
756	default:
757		return "unknown";
758	}
759}
760
/*
 * Human-readable name of an address family: "IPv4" for AF_INET, "IPv6"
 * for AF_INET6 and "unknown" for everything else.
 */
static const char *family_str(sa_family_t family)
{
	return family == AF_INET  ? "IPv4" :
	       family == AF_INET6 ? "IPv6" :
				    "unknown";
}
772
/*
 * Human-readable name of a socket type: "TCP" for SOCK_STREAM, "UDP"
 * for SOCK_DGRAM and "unknown" for everything else.
 */
static const char *sotype_str(int sotype)
{
	const char *name = "unknown";

	switch (sotype) {
	case SOCK_STREAM:
		name = "TCP";
		break;
	case SOCK_DGRAM:
		name = "UDP";
		break;
	default:
		break;
	}

	return name;
}
784
785#define TEST_INIT(fn_, ...) { .fn = fn_, .name = #fn_, __VA_ARGS__ }
786
787static void test_config(int sotype, sa_family_t family, bool inany)
788{
789	const struct test {
790		void (*fn)(int sotype, sa_family_t family);
791		const char *name;
792		bool no_inner_map;
793		int need_sotype;
794	} tests[] = {
795		TEST_INIT(test_err_inner_map,
796			  .no_inner_map = true),
797		TEST_INIT(test_err_skb_data),
798		TEST_INIT(test_err_sk_select_port),
799		TEST_INIT(test_pass),
800		TEST_INIT(test_syncookie,
801			  .need_sotype = SOCK_STREAM),
802		TEST_INIT(test_pass_on_err),
803		TEST_INIT(test_detach_bpf),
804	};
 
805	char s[MAX_TEST_NAME];
806	const struct test *t;
807
808	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
809		if (t->need_sotype && t->need_sotype != sotype)
810			continue; /* test not compatible with socket type */
811
812		snprintf(s, sizeof(s), "%s %s/%s %s %s",
813			 maptype_str(inner_map_type),
814			 family_str(family), sotype_str(sotype),
815			 inany ? "INANY" : "LOOPBACK", t->name);
816
817		if (!test__start_subtest(s))
818			continue;
819
 
 
 
 
 
 
 
 
 
820		setup_per_test(sotype, family, inany, t->no_inner_map);
821		t->fn(sotype, family);
822		cleanup_per_test(t->no_inner_map);
 
 
 
823	}
824}
825
#define BIND_INANY true

/*
 * Run test_config() for each socket type / address family / bind
 * address combination listed in the table below.
 */
static void test_all(void)
{
	const struct config {
		int sotype;
		sa_family_t family;
		bool inany;
	} configs[] = {
		{ .sotype = SOCK_STREAM, .family = AF_INET },
		{ .sotype = SOCK_STREAM, .family = AF_INET, .inany = BIND_INANY },
		{ .sotype = SOCK_STREAM, .family = AF_INET6 },
		{ .sotype = SOCK_STREAM, .family = AF_INET6, .inany = BIND_INANY },
		{ .sotype = SOCK_DGRAM, .family = AF_INET },
		{ .sotype = SOCK_DGRAM, .family = AF_INET6 },
	};
	size_t n;

	for (n = 0; n < ARRAY_SIZE(configs); n++)
		test_config(configs[n].sotype, configs[n].family,
			    configs[n].inany);
}
847
848void test_map_type(enum bpf_map_type mt)
849{
850	if (create_maps(mt))
851		goto out;
852	if (prepare_bpf_obj())
853		goto out;
854
855	test_all();
856out:
857	cleanup();
858}
859
860void test_select_reuseport(void)
861{
862	saved_tcp_fo = read_int_sysctl(TCP_FO_SYSCTL);
863	if (saved_tcp_fo < 0)
864		goto out;
865	saved_tcp_syncookie = read_int_sysctl(TCP_SYNCOOKIE_SYSCTL);
866	if (saved_tcp_syncookie < 0)
867		goto out;
868
869	if (enable_fastopen())
870		goto out;
871	if (disable_syncookie())
872		goto out;
873
874	test_map_type(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
875	test_map_type(BPF_MAP_TYPE_SOCKMAP);
876	test_map_type(BPF_MAP_TYPE_SOCKHASH);
877out:
878	restore_sysctls();
879}