Loading...
1// SPDX-License-Identifier: GPL-2.0-only
2/* Copyright (C) 2017 Cavium, Inc.
3 */
4#include <linux/bpf.h>
5#include <linux/netlink.h>
6#include <linux/rtnetlink.h>
7#include <assert.h>
8#include <errno.h>
9#include <signal.h>
10#include <stdio.h>
11#include <stdlib.h>
12#include <string.h>
13#include <sys/socket.h>
14#include <unistd.h>
15#include <bpf/bpf.h>
16#include <arpa/inet.h>
17#include <fcntl.h>
18#include <poll.h>
19#include <net/if.h>
20#include <netdb.h>
21#include <sys/ioctl.h>
22#include <sys/syscall.h>
23#include "bpf_util.h"
24#include <bpf/libbpf.h>
25#include <libgen.h>
26#include <getopt.h>
27#include <pthread.h>
28#include "xdp_sample_user.h"
29#include "xdp_router_ipv4.skel.h"
30
31static const char *__doc__ =
32"XDP IPv4 router implementation\n"
33"Usage: xdp_router_ipv4 <IFNAME-0> ... <IFNAME-N>\n";
34
35static char buf[8192];
36static int lpm_map_fd;
37static int arp_table_map_fd;
38static int exact_match_map_fd;
39static int tx_port_map_fd;
40
41static bool routes_thread_exit;
42static int interval = 5;
43
44static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT |
45 SAMPLE_DEVMAP_XMIT_CNT_MULTI | SAMPLE_EXCEPTION_CNT;
46
47DEFINE_SAMPLE_INIT(xdp_router_ipv4);
48
49static const struct option long_options[] = {
50 { "help", no_argument, NULL, 'h' },
51 { "skb-mode", no_argument, NULL, 'S' },
52 { "force", no_argument, NULL, 'F' },
53 { "interval", required_argument, NULL, 'i' },
54 { "verbose", no_argument, NULL, 'v' },
55 { "stats", no_argument, NULL, 's' },
56 {}
57};
58
59static int get_route_table(int rtm_family);
60
61static int recv_msg(struct sockaddr_nl sock_addr, int sock)
62{
63 struct nlmsghdr *nh;
64 int len, nll = 0;
65 char *buf_ptr;
66
67 buf_ptr = buf;
68 while (1) {
69 len = recv(sock, buf_ptr, sizeof(buf) - nll, 0);
70 if (len < 0)
71 return len;
72
73 nh = (struct nlmsghdr *)buf_ptr;
74
75 if (nh->nlmsg_type == NLMSG_DONE)
76 break;
77 buf_ptr += len;
78 nll += len;
79 if ((sock_addr.nl_groups & RTMGRP_NEIGH) == RTMGRP_NEIGH)
80 break;
81
82 if ((sock_addr.nl_groups & RTMGRP_IPV4_ROUTE) == RTMGRP_IPV4_ROUTE)
83 break;
84 }
85 return nll;
86}
87
88/* Function to parse the route entry returned by netlink
89 * Updates the route entry related map entries
90 */
91static void read_route(struct nlmsghdr *nh, int nll)
92{
93 char dsts[24], gws[24], ifs[16], dsts_len[24], metrics[24];
94 struct bpf_lpm_trie_key_u8 *prefix_key;
95 struct rtattr *rt_attr;
96 struct rtmsg *rt_msg;
97 int rtm_family;
98 int rtl;
99 int i;
100 struct route_table {
101 int dst_len, iface, metric;
102 __be32 dst, gw;
103 __be64 mac;
104 } route;
105 struct arp_table {
106 __be64 mac;
107 __be32 dst;
108 };
109
110 struct direct_map {
111 struct arp_table arp;
112 int ifindex;
113 __be64 mac;
114 } direct_entry;
115
116 memset(&route, 0, sizeof(route));
117 for (; NLMSG_OK(nh, nll); nh = NLMSG_NEXT(nh, nll)) {
118 rt_msg = (struct rtmsg *)NLMSG_DATA(nh);
119 rtm_family = rt_msg->rtm_family;
120 if (rtm_family == AF_INET)
121 if (rt_msg->rtm_table != RT_TABLE_MAIN)
122 continue;
123 rt_attr = (struct rtattr *)RTM_RTA(rt_msg);
124 rtl = RTM_PAYLOAD(nh);
125
126 for (; RTA_OK(rt_attr, rtl); rt_attr = RTA_NEXT(rt_attr, rtl)) {
127 switch (rt_attr->rta_type) {
128 case NDA_DST:
129 sprintf(dsts, "%u",
130 (*((__be32 *)RTA_DATA(rt_attr))));
131 break;
132 case RTA_GATEWAY:
133 sprintf(gws, "%u",
134 *((__be32 *)RTA_DATA(rt_attr)));
135 break;
136 case RTA_OIF:
137 sprintf(ifs, "%u",
138 *((int *)RTA_DATA(rt_attr)));
139 break;
140 case RTA_METRICS:
141 sprintf(metrics, "%u",
142 *((int *)RTA_DATA(rt_attr)));
143 default:
144 break;
145 }
146 }
147 sprintf(dsts_len, "%d", rt_msg->rtm_dst_len);
148 route.dst = atoi(dsts);
149 route.dst_len = atoi(dsts_len);
150 route.gw = atoi(gws);
151 route.iface = atoi(ifs);
152 route.metric = atoi(metrics);
153 assert(get_mac_addr(route.iface, &route.mac) == 0);
154 assert(bpf_map_update_elem(tx_port_map_fd,
155 &route.iface, &route.iface, 0) == 0);
156 if (rtm_family == AF_INET) {
157 struct trie_value {
158 __u8 prefix[4];
159 __be64 value;
160 int ifindex;
161 int metric;
162 __be32 gw;
163 } *prefix_value;
164
165 prefix_key = alloca(sizeof(*prefix_key) + 4);
166 prefix_value = alloca(sizeof(*prefix_value));
167
168 prefix_key->prefixlen = 32;
169 prefix_key->prefixlen = route.dst_len;
170 direct_entry.mac = route.mac & 0xffffffffffff;
171 direct_entry.ifindex = route.iface;
172 direct_entry.arp.mac = 0;
173 direct_entry.arp.dst = 0;
174 if (route.dst_len == 32) {
175 if (nh->nlmsg_type == RTM_DELROUTE) {
176 assert(bpf_map_delete_elem(exact_match_map_fd,
177 &route.dst) == 0);
178 } else {
179 if (bpf_map_lookup_elem(arp_table_map_fd,
180 &route.dst,
181 &direct_entry.arp.mac) == 0)
182 direct_entry.arp.dst = route.dst;
183 assert(bpf_map_update_elem(exact_match_map_fd,
184 &route.dst,
185 &direct_entry, 0) == 0);
186 }
187 }
188 for (i = 0; i < 4; i++)
189 prefix_key->data[i] = (route.dst >> i * 8) & 0xff;
190
191 if (bpf_map_lookup_elem(lpm_map_fd, prefix_key,
192 prefix_value) < 0) {
193 for (i = 0; i < 4; i++)
194 prefix_value->prefix[i] = prefix_key->data[i];
195 prefix_value->value = route.mac & 0xffffffffffff;
196 prefix_value->ifindex = route.iface;
197 prefix_value->gw = route.gw;
198 prefix_value->metric = route.metric;
199
200 assert(bpf_map_update_elem(lpm_map_fd,
201 prefix_key,
202 prefix_value, 0
203 ) == 0);
204 } else {
205 if (nh->nlmsg_type == RTM_DELROUTE) {
206 assert(bpf_map_delete_elem(lpm_map_fd,
207 prefix_key
208 ) == 0);
209 /* Rereading the route table to check if
210 * there is an entry with the same
211 * prefix but a different metric as the
212 * deleted entry.
213 */
214 get_route_table(AF_INET);
215 } else if (prefix_key->data[0] ==
216 prefix_value->prefix[0] &&
217 prefix_key->data[1] ==
218 prefix_value->prefix[1] &&
219 prefix_key->data[2] ==
220 prefix_value->prefix[2] &&
221 prefix_key->data[3] ==
222 prefix_value->prefix[3] &&
223 route.metric >= prefix_value->metric) {
224 continue;
225 } else {
226 for (i = 0; i < 4; i++)
227 prefix_value->prefix[i] =
228 prefix_key->data[i];
229 prefix_value->value =
230 route.mac & 0xffffffffffff;
231 prefix_value->ifindex = route.iface;
232 prefix_value->gw = route.gw;
233 prefix_value->metric = route.metric;
234 assert(bpf_map_update_elem(lpm_map_fd,
235 prefix_key,
236 prefix_value,
237 0) == 0);
238 }
239 }
240 }
241 memset(&route, 0, sizeof(route));
242 memset(dsts, 0, sizeof(dsts));
243 memset(dsts_len, 0, sizeof(dsts_len));
244 memset(gws, 0, sizeof(gws));
245 memset(ifs, 0, sizeof(ifs));
246 memset(&route, 0, sizeof(route));
247 }
248}
249
250/* Function to read the existing route table when the process is launched*/
251static int get_route_table(int rtm_family)
252{
253 struct sockaddr_nl sa;
254 struct nlmsghdr *nh;
255 int sock, seq = 0;
256 struct msghdr msg;
257 struct iovec iov;
258 int ret = 0;
259 int nll;
260
261 struct {
262 struct nlmsghdr nl;
263 struct rtmsg rt;
264 char buf[8192];
265 } req;
266
267 sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
268 if (sock < 0) {
269 fprintf(stderr, "open netlink socket: %s\n", strerror(errno));
270 return -errno;
271 }
272 memset(&sa, 0, sizeof(sa));
273 sa.nl_family = AF_NETLINK;
274 if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
275 fprintf(stderr, "bind netlink socket: %s\n", strerror(errno));
276 ret = -errno;
277 goto cleanup;
278 }
279 memset(&req, 0, sizeof(req));
280 req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
281 req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
282 req.nl.nlmsg_type = RTM_GETROUTE;
283
284 req.rt.rtm_family = rtm_family;
285 req.rt.rtm_table = RT_TABLE_MAIN;
286 req.nl.nlmsg_pid = 0;
287 req.nl.nlmsg_seq = ++seq;
288 memset(&msg, 0, sizeof(msg));
289 iov.iov_base = (void *)&req.nl;
290 iov.iov_len = req.nl.nlmsg_len;
291 msg.msg_iov = &iov;
292 msg.msg_iovlen = 1;
293 ret = sendmsg(sock, &msg, 0);
294 if (ret < 0) {
295 fprintf(stderr, "send to netlink: %s\n", strerror(errno));
296 ret = -errno;
297 goto cleanup;
298 }
299 memset(buf, 0, sizeof(buf));
300 nll = recv_msg(sa, sock);
301 if (nll < 0) {
302 fprintf(stderr, "recv from netlink: %s\n", strerror(nll));
303 ret = nll;
304 goto cleanup;
305 }
306 nh = (struct nlmsghdr *)buf;
307 read_route(nh, nll);
308cleanup:
309 close(sock);
310 return ret;
311}
312
313/* Function to parse the arp entry returned by netlink
314 * Updates the arp entry related map entries
315 */
316static void read_arp(struct nlmsghdr *nh, int nll)
317{
318 struct rtattr *rt_attr;
319 char dsts[24], mac[24];
320 struct ndmsg *rt_msg;
321 int rtl, ndm_family;
322
323 struct arp_table {
324 __be64 mac;
325 __be32 dst;
326 } arp_entry;
327 struct direct_map {
328 struct arp_table arp;
329 int ifindex;
330 __be64 mac;
331 } direct_entry;
332
333 for (; NLMSG_OK(nh, nll); nh = NLMSG_NEXT(nh, nll)) {
334 rt_msg = (struct ndmsg *)NLMSG_DATA(nh);
335 rt_attr = (struct rtattr *)RTM_RTA(rt_msg);
336 ndm_family = rt_msg->ndm_family;
337 rtl = RTM_PAYLOAD(nh);
338 for (; RTA_OK(rt_attr, rtl); rt_attr = RTA_NEXT(rt_attr, rtl)) {
339 switch (rt_attr->rta_type) {
340 case NDA_DST:
341 sprintf(dsts, "%u",
342 *((__be32 *)RTA_DATA(rt_attr)));
343 break;
344 case NDA_LLADDR:
345 sprintf(mac, "%lld",
346 *((__be64 *)RTA_DATA(rt_attr)));
347 break;
348 default:
349 break;
350 }
351 }
352 arp_entry.dst = atoi(dsts);
353 arp_entry.mac = atol(mac);
354
355 if (ndm_family == AF_INET) {
356 if (bpf_map_lookup_elem(exact_match_map_fd,
357 &arp_entry.dst,
358 &direct_entry) == 0) {
359 if (nh->nlmsg_type == RTM_DELNEIGH) {
360 direct_entry.arp.dst = 0;
361 direct_entry.arp.mac = 0;
362 } else if (nh->nlmsg_type == RTM_NEWNEIGH) {
363 direct_entry.arp.dst = arp_entry.dst;
364 direct_entry.arp.mac = arp_entry.mac;
365 }
366 assert(bpf_map_update_elem(exact_match_map_fd,
367 &arp_entry.dst,
368 &direct_entry, 0
369 ) == 0);
370 memset(&direct_entry, 0, sizeof(direct_entry));
371 }
372 if (nh->nlmsg_type == RTM_DELNEIGH) {
373 assert(bpf_map_delete_elem(arp_table_map_fd,
374 &arp_entry.dst) == 0);
375 } else if (nh->nlmsg_type == RTM_NEWNEIGH) {
376 assert(bpf_map_update_elem(arp_table_map_fd,
377 &arp_entry.dst,
378 &arp_entry.mac, 0
379 ) == 0);
380 }
381 }
382 memset(&arp_entry, 0, sizeof(arp_entry));
383 memset(dsts, 0, sizeof(dsts));
384 }
385}
386
387/* Function to read the existing arp table when the process is launched*/
388static int get_arp_table(int rtm_family)
389{
390 struct sockaddr_nl sa;
391 struct nlmsghdr *nh;
392 int sock, seq = 0;
393 struct msghdr msg;
394 struct iovec iov;
395 int ret = 0;
396 int nll;
397 struct {
398 struct nlmsghdr nl;
399 struct ndmsg rt;
400 char buf[8192];
401 } req;
402
403 sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
404 if (sock < 0) {
405 fprintf(stderr, "open netlink socket: %s\n", strerror(errno));
406 return -errno;
407 }
408 memset(&sa, 0, sizeof(sa));
409 sa.nl_family = AF_NETLINK;
410 if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
411 fprintf(stderr, "bind netlink socket: %s\n", strerror(errno));
412 ret = -errno;
413 goto cleanup;
414 }
415 memset(&req, 0, sizeof(req));
416 req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
417 req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
418 req.nl.nlmsg_type = RTM_GETNEIGH;
419 req.rt.ndm_state = NUD_REACHABLE;
420 req.rt.ndm_family = rtm_family;
421 req.nl.nlmsg_pid = 0;
422 req.nl.nlmsg_seq = ++seq;
423 memset(&msg, 0, sizeof(msg));
424 iov.iov_base = (void *)&req.nl;
425 iov.iov_len = req.nl.nlmsg_len;
426 msg.msg_iov = &iov;
427 msg.msg_iovlen = 1;
428 ret = sendmsg(sock, &msg, 0);
429 if (ret < 0) {
430 fprintf(stderr, "send to netlink: %s\n", strerror(errno));
431 ret = -errno;
432 goto cleanup;
433 }
434 memset(buf, 0, sizeof(buf));
435 nll = recv_msg(sa, sock);
436 if (nll < 0) {
437 fprintf(stderr, "recv from netlink: %s\n", strerror(nll));
438 ret = nll;
439 goto cleanup;
440 }
441 nh = (struct nlmsghdr *)buf;
442 read_arp(nh, nll);
443cleanup:
444 close(sock);
445 return ret;
446}
447
448/* Function to keep track and update changes in route and arp table
449 * Give regular statistics of packets forwarded
450 */
451static void *monitor_routes_thread(void *arg)
452{
453 struct pollfd fds_route, fds_arp;
454 struct sockaddr_nl la, lr;
455 int sock, sock_arp, nll;
456 struct nlmsghdr *nh;
457
458 sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
459 if (sock < 0) {
460 fprintf(stderr, "open netlink socket: %s\n", strerror(errno));
461 return NULL;
462 }
463
464 fcntl(sock, F_SETFL, O_NONBLOCK);
465 memset(&lr, 0, sizeof(lr));
466 lr.nl_family = AF_NETLINK;
467 lr.nl_groups = RTMGRP_IPV6_ROUTE | RTMGRP_IPV4_ROUTE | RTMGRP_NOTIFY;
468 if (bind(sock, (struct sockaddr *)&lr, sizeof(lr)) < 0) {
469 fprintf(stderr, "bind netlink socket: %s\n", strerror(errno));
470 close(sock);
471 return NULL;
472 }
473
474 fds_route.fd = sock;
475 fds_route.events = POLL_IN;
476
477 sock_arp = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
478 if (sock_arp < 0) {
479 fprintf(stderr, "open netlink socket: %s\n", strerror(errno));
480 close(sock);
481 return NULL;
482 }
483
484 fcntl(sock_arp, F_SETFL, O_NONBLOCK);
485 memset(&la, 0, sizeof(la));
486 la.nl_family = AF_NETLINK;
487 la.nl_groups = RTMGRP_NEIGH | RTMGRP_NOTIFY;
488 if (bind(sock_arp, (struct sockaddr *)&la, sizeof(la)) < 0) {
489 fprintf(stderr, "bind netlink socket: %s\n", strerror(errno));
490 goto cleanup;
491 }
492
493 fds_arp.fd = sock_arp;
494 fds_arp.events = POLL_IN;
495
496 /* dump route and arp tables */
497 if (get_arp_table(AF_INET) < 0) {
498 fprintf(stderr, "Failed reading arp table\n");
499 goto cleanup;
500 }
501
502 if (get_route_table(AF_INET) < 0) {
503 fprintf(stderr, "Failed reading route table\n");
504 goto cleanup;
505 }
506
507 while (!routes_thread_exit) {
508 memset(buf, 0, sizeof(buf));
509 if (poll(&fds_route, 1, 3) == POLL_IN) {
510 nll = recv_msg(lr, sock);
511 if (nll < 0) {
512 fprintf(stderr, "recv from netlink: %s\n",
513 strerror(nll));
514 goto cleanup;
515 }
516
517 nh = (struct nlmsghdr *)buf;
518 read_route(nh, nll);
519 }
520
521 memset(buf, 0, sizeof(buf));
522 if (poll(&fds_arp, 1, 3) == POLL_IN) {
523 nll = recv_msg(la, sock_arp);
524 if (nll < 0) {
525 fprintf(stderr, "recv from netlink: %s\n",
526 strerror(nll));
527 goto cleanup;
528 }
529
530 nh = (struct nlmsghdr *)buf;
531 read_arp(nh, nll);
532 }
533
534 sleep(interval);
535 }
536
537cleanup:
538 close(sock_arp);
539 close(sock);
540 return NULL;
541}
542
/* Print the help text by forwarding to sample_usage().
 *
 * @obj is accepted but not used here.
 */
static void usage(char *argv[], const struct option *long_options,
		  const char *doc, int mask, bool error,
		  struct bpf_object *obj)
{
	(void)obj;

	sample_usage(argv, long_options, doc, mask, error);
}
549
550int main(int argc, char **argv)
551{
552 bool error = true, generic = false, force = false;
553 int opt, ret = EXIT_FAIL_BPF;
554 struct xdp_router_ipv4 *skel;
555 int i, total_ifindex = argc - 1;
556 char **ifname_list = argv + 1;
557 pthread_t routes_thread;
558 int longindex = 0;
559
560 if (libbpf_set_strict_mode(LIBBPF_STRICT_ALL) < 0) {
561 fprintf(stderr, "Failed to set libbpf strict mode: %s\n",
562 strerror(errno));
563 goto end;
564 }
565
566 skel = xdp_router_ipv4__open();
567 if (!skel) {
568 fprintf(stderr, "Failed to xdp_router_ipv4__open: %s\n",
569 strerror(errno));
570 goto end;
571 }
572
573 ret = sample_init_pre_load(skel);
574 if (ret < 0) {
575 fprintf(stderr, "Failed to sample_init_pre_load: %s\n",
576 strerror(-ret));
577 ret = EXIT_FAIL_BPF;
578 goto end_destroy;
579 }
580
581 ret = xdp_router_ipv4__load(skel);
582 if (ret < 0) {
583 fprintf(stderr, "Failed to xdp_router_ipv4__load: %s\n",
584 strerror(errno));
585 goto end_destroy;
586 }
587
588 ret = sample_init(skel, mask);
589 if (ret < 0) {
590 fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret));
591 ret = EXIT_FAIL;
592 goto end_destroy;
593 }
594
595 while ((opt = getopt_long(argc, argv, "si:SFvh",
596 long_options, &longindex)) != -1) {
597 switch (opt) {
598 case 's':
599 mask |= SAMPLE_REDIRECT_MAP_CNT;
600 total_ifindex--;
601 ifname_list++;
602 break;
603 case 'i':
604 interval = strtoul(optarg, NULL, 0);
605 total_ifindex -= 2;
606 ifname_list += 2;
607 break;
608 case 'S':
609 generic = true;
610 total_ifindex--;
611 ifname_list++;
612 break;
613 case 'F':
614 force = true;
615 total_ifindex--;
616 ifname_list++;
617 break;
618 case 'v':
619 sample_switch_mode();
620 total_ifindex--;
621 ifname_list++;
622 break;
623 case 'h':
624 error = false;
625 default:
626 usage(argv, long_options, __doc__, mask, error, skel->obj);
627 goto end_destroy;
628 }
629 }
630
631 ret = EXIT_FAIL_OPTION;
632 if (optind == argc) {
633 usage(argv, long_options, __doc__, mask, true, skel->obj);
634 goto end_destroy;
635 }
636
637 lpm_map_fd = bpf_map__fd(skel->maps.lpm_map);
638 if (lpm_map_fd < 0) {
639 fprintf(stderr, "Failed loading lpm_map %s\n",
640 strerror(-lpm_map_fd));
641 goto end_destroy;
642 }
643 arp_table_map_fd = bpf_map__fd(skel->maps.arp_table);
644 if (arp_table_map_fd < 0) {
645 fprintf(stderr, "Failed loading arp_table_map_fd %s\n",
646 strerror(-arp_table_map_fd));
647 goto end_destroy;
648 }
649 exact_match_map_fd = bpf_map__fd(skel->maps.exact_match);
650 if (exact_match_map_fd < 0) {
651 fprintf(stderr, "Failed loading exact_match_map_fd %s\n",
652 strerror(-exact_match_map_fd));
653 goto end_destroy;
654 }
655 tx_port_map_fd = bpf_map__fd(skel->maps.tx_port);
656 if (tx_port_map_fd < 0) {
657 fprintf(stderr, "Failed loading tx_port_map_fd %s\n",
658 strerror(-tx_port_map_fd));
659 goto end_destroy;
660 }
661
662 ret = EXIT_FAIL_XDP;
663 for (i = 0; i < total_ifindex; i++) {
664 int index = if_nametoindex(ifname_list[i]);
665
666 if (!index) {
667 fprintf(stderr, "Interface %s not found %s\n",
668 ifname_list[i], strerror(-tx_port_map_fd));
669 goto end_destroy;
670 }
671 if (sample_install_xdp(skel->progs.xdp_router_ipv4_prog,
672 index, generic, force) < 0)
673 goto end_destroy;
674 }
675
676 ret = pthread_create(&routes_thread, NULL, monitor_routes_thread, NULL);
677 if (ret) {
678 fprintf(stderr, "Failed creating routes_thread: %s\n", strerror(-ret));
679 ret = EXIT_FAIL;
680 goto end_destroy;
681 }
682
683 ret = sample_run(interval, NULL, NULL);
684 routes_thread_exit = true;
685
686 if (ret < 0) {
687 fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret));
688 ret = EXIT_FAIL;
689 goto end_thread_wait;
690 }
691 ret = EXIT_OK;
692
693end_thread_wait:
694 pthread_join(routes_thread, NULL);
695end_destroy:
696 xdp_router_ipv4__destroy(skel);
697end:
698 sample_exit(ret);
699}
1/* Copyright (C) 2017 Cavium, Inc.
2 *
3 * This program is free software; you can redistribute it and/or modify it
4 * under the terms of version 2 of the GNU General Public License
5 * as published by the Free Software Foundation.
6 */
7#include <linux/bpf.h>
8#include <linux/netlink.h>
9#include <linux/rtnetlink.h>
10#include <assert.h>
11#include <errno.h>
12#include <signal.h>
13#include <stdio.h>
14#include <stdlib.h>
15#include <string.h>
16#include <sys/socket.h>
17#include <unistd.h>
18#include "bpf_load.h"
19#include "libbpf.h"
20#include <arpa/inet.h>
21#include <fcntl.h>
22#include <poll.h>
23#include <net/if.h>
24#include <netdb.h>
25#include <sys/ioctl.h>
26#include <sys/syscall.h>
27#include "bpf_util.h"
28
/* Netlink sockets used by the monitor loop; close_and_exit() closes both. */
int sock;
int sock_arp;

/* Flags argument passed to every bpf_set_link_xdp_fd() call. */
int flags = 0;

/* Number of entries in ifindex_list, and the list itself. */
static int total_ifindex;
int *ifindex_list;

/* Receive buffer filled by recv_msg() and walked by the parsers. */
char buf[8192];

/* Defined further down; read_route() needs it before the definition. */
static int get_route_table(int rtm_family);
35static void int_exit(int sig)
36{
37 int i = 0;
38
39 for (i = 0; i < total_ifindex; i++)
40 bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
41 exit(0);
42}
43
44static void close_and_exit(int sig)
45{
46 int i = 0;
47
48 close(sock);
49 close(sock_arp);
50
51 for (i = 0; i < total_ifindex; i++)
52 bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
53 exit(0);
54}
55
56/* Get the mac address of the interface given interface name */
57static __be64 getmac(char *iface)
58{
59 struct ifreq ifr;
60 __be64 mac = 0;
61 int fd, i;
62
63 fd = socket(AF_INET, SOCK_DGRAM, 0);
64 ifr.ifr_addr.sa_family = AF_INET;
65 strncpy(ifr.ifr_name, iface, IFNAMSIZ - 1);
66 if (ioctl(fd, SIOCGIFHWADDR, &ifr) < 0) {
67 printf("ioctl failed leaving....\n");
68 return -1;
69 }
70 for (i = 0; i < 6 ; i++)
71 *((__u8 *)&mac + i) = (__u8)ifr.ifr_hwaddr.sa_data[i];
72 close(fd);
73 return mac;
74}
75
76static int recv_msg(struct sockaddr_nl sock_addr, int sock)
77{
78 struct nlmsghdr *nh;
79 int len, nll = 0;
80 char *buf_ptr;
81
82 buf_ptr = buf;
83 while (1) {
84 len = recv(sock, buf_ptr, sizeof(buf) - nll, 0);
85 if (len < 0)
86 return len;
87
88 nh = (struct nlmsghdr *)buf_ptr;
89
90 if (nh->nlmsg_type == NLMSG_DONE)
91 break;
92 buf_ptr += len;
93 nll += len;
94 if ((sock_addr.nl_groups & RTMGRP_NEIGH) == RTMGRP_NEIGH)
95 break;
96
97 if ((sock_addr.nl_groups & RTMGRP_IPV4_ROUTE) == RTMGRP_IPV4_ROUTE)
98 break;
99 }
100 return nll;
101}
102
103/* Function to parse the route entry returned by netlink
104 * Updates the route entry related map entries
105 */
106static void read_route(struct nlmsghdr *nh, int nll)
107{
108 char dsts[24], gws[24], ifs[16], dsts_len[24], metrics[24];
109 struct bpf_lpm_trie_key *prefix_key;
110 struct rtattr *rt_attr;
111 struct rtmsg *rt_msg;
112 int rtm_family;
113 int rtl;
114 int i;
115 struct route_table {
116 int dst_len, iface, metric;
117 char *iface_name;
118 __be32 dst, gw;
119 __be64 mac;
120 } route;
121 struct arp_table {
122 __be64 mac;
123 __be32 dst;
124 };
125
126 struct direct_map {
127 struct arp_table arp;
128 int ifindex;
129 __be64 mac;
130 } direct_entry;
131
132 if (nh->nlmsg_type == RTM_DELROUTE)
133 printf("DELETING Route entry\n");
134 else if (nh->nlmsg_type == RTM_GETROUTE)
135 printf("READING Route entry\n");
136 else if (nh->nlmsg_type == RTM_NEWROUTE)
137 printf("NEW Route entry\n");
138 else
139 printf("%d\n", nh->nlmsg_type);
140
141 memset(&route, 0, sizeof(route));
142 printf("Destination\t\tGateway\t\tGenmask\t\tMetric\t\tIface\n");
143 for (; NLMSG_OK(nh, nll); nh = NLMSG_NEXT(nh, nll)) {
144 rt_msg = (struct rtmsg *)NLMSG_DATA(nh);
145 rtm_family = rt_msg->rtm_family;
146 if (rtm_family == AF_INET)
147 if (rt_msg->rtm_table != RT_TABLE_MAIN)
148 continue;
149 rt_attr = (struct rtattr *)RTM_RTA(rt_msg);
150 rtl = RTM_PAYLOAD(nh);
151
152 for (; RTA_OK(rt_attr, rtl); rt_attr = RTA_NEXT(rt_attr, rtl)) {
153 switch (rt_attr->rta_type) {
154 case NDA_DST:
155 sprintf(dsts, "%u",
156 (*((__be32 *)RTA_DATA(rt_attr))));
157 break;
158 case RTA_GATEWAY:
159 sprintf(gws, "%u",
160 *((__be32 *)RTA_DATA(rt_attr)));
161 break;
162 case RTA_OIF:
163 sprintf(ifs, "%u",
164 *((int *)RTA_DATA(rt_attr)));
165 break;
166 case RTA_METRICS:
167 sprintf(metrics, "%u",
168 *((int *)RTA_DATA(rt_attr)));
169 default:
170 break;
171 }
172 }
173 sprintf(dsts_len, "%d", rt_msg->rtm_dst_len);
174 route.dst = atoi(dsts);
175 route.dst_len = atoi(dsts_len);
176 route.gw = atoi(gws);
177 route.iface = atoi(ifs);
178 route.metric = atoi(metrics);
179 route.iface_name = alloca(sizeof(char *) * IFNAMSIZ);
180 route.iface_name = if_indextoname(route.iface, route.iface_name);
181 route.mac = getmac(route.iface_name);
182 if (route.mac == -1) {
183 int i = 0;
184
185 for (i = 0; i < total_ifindex; i++)
186 bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
187 exit(0);
188 }
189 assert(bpf_map_update_elem(map_fd[4], &route.iface, &route.iface, 0) == 0);
190 if (rtm_family == AF_INET) {
191 struct trie_value {
192 __u8 prefix[4];
193 __be64 value;
194 int ifindex;
195 int metric;
196 __be32 gw;
197 } *prefix_value;
198
199 prefix_key = alloca(sizeof(*prefix_key) + 3);
200 prefix_value = alloca(sizeof(*prefix_value));
201
202 prefix_key->prefixlen = 32;
203 prefix_key->prefixlen = route.dst_len;
204 direct_entry.mac = route.mac & 0xffffffffffff;
205 direct_entry.ifindex = route.iface;
206 direct_entry.arp.mac = 0;
207 direct_entry.arp.dst = 0;
208 if (route.dst_len == 32) {
209 if (nh->nlmsg_type == RTM_DELROUTE) {
210 assert(bpf_map_delete_elem(map_fd[3], &route.dst) == 0);
211 } else {
212 if (bpf_map_lookup_elem(map_fd[2], &route.dst, &direct_entry.arp.mac) == 0)
213 direct_entry.arp.dst = route.dst;
214 assert(bpf_map_update_elem(map_fd[3], &route.dst, &direct_entry, 0) == 0);
215 }
216 }
217 for (i = 0; i < 4; i++)
218 prefix_key->data[i] = (route.dst >> i * 8) & 0xff;
219
220 printf("%3d.%d.%d.%d\t\t%3x\t\t%d\t\t%d\t\t%s\n",
221 (int)prefix_key->data[0],
222 (int)prefix_key->data[1],
223 (int)prefix_key->data[2],
224 (int)prefix_key->data[3],
225 route.gw, route.dst_len,
226 route.metric,
227 route.iface_name);
228 if (bpf_map_lookup_elem(map_fd[0], prefix_key,
229 prefix_value) < 0) {
230 for (i = 0; i < 4; i++)
231 prefix_value->prefix[i] = prefix_key->data[i];
232 prefix_value->value = route.mac & 0xffffffffffff;
233 prefix_value->ifindex = route.iface;
234 prefix_value->gw = route.gw;
235 prefix_value->metric = route.metric;
236
237 assert(bpf_map_update_elem(map_fd[0],
238 prefix_key,
239 prefix_value, 0
240 ) == 0);
241 } else {
242 if (nh->nlmsg_type == RTM_DELROUTE) {
243 printf("deleting entry\n");
244 printf("prefix key=%d.%d.%d.%d/%d",
245 prefix_key->data[0],
246 prefix_key->data[1],
247 prefix_key->data[2],
248 prefix_key->data[3],
249 prefix_key->prefixlen);
250 assert(bpf_map_delete_elem(map_fd[0],
251 prefix_key
252 ) == 0);
253 /* Rereading the route table to check if
254 * there is an entry with the same
255 * prefix but a different metric as the
256 * deleted enty.
257 */
258 get_route_table(AF_INET);
259 } else if (prefix_key->data[0] ==
260 prefix_value->prefix[0] &&
261 prefix_key->data[1] ==
262 prefix_value->prefix[1] &&
263 prefix_key->data[2] ==
264 prefix_value->prefix[2] &&
265 prefix_key->data[3] ==
266 prefix_value->prefix[3] &&
267 route.metric >= prefix_value->metric) {
268 continue;
269 } else {
270 for (i = 0; i < 4; i++)
271 prefix_value->prefix[i] =
272 prefix_key->data[i];
273 prefix_value->value =
274 route.mac & 0xffffffffffff;
275 prefix_value->ifindex = route.iface;
276 prefix_value->gw = route.gw;
277 prefix_value->metric = route.metric;
278 assert(bpf_map_update_elem(
279 map_fd[0],
280 prefix_key,
281 prefix_value,
282 0) == 0);
283 }
284 }
285 }
286 memset(&route, 0, sizeof(route));
287 memset(dsts, 0, sizeof(dsts));
288 memset(dsts_len, 0, sizeof(dsts_len));
289 memset(gws, 0, sizeof(gws));
290 memset(ifs, 0, sizeof(ifs));
291 memset(&route, 0, sizeof(route));
292 }
293}
294
295/* Function to read the existing route table when the process is launched*/
296static int get_route_table(int rtm_family)
297{
298 struct sockaddr_nl sa;
299 struct nlmsghdr *nh;
300 int sock, seq = 0;
301 struct msghdr msg;
302 struct iovec iov;
303 int ret = 0;
304 int nll;
305
306 struct {
307 struct nlmsghdr nl;
308 struct rtmsg rt;
309 char buf[8192];
310 } req;
311
312 sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
313 if (sock < 0) {
314 printf("open netlink socket: %s\n", strerror(errno));
315 return -1;
316 }
317 memset(&sa, 0, sizeof(sa));
318 sa.nl_family = AF_NETLINK;
319 if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
320 printf("bind to netlink: %s\n", strerror(errno));
321 ret = -1;
322 goto cleanup;
323 }
324 memset(&req, 0, sizeof(req));
325 req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
326 req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
327 req.nl.nlmsg_type = RTM_GETROUTE;
328
329 req.rt.rtm_family = rtm_family;
330 req.rt.rtm_table = RT_TABLE_MAIN;
331 req.nl.nlmsg_pid = 0;
332 req.nl.nlmsg_seq = ++seq;
333 memset(&msg, 0, sizeof(msg));
334 iov.iov_base = (void *)&req.nl;
335 iov.iov_len = req.nl.nlmsg_len;
336 msg.msg_iov = &iov;
337 msg.msg_iovlen = 1;
338 ret = sendmsg(sock, &msg, 0);
339 if (ret < 0) {
340 printf("send to netlink: %s\n", strerror(errno));
341 ret = -1;
342 goto cleanup;
343 }
344 memset(buf, 0, sizeof(buf));
345 nll = recv_msg(sa, sock);
346 if (nll < 0) {
347 printf("recv from netlink: %s\n", strerror(nll));
348 ret = -1;
349 goto cleanup;
350 }
351 nh = (struct nlmsghdr *)buf;
352 read_route(nh, nll);
353cleanup:
354 close(sock);
355 return ret;
356}
357
358/* Function to parse the arp entry returned by netlink
359 * Updates the arp entry related map entries
360 */
361static void read_arp(struct nlmsghdr *nh, int nll)
362{
363 struct rtattr *rt_attr;
364 char dsts[24], mac[24];
365 struct ndmsg *rt_msg;
366 int rtl, ndm_family;
367
368 struct arp_table {
369 __be64 mac;
370 __be32 dst;
371 } arp_entry;
372 struct direct_map {
373 struct arp_table arp;
374 int ifindex;
375 __be64 mac;
376 } direct_entry;
377
378 if (nh->nlmsg_type == RTM_GETNEIGH)
379 printf("READING arp entry\n");
380 printf("Address\tHwAddress\n");
381 for (; NLMSG_OK(nh, nll); nh = NLMSG_NEXT(nh, nll)) {
382 rt_msg = (struct ndmsg *)NLMSG_DATA(nh);
383 rt_attr = (struct rtattr *)RTM_RTA(rt_msg);
384 ndm_family = rt_msg->ndm_family;
385 rtl = RTM_PAYLOAD(nh);
386 for (; RTA_OK(rt_attr, rtl); rt_attr = RTA_NEXT(rt_attr, rtl)) {
387 switch (rt_attr->rta_type) {
388 case NDA_DST:
389 sprintf(dsts, "%u",
390 *((__be32 *)RTA_DATA(rt_attr)));
391 break;
392 case NDA_LLADDR:
393 sprintf(mac, "%lld",
394 *((__be64 *)RTA_DATA(rt_attr)));
395 break;
396 default:
397 break;
398 }
399 }
400 arp_entry.dst = atoi(dsts);
401 arp_entry.mac = atol(mac);
402 printf("%x\t\t%llx\n", arp_entry.dst, arp_entry.mac);
403 if (ndm_family == AF_INET) {
404 if (bpf_map_lookup_elem(map_fd[3], &arp_entry.dst,
405 &direct_entry) == 0) {
406 if (nh->nlmsg_type == RTM_DELNEIGH) {
407 direct_entry.arp.dst = 0;
408 direct_entry.arp.mac = 0;
409 } else if (nh->nlmsg_type == RTM_NEWNEIGH) {
410 direct_entry.arp.dst = arp_entry.dst;
411 direct_entry.arp.mac = arp_entry.mac;
412 }
413 assert(bpf_map_update_elem(map_fd[3],
414 &arp_entry.dst,
415 &direct_entry, 0
416 ) == 0);
417 memset(&direct_entry, 0, sizeof(direct_entry));
418 }
419 if (nh->nlmsg_type == RTM_DELNEIGH) {
420 assert(bpf_map_delete_elem(map_fd[2], &arp_entry.dst) == 0);
421 } else if (nh->nlmsg_type == RTM_NEWNEIGH) {
422 assert(bpf_map_update_elem(map_fd[2],
423 &arp_entry.dst,
424 &arp_entry.mac, 0
425 ) == 0);
426 }
427 }
428 memset(&arp_entry, 0, sizeof(arp_entry));
429 memset(dsts, 0, sizeof(dsts));
430 }
431}
432
433/* Function to read the existing arp table when the process is launched*/
434static int get_arp_table(int rtm_family)
435{
436 struct sockaddr_nl sa;
437 struct nlmsghdr *nh;
438 int sock, seq = 0;
439 struct msghdr msg;
440 struct iovec iov;
441 int ret = 0;
442 int nll;
443 struct {
444 struct nlmsghdr nl;
445 struct ndmsg rt;
446 char buf[8192];
447 } req;
448
449 sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
450 if (sock < 0) {
451 printf("open netlink socket: %s\n", strerror(errno));
452 return -1;
453 }
454 memset(&sa, 0, sizeof(sa));
455 sa.nl_family = AF_NETLINK;
456 if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
457 printf("bind to netlink: %s\n", strerror(errno));
458 ret = -1;
459 goto cleanup;
460 }
461 memset(&req, 0, sizeof(req));
462 req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
463 req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
464 req.nl.nlmsg_type = RTM_GETNEIGH;
465 req.rt.ndm_state = NUD_REACHABLE;
466 req.rt.ndm_family = rtm_family;
467 req.nl.nlmsg_pid = 0;
468 req.nl.nlmsg_seq = ++seq;
469 memset(&msg, 0, sizeof(msg));
470 iov.iov_base = (void *)&req.nl;
471 iov.iov_len = req.nl.nlmsg_len;
472 msg.msg_iov = &iov;
473 msg.msg_iovlen = 1;
474 ret = sendmsg(sock, &msg, 0);
475 if (ret < 0) {
476 printf("send to netlink: %s\n", strerror(errno));
477 ret = -1;
478 goto cleanup;
479 }
480 memset(buf, 0, sizeof(buf));
481 nll = recv_msg(sa, sock);
482 if (nll < 0) {
483 printf("recv from netlink: %s\n", strerror(nll));
484 ret = -1;
485 goto cleanup;
486 }
487 nh = (struct nlmsghdr *)buf;
488 read_arp(nh, nll);
489cleanup:
490 close(sock);
491 return ret;
492}
493
494/* Function to keep track and update changes in route and arp table
495 * Give regular statistics of packets forwarded
496 */
497static int monitor_route(void)
498{
499 unsigned int nr_cpus = bpf_num_possible_cpus();
500 const unsigned int nr_keys = 256;
501 struct pollfd fds_route, fds_arp;
502 __u64 prev[nr_keys][nr_cpus];
503 struct sockaddr_nl la, lr;
504 __u64 values[nr_cpus];
505 struct nlmsghdr *nh;
506 int nll, ret = 0;
507 int interval = 5;
508 __u32 key;
509 int i;
510
511 sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
512 if (sock < 0) {
513 printf("open netlink socket: %s\n", strerror(errno));
514 return -1;
515 }
516
517 fcntl(sock, F_SETFL, O_NONBLOCK);
518 memset(&lr, 0, sizeof(lr));
519 lr.nl_family = AF_NETLINK;
520 lr.nl_groups = RTMGRP_IPV6_ROUTE | RTMGRP_IPV4_ROUTE | RTMGRP_NOTIFY;
521 if (bind(sock, (struct sockaddr *)&lr, sizeof(lr)) < 0) {
522 printf("bind to netlink: %s\n", strerror(errno));
523 ret = -1;
524 goto cleanup;
525 }
526 fds_route.fd = sock;
527 fds_route.events = POLL_IN;
528
529 sock_arp = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
530 if (sock_arp < 0) {
531 printf("open netlink socket: %s\n", strerror(errno));
532 return -1;
533 }
534
535 fcntl(sock_arp, F_SETFL, O_NONBLOCK);
536 memset(&la, 0, sizeof(la));
537 la.nl_family = AF_NETLINK;
538 la.nl_groups = RTMGRP_NEIGH | RTMGRP_NOTIFY;
539 if (bind(sock_arp, (struct sockaddr *)&la, sizeof(la)) < 0) {
540 printf("bind to netlink: %s\n", strerror(errno));
541 ret = -1;
542 goto cleanup;
543 }
544 fds_arp.fd = sock_arp;
545 fds_arp.events = POLL_IN;
546
547 memset(prev, 0, sizeof(prev));
548 do {
549 signal(SIGINT, close_and_exit);
550 signal(SIGTERM, close_and_exit);
551
552 sleep(interval);
553 for (key = 0; key < nr_keys; key++) {
554 __u64 sum = 0;
555
556 assert(bpf_map_lookup_elem(map_fd[1], &key, values) == 0);
557 for (i = 0; i < nr_cpus; i++)
558 sum += (values[i] - prev[key][i]);
559 if (sum)
560 printf("proto %u: %10llu pkt/s\n",
561 key, sum / interval);
562 memcpy(prev[key], values, sizeof(values));
563 }
564
565 memset(buf, 0, sizeof(buf));
566 if (poll(&fds_route, 1, 3) == POLL_IN) {
567 nll = recv_msg(lr, sock);
568 if (nll < 0) {
569 printf("recv from netlink: %s\n", strerror(nll));
570 ret = -1;
571 goto cleanup;
572 }
573
574 nh = (struct nlmsghdr *)buf;
575 printf("Routing table updated.\n");
576 read_route(nh, nll);
577 }
578 memset(buf, 0, sizeof(buf));
579 if (poll(&fds_arp, 1, 3) == POLL_IN) {
580 nll = recv_msg(la, sock_arp);
581 if (nll < 0) {
582 printf("recv from netlink: %s\n", strerror(nll));
583 ret = -1;
584 goto cleanup;
585 }
586
587 nh = (struct nlmsghdr *)buf;
588 read_arp(nh, nll);
589 }
590
591 } while (1);
592cleanup:
593 close(sock);
594 return ret;
595}
596
597int main(int ac, char **argv)
598{
599 char filename[256];
600 char **ifname_list;
601 int i = 1;
602
603 snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
604 if (ac < 2) {
605 printf("usage: %s [-S] Interface name list\n", argv[0]);
606 return 1;
607 }
608 if (!strcmp(argv[1], "-S")) {
609 flags = XDP_FLAGS_SKB_MODE;
610 total_ifindex = ac - 2;
611 ifname_list = (argv + 2);
612 } else {
613 flags = 0;
614 total_ifindex = ac - 1;
615 ifname_list = (argv + 1);
616 }
617 if (load_bpf_file(filename)) {
618 printf("%s", bpf_log_buf);
619 return 1;
620 }
621 printf("\n**************loading bpf file*********************\n\n\n");
622 if (!prog_fd[0]) {
623 printf("load_bpf_file: %s\n", strerror(errno));
624 return 1;
625 }
626 ifindex_list = (int *)malloc(total_ifindex * sizeof(int *));
627 for (i = 0; i < total_ifindex; i++) {
628 ifindex_list[i] = if_nametoindex(ifname_list[i]);
629 if (!ifindex_list[i]) {
630 printf("Couldn't translate interface name: %s",
631 strerror(errno));
632 return 1;
633 }
634 }
635 for (i = 0; i < total_ifindex; i++) {
636 if (bpf_set_link_xdp_fd(ifindex_list[i], prog_fd[0], flags) < 0) {
637 printf("link set xdp fd failed\n");
638 int recovery_index = i;
639
640 for (i = 0; i < recovery_index; i++)
641 bpf_set_link_xdp_fd(ifindex_list[i], -1, flags);
642
643 return 1;
644 }
645 printf("Attached to %d\n", ifindex_list[i]);
646 }
647 signal(SIGINT, int_exit);
648 signal(SIGTERM, int_exit);
649
650 printf("*******************ROUTE TABLE*************************\n\n\n");
651 get_route_table(AF_INET);
652 printf("*******************ARP TABLE***************************\n\n\n");
653 get_arp_table(AF_INET);
654 if (monitor_route() < 0) {
655 printf("Error in receiving route update");
656 return 1;
657 }
658
659 return 0;
660}