v6.13.7
  1// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
  2
  3/*
  4 * AF_XDP user-space access library.
  5 *
  6 * Copyright(c) 2018 - 2019 Intel Corporation.
  7 *
  8 * Author(s): Magnus Karlsson <magnus.karlsson@intel.com>
  9 */
 10
 11#include <errno.h>
 12#include <stdlib.h>
 13#include <string.h>
 14#include <unistd.h>
 15#include <arpa/inet.h>
 16#include <asm/barrier.h>
 17#include <linux/compiler.h>
 18#include <linux/ethtool.h>
 19#include <linux/filter.h>
 20#include <linux/if_ether.h>
 21#include <linux/if_link.h>
 22#include <linux/if_packet.h>
 23#include <linux/if_xdp.h>
 24#include <linux/kernel.h>
 25#include <linux/list.h>
 26#include <linux/netlink.h>
 27#include <linux/rtnetlink.h>
 28#include <linux/sockios.h>
 29#include <net/if.h>
 30#include <sys/ioctl.h>
 31#include <sys/mman.h>
 32#include <sys/socket.h>
 33#include <sys/types.h>
 34
 35#include <bpf/bpf.h>
 36#include <bpf/libbpf.h>
 37#include "xsk.h"
 38#include "bpf_util.h"
 39
 40#ifndef SOL_XDP
 41 #define SOL_XDP 283
 42#endif
 43
 44#ifndef AF_XDP
 45 #define AF_XDP 44
 46#endif
 47
 48#ifndef PF_XDP
 49 #define PF_XDP AF_XDP
 50#endif
 51
 52#define pr_warn(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__)
 53
 54#define XSKMAP_SIZE 1
 55
 56struct xsk_umem {
 57	struct xsk_ring_prod *fill_save;
 58	struct xsk_ring_cons *comp_save;
 59	char *umem_area;
 60	struct xsk_umem_config config;
 61	int fd;
 62	int refcount;
 63	struct list_head ctx_list;
 64	bool rx_ring_setup_done;
 65	bool tx_ring_setup_done;
 66};
 67
 68struct xsk_ctx {
 69	struct xsk_ring_prod *fill;
 70	struct xsk_ring_cons *comp;
 71	__u32 queue_id;
 72	struct xsk_umem *umem;
 73	int refcount;
 74	int ifindex;
 75	struct list_head list;
 76};
 77
 78struct xsk_socket {
 79	struct xsk_ring_cons *rx;
 80	struct xsk_ring_prod *tx;
 81	struct xsk_ctx *ctx;
 82	struct xsk_socket_config config;
 83	int fd;
 84};
 85
 86struct nl_mtu_req {
 87	struct nlmsghdr nh;
 88	struct ifinfomsg msg;
 89	char             buf[512];
 90};
 91
 92int xsk_umem__fd(const struct xsk_umem *umem)
 93{
 94	return umem ? umem->fd : -EINVAL;
 95}
 96
 97int xsk_socket__fd(const struct xsk_socket *xsk)
 98{
 99	return xsk ? xsk->fd : -EINVAL;
100}
101
102static bool xsk_page_aligned(void *buffer)
103{
104	unsigned long addr = (unsigned long)buffer;
105
106	return !(addr & (getpagesize() - 1));
107}
108
109static void xsk_set_umem_config(struct xsk_umem_config *cfg,
110				const struct xsk_umem_config *usr_cfg)
111{
112	if (!usr_cfg) {
113		cfg->fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
114		cfg->comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
115		cfg->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
116		cfg->frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
117		cfg->flags = XSK_UMEM__DEFAULT_FLAGS;
118		cfg->tx_metadata_len = 0;
119		return;
120	}
121
122	cfg->fill_size = usr_cfg->fill_size;
123	cfg->comp_size = usr_cfg->comp_size;
124	cfg->frame_size = usr_cfg->frame_size;
125	cfg->frame_headroom = usr_cfg->frame_headroom;
126	cfg->flags = usr_cfg->flags;
127	cfg->tx_metadata_len = usr_cfg->tx_metadata_len;
128}
129
130static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,
131				     const struct xsk_socket_config *usr_cfg)
132{
133	if (!usr_cfg) {
134		cfg->rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
135		cfg->tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
136		cfg->bind_flags = 0;
137		return 0;
138	}
139
140	cfg->rx_size = usr_cfg->rx_size;
141	cfg->tx_size = usr_cfg->tx_size;
142	cfg->bind_flags = usr_cfg->bind_flags;
143
144	return 0;
145}
146
147static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off)
148{
149	socklen_t optlen;
150	int err;
151
152	optlen = sizeof(*off);
153	err = getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, off, &optlen);
154	if (err)
155		return err;
156
157	if (optlen == sizeof(*off))
158		return 0;
159
160	return -EINVAL;
161}
162
163static int xsk_create_umem_rings(struct xsk_umem *umem, int fd,
164				 struct xsk_ring_prod *fill,
165				 struct xsk_ring_cons *comp)
166{
167	struct xdp_mmap_offsets off;
168	void *map;
169	int err;
170
171	err = setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING,
172			 &umem->config.fill_size,
173			 sizeof(umem->config.fill_size));
174	if (err)
175		return -errno;
176
177	err = setsockopt(fd, SOL_XDP, XDP_UMEM_COMPLETION_RING,
178			 &umem->config.comp_size,
179			 sizeof(umem->config.comp_size));
180	if (err)
181		return -errno;
182
183	err = xsk_get_mmap_offsets(fd, &off);
184	if (err)
185		return -errno;
186
187	map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64),
188		   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
189		   XDP_UMEM_PGOFF_FILL_RING);
190	if (map == MAP_FAILED)
191		return -errno;
192
193	fill->mask = umem->config.fill_size - 1;
194	fill->size = umem->config.fill_size;
195	fill->producer = map + off.fr.producer;
196	fill->consumer = map + off.fr.consumer;
197	fill->flags = map + off.fr.flags;
198	fill->ring = map + off.fr.desc;
199	fill->cached_cons = umem->config.fill_size;
200
201	map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64),
202		   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
203		   XDP_UMEM_PGOFF_COMPLETION_RING);
204	if (map == MAP_FAILED) {
205		err = -errno;
206		goto out_mmap;
207	}
208
209	comp->mask = umem->config.comp_size - 1;
210	comp->size = umem->config.comp_size;
211	comp->producer = map + off.cr.producer;
212	comp->consumer = map + off.cr.consumer;
213	comp->flags = map + off.cr.flags;
214	comp->ring = map + off.cr.desc;
215
216	return 0;
217
218out_mmap:
219	munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64));
220	return err;
221}
222
223int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area,
224		     __u64 size, struct xsk_ring_prod *fill,
225		     struct xsk_ring_cons *comp,
226		     const struct xsk_umem_config *usr_config)
227{
228	struct xdp_umem_reg mr;
229	struct xsk_umem *umem;
230	int err;
231
232	if (!umem_area || !umem_ptr || !fill || !comp)
233		return -EFAULT;
234	if (!size && !xsk_page_aligned(umem_area))
235		return -EINVAL;
236
237	umem = calloc(1, sizeof(*umem));
238	if (!umem)
239		return -ENOMEM;
240
241	umem->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0);
242	if (umem->fd < 0) {
243		err = -errno;
244		goto out_umem_alloc;
245	}
246
247	umem->umem_area = umem_area;
248	INIT_LIST_HEAD(&umem->ctx_list);
249	xsk_set_umem_config(&umem->config, usr_config);
250
251	memset(&mr, 0, sizeof(mr));
252	mr.addr = (uintptr_t)umem_area;
253	mr.len = size;
254	mr.chunk_size = umem->config.frame_size;
255	mr.headroom = umem->config.frame_headroom;
256	mr.flags = umem->config.flags;
257	mr.tx_metadata_len = umem->config.tx_metadata_len;
258
259	err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
260	if (err) {
261		err = -errno;
262		goto out_socket;
263	}
264
265	err = xsk_create_umem_rings(umem, umem->fd, fill, comp);
266	if (err)
267		goto out_socket;
268
269	umem->fill_save = fill;
270	umem->comp_save = comp;
271	*umem_ptr = umem;
272	return 0;
273
274out_socket:
275	close(umem->fd);
276out_umem_alloc:
277	free(umem);
278	return err;
279}
280
281bool xsk_is_in_mode(u32 ifindex, int mode)
282{
283	LIBBPF_OPTS(bpf_xdp_query_opts, opts);
284	int ret;
285
286	ret = bpf_xdp_query(ifindex, mode, &opts);
287	if (ret) {
288		printf("XDP mode query returned error %s\n", strerror(errno));
289		return false;
290	}
291
292	if (mode == XDP_FLAGS_DRV_MODE)
293		return opts.attach_mode == XDP_ATTACHED_DRV;
294	else if (mode == XDP_FLAGS_SKB_MODE)
295		return opts.attach_mode == XDP_ATTACHED_SKB;
296
297	return false;
298}
299
300/* Lifted from netlink.c in tools/lib/bpf */
301static int netlink_recvmsg(int sock, struct msghdr *mhdr, int flags)
302{
303	int len;
304
305	do {
306		len = recvmsg(sock, mhdr, flags);
307	} while (len < 0 && (errno == EINTR || errno == EAGAIN));
308
309	if (len < 0)
310		return -errno;
311	return len;
312}
313
314/* Lifted from netlink.c in tools/lib/bpf */
315static int alloc_iov(struct iovec *iov, int len)
316{
317	void *nbuf;
318
319	nbuf = realloc(iov->iov_base, len);
320	if (!nbuf)
321		return -ENOMEM;
322
323	iov->iov_base = nbuf;
324	iov->iov_len = len;
325	return 0;
326}
327
328/* Original version lifted from netlink.c in tools/lib/bpf */
329static int netlink_recv(int sock)
330{
331	struct iovec iov = {};
332	struct msghdr mhdr = {
333		.msg_iov = &iov,
334		.msg_iovlen = 1,
335	};
336	bool multipart = true;
337	struct nlmsgerr *err;
338	struct nlmsghdr *nh;
339	int len, ret;
340
341	ret = alloc_iov(&iov, 4096);
342	if (ret)
343		goto done;
344
345	while (multipart) {
346		multipart = false;
347		len = netlink_recvmsg(sock, &mhdr, MSG_PEEK | MSG_TRUNC);
348		if (len < 0) {
349			ret = len;
350			goto done;
351		}
352
353		if (len > iov.iov_len) {
354			ret = alloc_iov(&iov, len);
355			if (ret)
356				goto done;
357		}
358
359		len = netlink_recvmsg(sock, &mhdr, 0);
360		if (len < 0) {
361			ret = len;
362			goto done;
363		}
364
365		if (len == 0)
366			break;
367
368		for (nh = (struct nlmsghdr *)iov.iov_base; NLMSG_OK(nh, len);
369		     nh = NLMSG_NEXT(nh, len)) {
370			if (nh->nlmsg_flags & NLM_F_MULTI)
371				multipart = true;
372			switch (nh->nlmsg_type) {
373			case NLMSG_ERROR:
374				err = (struct nlmsgerr *)NLMSG_DATA(nh);
375				if (!err->error)
376					continue;
377				ret = err->error;
378				goto done;
379			case NLMSG_DONE:
380				ret = 0;
381				goto done;
382			default:
383				break;
384			}
385		}
386	}
387	ret = 0;
388done:
389	free(iov.iov_base);
390	return ret;
391}
392
393int xsk_set_mtu(int ifindex, int mtu)
394{
395	struct nl_mtu_req req;
396	struct rtattr *rta;
397	int fd, ret;
398
399	fd = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE);
400	if (fd < 0)
401		return fd;
402
403	memset(&req, 0, sizeof(req));
404	req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
405	req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
406	req.nh.nlmsg_type = RTM_NEWLINK;
407	req.msg.ifi_family = AF_UNSPEC;
408	req.msg.ifi_index = ifindex;
409	rta = (struct rtattr *)(((char *)&req) + NLMSG_ALIGN(req.nh.nlmsg_len));
410	rta->rta_type = IFLA_MTU;
411	rta->rta_len = RTA_LENGTH(sizeof(unsigned int));
412	req.nh.nlmsg_len = NLMSG_ALIGN(req.nh.nlmsg_len) + RTA_LENGTH(sizeof(mtu));
413	memcpy(RTA_DATA(rta), &mtu, sizeof(mtu));
414
415	ret = send(fd, &req, req.nh.nlmsg_len, 0);
416	if (ret < 0) {
417		close(fd);
418		return errno;
419	}
420
421	ret = netlink_recv(fd);
422	close(fd);
423	return ret;
424}
425
426int xsk_attach_xdp_program(struct bpf_program *prog, int ifindex, u32 xdp_flags)
427{
428	int prog_fd;
429
430	prog_fd = bpf_program__fd(prog);
431	return bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL);
432}
433
434void xsk_detach_xdp_program(int ifindex, u32 xdp_flags)
435{
436	bpf_xdp_detach(ifindex, xdp_flags, NULL);
437}
438
439void xsk_clear_xskmap(struct bpf_map *map)
440{
441	u32 index = 0;
442	int map_fd;
443
444	map_fd = bpf_map__fd(map);
445	bpf_map_delete_elem(map_fd, &index);
446}
447
448int xsk_update_xskmap(struct bpf_map *map, struct xsk_socket *xsk, u32 index)
449{
450	int map_fd, sock_fd;
451
452	map_fd = bpf_map__fd(map);
453	sock_fd = xsk_socket__fd(xsk);
454
455	return bpf_map_update_elem(map_fd, &index, &sock_fd, 0);
456}
457
458static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex,
459				   __u32 queue_id)
460{
461	struct xsk_ctx *ctx;
462
463	if (list_empty(&umem->ctx_list))
464		return NULL;
465
466	list_for_each_entry(ctx, &umem->ctx_list, list) {
467		if (ctx->ifindex == ifindex && ctx->queue_id == queue_id) {
468			ctx->refcount++;
469			return ctx;
470		}
471	}
472
473	return NULL;
474}
475
476static void xsk_put_ctx(struct xsk_ctx *ctx, bool unmap)
477{
478	struct xsk_umem *umem = ctx->umem;
479	struct xdp_mmap_offsets off;
480	int err;
481
482	if (--ctx->refcount)
483		return;
484
485	if (!unmap)
486		goto out_free;
487
488	err = xsk_get_mmap_offsets(umem->fd, &off);
489	if (err)
490		goto out_free;
491
492	munmap(ctx->fill->ring - off.fr.desc, off.fr.desc + umem->config.fill_size *
493	       sizeof(__u64));
494	munmap(ctx->comp->ring - off.cr.desc, off.cr.desc + umem->config.comp_size *
495	       sizeof(__u64));
496
497out_free:
498	list_del(&ctx->list);
499	free(ctx);
500}
501
502static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk,
503				      struct xsk_umem *umem, int ifindex,
504				      __u32 queue_id,
505				      struct xsk_ring_prod *fill,
506				      struct xsk_ring_cons *comp)
507{
508	struct xsk_ctx *ctx;
509	int err;
510
511	ctx = calloc(1, sizeof(*ctx));
512	if (!ctx)
513		return NULL;
514
515	if (!umem->fill_save) {
516		err = xsk_create_umem_rings(umem, xsk->fd, fill, comp);
517		if (err) {
518			free(ctx);
519			return NULL;
520		}
521	} else if (umem->fill_save != fill || umem->comp_save != comp) {
522		/* Copy over rings to new structs. */
523		memcpy(fill, umem->fill_save, sizeof(*fill));
524		memcpy(comp, umem->comp_save, sizeof(*comp));
525	}
526
527	ctx->ifindex = ifindex;
528	ctx->refcount = 1;
529	ctx->umem = umem;
530	ctx->queue_id = queue_id;
531
532	ctx->fill = fill;
533	ctx->comp = comp;
534	list_add(&ctx->list, &umem->ctx_list);
535	return ctx;
536}
537
538int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
539			      int ifindex,
540			      __u32 queue_id, struct xsk_umem *umem,
541			      struct xsk_ring_cons *rx,
542			      struct xsk_ring_prod *tx,
543			      struct xsk_ring_prod *fill,
544			      struct xsk_ring_cons *comp,
545			      const struct xsk_socket_config *usr_config)
546{
547	bool unmap, rx_setup_done = false, tx_setup_done = false;
548	void *rx_map = NULL, *tx_map = NULL;
549	struct sockaddr_xdp sxdp = {};
550	struct xdp_mmap_offsets off;
551	struct xsk_socket *xsk;
552	struct xsk_ctx *ctx;
553	int err;
554
555	if (!umem || !xsk_ptr || !(rx || tx))
556		return -EFAULT;
557
558	unmap = umem->fill_save != fill;
559
560	xsk = calloc(1, sizeof(*xsk));
561	if (!xsk)
562		return -ENOMEM;
563
564	err = xsk_set_xdp_socket_config(&xsk->config, usr_config);
565	if (err)
566		goto out_xsk_alloc;
567
568	if (umem->refcount++ > 0) {
569		xsk->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0);
570		if (xsk->fd < 0) {
571			err = -errno;
572			goto out_xsk_alloc;
573		}
574	} else {
575		xsk->fd = umem->fd;
576		rx_setup_done = umem->rx_ring_setup_done;
577		tx_setup_done = umem->tx_ring_setup_done;
578	}
579
580	ctx = xsk_get_ctx(umem, ifindex, queue_id);
581	if (!ctx) {
582		if (!fill || !comp) {
583			err = -EFAULT;
584			goto out_socket;
585		}
586
587		ctx = xsk_create_ctx(xsk, umem, ifindex, queue_id, fill, comp);
588		if (!ctx) {
589			err = -ENOMEM;
590			goto out_socket;
591		}
592	}
593	xsk->ctx = ctx;
594
595	if (rx && !rx_setup_done) {
596		err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
597				 &xsk->config.rx_size,
598				 sizeof(xsk->config.rx_size));
599		if (err) {
600			err = -errno;
601			goto out_put_ctx;
602		}
603		if (xsk->fd == umem->fd)
604			umem->rx_ring_setup_done = true;
605	}
606	if (tx && !tx_setup_done) {
607		err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING,
608				 &xsk->config.tx_size,
609				 sizeof(xsk->config.tx_size));
610		if (err) {
611			err = -errno;
612			goto out_put_ctx;
613		}
614		if (xsk->fd == umem->fd)
615			umem->tx_ring_setup_done = true;
616	}
617
618	err = xsk_get_mmap_offsets(xsk->fd, &off);
619	if (err) {
620		err = -errno;
621		goto out_put_ctx;
622	}
623
624	if (rx) {
625		rx_map = mmap(NULL, off.rx.desc +
626			      xsk->config.rx_size * sizeof(struct xdp_desc),
627			      PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
628			      xsk->fd, XDP_PGOFF_RX_RING);
629		if (rx_map == MAP_FAILED) {
630			err = -errno;
631			goto out_put_ctx;
632		}
633
634		rx->mask = xsk->config.rx_size - 1;
635		rx->size = xsk->config.rx_size;
636		rx->producer = rx_map + off.rx.producer;
637		rx->consumer = rx_map + off.rx.consumer;
638		rx->flags = rx_map + off.rx.flags;
639		rx->ring = rx_map + off.rx.desc;
640		rx->cached_prod = *rx->producer;
641		rx->cached_cons = *rx->consumer;
642	}
643	xsk->rx = rx;
644
645	if (tx) {
646		tx_map = mmap(NULL, off.tx.desc +
647			      xsk->config.tx_size * sizeof(struct xdp_desc),
648			      PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
649			      xsk->fd, XDP_PGOFF_TX_RING);
650		if (tx_map == MAP_FAILED) {
651			err = -errno;
652			goto out_mmap_rx;
653		}
654
655		tx->mask = xsk->config.tx_size - 1;
656		tx->size = xsk->config.tx_size;
657		tx->producer = tx_map + off.tx.producer;
658		tx->consumer = tx_map + off.tx.consumer;
659		tx->flags = tx_map + off.tx.flags;
660		tx->ring = tx_map + off.tx.desc;
661		tx->cached_prod = *tx->producer;
662		/* cached_cons is r->size bigger than the real consumer pointer
663		 * See xsk_prod_nb_free
664		 */
665		tx->cached_cons = *tx->consumer + xsk->config.tx_size;
666	}
667	xsk->tx = tx;
668
669	sxdp.sxdp_family = PF_XDP;
670	sxdp.sxdp_ifindex = ctx->ifindex;
671	sxdp.sxdp_queue_id = ctx->queue_id;
672	if (umem->refcount > 1) {
673		sxdp.sxdp_flags |= XDP_SHARED_UMEM;
674		sxdp.sxdp_shared_umem_fd = umem->fd;
675	} else {
676		sxdp.sxdp_flags = xsk->config.bind_flags;
677	}
678
679	err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
680	if (err) {
681		err = -errno;
682		goto out_mmap_tx;
683	}
684
685	*xsk_ptr = xsk;
686	umem->fill_save = NULL;
687	umem->comp_save = NULL;
688	return 0;
689
690out_mmap_tx:
691	if (tx)
692		munmap(tx_map, off.tx.desc +
693		       xsk->config.tx_size * sizeof(struct xdp_desc));
694out_mmap_rx:
695	if (rx)
696		munmap(rx_map, off.rx.desc +
697		       xsk->config.rx_size * sizeof(struct xdp_desc));
698out_put_ctx:
699	xsk_put_ctx(ctx, unmap);
700out_socket:
701	if (--umem->refcount)
702		close(xsk->fd);
703out_xsk_alloc:
704	free(xsk);
705	return err;
706}
707
708int xsk_socket__create(struct xsk_socket **xsk_ptr, int ifindex,
709		       __u32 queue_id, struct xsk_umem *umem,
710		       struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
711		       const struct xsk_socket_config *usr_config)
712{
713	if (!umem)
714		return -EFAULT;
715
716	return xsk_socket__create_shared(xsk_ptr, ifindex, queue_id, umem,
717					 rx, tx, umem->fill_save,
718					 umem->comp_save, usr_config);
719}
720
721int xsk_umem__delete(struct xsk_umem *umem)
722{
723	struct xdp_mmap_offsets off;
724	int err;
725
726	if (!umem)
727		return 0;
728
729	if (umem->refcount)
730		return -EBUSY;
731
732	err = xsk_get_mmap_offsets(umem->fd, &off);
733	if (!err && umem->fill_save && umem->comp_save) {
734		munmap(umem->fill_save->ring - off.fr.desc,
735		       off.fr.desc + umem->config.fill_size * sizeof(__u64));
736		munmap(umem->comp_save->ring - off.cr.desc,
737		       off.cr.desc + umem->config.comp_size * sizeof(__u64));
738	}
739
740	close(umem->fd);
741	free(umem);
742
743	return 0;
744}
745
746void xsk_socket__delete(struct xsk_socket *xsk)
747{
748	size_t desc_sz = sizeof(struct xdp_desc);
749	struct xdp_mmap_offsets off;
750	struct xsk_umem *umem;
751	struct xsk_ctx *ctx;
752	int err;
753
754	if (!xsk)
755		return;
756
757	ctx = xsk->ctx;
758	umem = ctx->umem;
759
760	xsk_put_ctx(ctx, true);
761
762	err = xsk_get_mmap_offsets(xsk->fd, &off);
763	if (!err) {
764		if (xsk->rx) {
765			munmap(xsk->rx->ring - off.rx.desc,
766			       off.rx.desc + xsk->config.rx_size * desc_sz);
767		}
768		if (xsk->tx) {
769			munmap(xsk->tx->ring - off.tx.desc,
770			       off.tx.desc + xsk->config.tx_size * desc_sz);
771		}
772	}
773
774	umem->refcount--;
775	/* Do not close an fd that also has an associated umem connected
776	 * to it.
777	 */
778	if (xsk->fd != umem->fd)
779		close(xsk->fd);
780	free(xsk);
781}
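
For orientation, here is a minimal usage sketch of the API listed above, assuming only the declarations from xsk.h: reserve a page-aligned UMEM area, register it with xsk_umem__create(), then bind an AF_XDP socket to one queue with xsk_socket__create(). Packet processing, fill/completion ring priming and XDP program attachment are elided, and NUM_FRAMES is an illustrative constant, not part of the library.

#include <errno.h>
#include <sys/mman.h>
#include "xsk.h"

#define NUM_FRAMES 4096	/* illustrative only, not part of the library */

static int xsk_example_setup(int ifindex, struct xsk_umem **umem,
			     struct xsk_socket **xsk)
{
	/* Ring structs must outlive the umem/socket; static here for brevity. */
	static struct xsk_ring_prod fill, tx;
	static struct xsk_ring_cons comp, rx;
	__u64 size = (__u64)NUM_FRAMES * XSK_UMEM__DEFAULT_FRAME_SIZE;
	void *area;
	int err;

	/* The UMEM area must be page aligned (see xsk_page_aligned() above). */
	area = mmap(NULL, size, PROT_READ | PROT_WRITE,
		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (area == MAP_FAILED)
		return -errno;

	/* A NULL config selects the defaults filled in by xsk_set_umem_config(). */
	err = xsk_umem__create(umem, area, size, &fill, &comp, NULL);
	if (err)
		goto out_unmap;

	/* Bind an AF_XDP socket to queue 0; NULL again picks the defaults. */
	err = xsk_socket__create(xsk, ifindex, 0, *umem, &rx, &tx, NULL);
	if (err)
		goto out_umem;

	return 0;

out_umem:
	xsk_umem__delete(*umem);
out_unmap:
	munmap(area, size);
	return err;
}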
v6.2
   1// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
   2
   3/*
   4 * AF_XDP user-space access library.
   5 *
   6 * Copyright(c) 2018 - 2019 Intel Corporation.
   7 *
   8 * Author(s): Magnus Karlsson <magnus.karlsson@intel.com>
   9 */
  10
  11#include <errno.h>
  12#include <stdlib.h>
  13#include <string.h>
  14#include <unistd.h>
  15#include <arpa/inet.h>
  16#include <asm/barrier.h>
  17#include <linux/compiler.h>
  18#include <linux/ethtool.h>
  19#include <linux/filter.h>
  20#include <linux/if_ether.h>
  21#include <linux/if_packet.h>
  22#include <linux/if_xdp.h>
  23#include <linux/kernel.h>
  24#include <linux/list.h>
  25#include <linux/sockios.h>
  26#include <net/if.h>
  27#include <sys/ioctl.h>
  28#include <sys/mman.h>
  29#include <sys/socket.h>
  30#include <sys/types.h>
  31#include <linux/if_link.h>
  32
  33#include <bpf/bpf.h>
  34#include <bpf/libbpf.h>
  35#include "xsk.h"
  36#include "bpf_util.h"
  37
  38#ifndef SOL_XDP
  39 #define SOL_XDP 283
  40#endif
  41
  42#ifndef AF_XDP
  43 #define AF_XDP 44
  44#endif
  45
  46#ifndef PF_XDP
  47 #define PF_XDP AF_XDP
  48#endif
  49
  50#define pr_warn(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__)
  51
  52enum xsk_prog {
  53	XSK_PROG_FALLBACK,
  54	XSK_PROG_REDIRECT_FLAGS,
  55};
  56
  57struct xsk_umem {
  58	struct xsk_ring_prod *fill_save;
  59	struct xsk_ring_cons *comp_save;
  60	char *umem_area;
  61	struct xsk_umem_config config;
  62	int fd;
  63	int refcount;
  64	struct list_head ctx_list;
  65	bool rx_ring_setup_done;
  66	bool tx_ring_setup_done;
  67};
  68
  69struct xsk_ctx {
  70	struct xsk_ring_prod *fill;
  71	struct xsk_ring_cons *comp;
  72	__u32 queue_id;
  73	struct xsk_umem *umem;
  74	int refcount;
  75	int ifindex;
  76	struct list_head list;
  77	int prog_fd;
  78	int link_fd;
  79	int xsks_map_fd;
  80	char ifname[IFNAMSIZ];
  81	bool has_bpf_link;
  82};
  83
  84struct xsk_socket {
  85	struct xsk_ring_cons *rx;
  86	struct xsk_ring_prod *tx;
  87	__u64 outstanding_tx;
  88	struct xsk_ctx *ctx;
  89	struct xsk_socket_config config;
  90	int fd;
  91};
  92
  93struct xsk_nl_info {
  94	bool xdp_prog_attached;
  95	int ifindex;
  96	int fd;
  97};
  98
  99/* Up until and including Linux 5.3 */
 100struct xdp_ring_offset_v1 {
 101	__u64 producer;
 102	__u64 consumer;
 103	__u64 desc;
 104};
 105
 106/* Up until and including Linux 5.3 */
 107struct xdp_mmap_offsets_v1 {
 108	struct xdp_ring_offset_v1 rx;
 109	struct xdp_ring_offset_v1 tx;
 110	struct xdp_ring_offset_v1 fr;
 111	struct xdp_ring_offset_v1 cr;
 112};
 113
 114int xsk_umem__fd(const struct xsk_umem *umem)
 115{
 116	return umem ? umem->fd : -EINVAL;
 117}
 118
 119int xsk_socket__fd(const struct xsk_socket *xsk)
 120{
 121	return xsk ? xsk->fd : -EINVAL;
 122}
 123
 124static bool xsk_page_aligned(void *buffer)
 125{
 126	unsigned long addr = (unsigned long)buffer;
 127
 128	return !(addr & (getpagesize() - 1));
 129}
 130
 131static void xsk_set_umem_config(struct xsk_umem_config *cfg,
 132				const struct xsk_umem_config *usr_cfg)
 133{
 134	if (!usr_cfg) {
 135		cfg->fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
 136		cfg->comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
 137		cfg->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
 138		cfg->frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
 139		cfg->flags = XSK_UMEM__DEFAULT_FLAGS;
 140		return;
 141	}
 142
 143	cfg->fill_size = usr_cfg->fill_size;
 144	cfg->comp_size = usr_cfg->comp_size;
 145	cfg->frame_size = usr_cfg->frame_size;
 146	cfg->frame_headroom = usr_cfg->frame_headroom;
 147	cfg->flags = usr_cfg->flags;
 148}
 149
 150static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,
 151				     const struct xsk_socket_config *usr_cfg)
 152{
 153	if (!usr_cfg) {
 154		cfg->rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
 155		cfg->tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
 156		cfg->libbpf_flags = 0;
 157		cfg->xdp_flags = 0;
 158		cfg->bind_flags = 0;
 159		return 0;
 160	}
 161
 162	if (usr_cfg->libbpf_flags & ~XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)
 163		return -EINVAL;
 164
 165	cfg->rx_size = usr_cfg->rx_size;
 166	cfg->tx_size = usr_cfg->tx_size;
 167	cfg->libbpf_flags = usr_cfg->libbpf_flags;
 168	cfg->xdp_flags = usr_cfg->xdp_flags;
 169	cfg->bind_flags = usr_cfg->bind_flags;
 170
 171	return 0;
 172}
 173
 174static void xsk_mmap_offsets_v1(struct xdp_mmap_offsets *off)
 175{
 176	struct xdp_mmap_offsets_v1 off_v1;
 177
 178	/* getsockopt on a kernel <= 5.3 has no flags fields.
 179	 * Copy over the offsets to the correct places in the >=5.4 format
 180	 * and put the flags where they would have been on that kernel.
 181	 */
 182	memcpy(&off_v1, off, sizeof(off_v1));
 183
 184	off->rx.producer = off_v1.rx.producer;
 185	off->rx.consumer = off_v1.rx.consumer;
 186	off->rx.desc = off_v1.rx.desc;
 187	off->rx.flags = off_v1.rx.consumer + sizeof(__u32);
 188
 189	off->tx.producer = off_v1.tx.producer;
 190	off->tx.consumer = off_v1.tx.consumer;
 191	off->tx.desc = off_v1.tx.desc;
 192	off->tx.flags = off_v1.tx.consumer + sizeof(__u32);
 193
 194	off->fr.producer = off_v1.fr.producer;
 195	off->fr.consumer = off_v1.fr.consumer;
 196	off->fr.desc = off_v1.fr.desc;
 197	off->fr.flags = off_v1.fr.consumer + sizeof(__u32);
 198
 199	off->cr.producer = off_v1.cr.producer;
 200	off->cr.consumer = off_v1.cr.consumer;
 201	off->cr.desc = off_v1.cr.desc;
 202	off->cr.flags = off_v1.cr.consumer + sizeof(__u32);
 203}
 204
 205static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off)
 206{
 207	socklen_t optlen;
 208	int err;
 209
 210	optlen = sizeof(*off);
 211	err = getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, off, &optlen);
 212	if (err)
 213		return err;
 214
 215	if (optlen == sizeof(*off))
 216		return 0;
 217
 218	if (optlen == sizeof(struct xdp_mmap_offsets_v1)) {
 219		xsk_mmap_offsets_v1(off);
 220		return 0;
 221	}
 222
 223	return -EINVAL;
 224}
 225
 226static int xsk_create_umem_rings(struct xsk_umem *umem, int fd,
 227				 struct xsk_ring_prod *fill,
 228				 struct xsk_ring_cons *comp)
 229{
 230	struct xdp_mmap_offsets off;
 231	void *map;
 232	int err;
 233
 234	err = setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING,
 235			 &umem->config.fill_size,
 236			 sizeof(umem->config.fill_size));
 237	if (err)
 238		return -errno;
 239
 240	err = setsockopt(fd, SOL_XDP, XDP_UMEM_COMPLETION_RING,
 241			 &umem->config.comp_size,
 242			 sizeof(umem->config.comp_size));
 243	if (err)
 244		return -errno;
 245
 246	err = xsk_get_mmap_offsets(fd, &off);
 247	if (err)
 248		return -errno;
 249
 250	map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64),
 251		   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
 252		   XDP_UMEM_PGOFF_FILL_RING);
 253	if (map == MAP_FAILED)
 254		return -errno;
 255
 256	fill->mask = umem->config.fill_size - 1;
 257	fill->size = umem->config.fill_size;
 258	fill->producer = map + off.fr.producer;
 259	fill->consumer = map + off.fr.consumer;
 260	fill->flags = map + off.fr.flags;
 261	fill->ring = map + off.fr.desc;
 262	fill->cached_cons = umem->config.fill_size;
 263
 264	map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64),
 265		   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
 266		   XDP_UMEM_PGOFF_COMPLETION_RING);
 267	if (map == MAP_FAILED) {
 268		err = -errno;
 269		goto out_mmap;
 270	}
 271
 272	comp->mask = umem->config.comp_size - 1;
 273	comp->size = umem->config.comp_size;
 274	comp->producer = map + off.cr.producer;
 275	comp->consumer = map + off.cr.consumer;
 276	comp->flags = map + off.cr.flags;
 277	comp->ring = map + off.cr.desc;
 278
 279	return 0;
 280
 281out_mmap:
 282	munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64));
 283	return err;
 284}
 285
 286int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area,
 287		     __u64 size, struct xsk_ring_prod *fill,
 288		     struct xsk_ring_cons *comp,
 289		     const struct xsk_umem_config *usr_config)
 290{
 291	struct xdp_umem_reg mr;
 292	struct xsk_umem *umem;
 293	int err;
 294
 295	if (!umem_area || !umem_ptr || !fill || !comp)
 296		return -EFAULT;
 297	if (!size && !xsk_page_aligned(umem_area))
 298		return -EINVAL;
 299
 300	umem = calloc(1, sizeof(*umem));
 301	if (!umem)
 302		return -ENOMEM;
 303
 304	umem->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0);
 305	if (umem->fd < 0) {
 306		err = -errno;
 307		goto out_umem_alloc;
 308	}
 309
 310	umem->umem_area = umem_area;
 311	INIT_LIST_HEAD(&umem->ctx_list);
 312	xsk_set_umem_config(&umem->config, usr_config);
 313
 314	memset(&mr, 0, sizeof(mr));
 315	mr.addr = (uintptr_t)umem_area;
 316	mr.len = size;
 317	mr.chunk_size = umem->config.frame_size;
 318	mr.headroom = umem->config.frame_headroom;
 319	mr.flags = umem->config.flags;
 320
 321	err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
 322	if (err) {
 323		err = -errno;
 324		goto out_socket;
 325	}
 326
 327	err = xsk_create_umem_rings(umem, umem->fd, fill, comp);
 328	if (err)
 329		goto out_socket;
 330
 331	umem->fill_save = fill;
 332	umem->comp_save = comp;
 333	*umem_ptr = umem;
 334	return 0;
 335
 336out_socket:
 337	close(umem->fd);
 338out_umem_alloc:
 339	free(umem);
 340	return err;
 341}
 342
 343struct xsk_umem_config_v1 {
 344	__u32 fill_size;
 345	__u32 comp_size;
 346	__u32 frame_size;
 347	__u32 frame_headroom;
 348};
 349
 350static enum xsk_prog get_xsk_prog(void)
 351{
 352	enum xsk_prog detected = XSK_PROG_FALLBACK;
 353	char data_in = 0, data_out;
 354	struct bpf_insn insns[] = {
 355		BPF_LD_MAP_FD(BPF_REG_1, 0),
 356		BPF_MOV64_IMM(BPF_REG_2, 0),
 357		BPF_MOV64_IMM(BPF_REG_3, XDP_PASS),
 358		BPF_EMIT_CALL(BPF_FUNC_redirect_map),
 359		BPF_EXIT_INSN(),
 360	};
 361	LIBBPF_OPTS(bpf_test_run_opts, opts,
 362		.data_in = &data_in,
 363		.data_size_in = 1,
 364		.data_out = &data_out,
 365	);
 366
 367	int prog_fd, map_fd, ret, insn_cnt = ARRAY_SIZE(insns);
 368
 369	map_fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, NULL, sizeof(int), sizeof(int), 1, NULL);
 370	if (map_fd < 0)
 371		return detected;
 372
 373	insns[0].imm = map_fd;
 374
 375	prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL);
 376	if (prog_fd < 0) {
 377		close(map_fd);
 378		return detected;
 379	}
 380
 381	ret = bpf_prog_test_run_opts(prog_fd, &opts);
 382	if (!ret && opts.retval == XDP_PASS)
 383		detected = XSK_PROG_REDIRECT_FLAGS;
 384	close(prog_fd);
 385	close(map_fd);
 386	return detected;
 387}
 388
 389static int xsk_load_xdp_prog(struct xsk_socket *xsk)
 390{
 391	static const int log_buf_size = 16 * 1024;
 392	struct xsk_ctx *ctx = xsk->ctx;
 393	char log_buf[log_buf_size];
 394	int prog_fd;
 395
 396	/* This is the fallback C-program:
 397	 * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
 398	 * {
 399	 *     int ret, index = ctx->rx_queue_index;
 400	 *
 401	 *     // A set entry here means that the corresponding queue_id
 402	 *     // has an active AF_XDP socket bound to it.
 403	 *     ret = bpf_redirect_map(&xsks_map, index, XDP_PASS);
 404	 *     if (ret > 0)
 405	 *         return ret;
 406	 *
 407	 *     // Fallback for pre-5.3 kernels, not supporting default
 408	 *     // action in the flags parameter.
 409	 *     if (bpf_map_lookup_elem(&xsks_map, &index))
 410	 *         return bpf_redirect_map(&xsks_map, index, 0);
 411	 *     return XDP_PASS;
 412	 * }
 413	 */
 414	struct bpf_insn prog[] = {
 415		/* r2 = *(u32 *)(r1 + 16) */
 416		BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16),
 417		/* *(u32 *)(r10 - 4) = r2 */
 418		BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -4),
 419		/* r1 = xskmap[] */
 420		BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
 421		/* r3 = XDP_PASS */
 422		BPF_MOV64_IMM(BPF_REG_3, 2),
 423		/* call bpf_redirect_map */
 424		BPF_EMIT_CALL(BPF_FUNC_redirect_map),
 425		/* if w0 != 0 goto pc+13 */
 426		BPF_JMP32_IMM(BPF_JSGT, BPF_REG_0, 0, 13),
 427		/* r2 = r10 */
 428		BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 429		/* r2 += -4 */
 430		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
 431		/* r1 = xskmap[] */
 432		BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
 433		/* call bpf_map_lookup_elem */
 434		BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
 435		/* r1 = r0 */
 436		BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 437		/* r0 = XDP_PASS */
 438		BPF_MOV64_IMM(BPF_REG_0, 2),
 439		/* if r1 == 0 goto pc+5 */
 440		BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5),
 441		/* r2 = *(u32 *)(r10 - 4) */
 442		BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4),
 443		/* r1 = xskmap[] */
 444		BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
 445		/* r3 = 0 */
 446		BPF_MOV64_IMM(BPF_REG_3, 0),
 447		/* call bpf_redirect_map */
 448		BPF_EMIT_CALL(BPF_FUNC_redirect_map),
 449		/* The jumps are to this instruction */
 450		BPF_EXIT_INSN(),
 451	};
 452
 453	/* This is the post-5.3 kernel C-program:
 454	 * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
 455	 * {
 456	 *     return bpf_redirect_map(&xsks_map, ctx->rx_queue_index, XDP_PASS);
 457	 * }
 458	 */
 459	struct bpf_insn prog_redirect_flags[] = {
 460		/* r2 = *(u32 *)(r1 + 16) */
 461		BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16),
 462		/* r1 = xskmap[] */
 463		BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
 464		/* r3 = XDP_PASS */
 465		BPF_MOV64_IMM(BPF_REG_3, 2),
 466		/* call bpf_redirect_map */
 467		BPF_EMIT_CALL(BPF_FUNC_redirect_map),
 468		BPF_EXIT_INSN(),
 469	};
 470	size_t insns_cnt[] = {ARRAY_SIZE(prog),
 471			      ARRAY_SIZE(prog_redirect_flags),
 472	};
 473	struct bpf_insn *progs[] = {prog, prog_redirect_flags};
 474	enum xsk_prog option = get_xsk_prog();
 475	LIBBPF_OPTS(bpf_prog_load_opts, opts,
 476		.log_buf = log_buf,
 477		.log_size = log_buf_size,
 478	);
 479
 480	prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "LGPL-2.1 or BSD-2-Clause",
 481				progs[option], insns_cnt[option], &opts);
 482	if (prog_fd < 0) {
 483		pr_warn("BPF log buffer:\n%s", log_buf);
 484		return prog_fd;
 485	}
 486
 487	ctx->prog_fd = prog_fd;
 488	return 0;
 489}
 490
 491static int xsk_create_bpf_link(struct xsk_socket *xsk)
 492{
 493	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
 494	struct xsk_ctx *ctx = xsk->ctx;
 495	__u32 prog_id = 0;
 496	int link_fd;
 497	int err;
 498
 499	err = bpf_xdp_query_id(ctx->ifindex, xsk->config.xdp_flags, &prog_id);
 500	if (err) {
 501		pr_warn("getting XDP prog id failed\n");
 502		return err;
 503	}
 504
 505	/* if there's a netlink-based XDP prog loaded on interface, bail out
 506	 * and ask user to do the removal by himself
 507	 */
 508	if (prog_id) {
 509		pr_warn("Netlink-based XDP prog detected, please unload it in order to launch AF_XDP prog\n");
 510		return -EINVAL;
 511	}
 512
 513	opts.flags = xsk->config.xdp_flags & ~(XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_REPLACE);
 514
 515	link_fd = bpf_link_create(ctx->prog_fd, ctx->ifindex, BPF_XDP, &opts);
 516	if (link_fd < 0) {
 517		pr_warn("bpf_link_create failed: %s\n", strerror(errno));
 518		return link_fd;
 519	}
 520
 521	ctx->link_fd = link_fd;
 522	return 0;
 523}
 524
 525static int xsk_get_max_queues(struct xsk_socket *xsk)
 526{
 527	struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS };
 528	struct xsk_ctx *ctx = xsk->ctx;
 529	struct ifreq ifr = {};
 530	int fd, err, ret;
 531
 532	fd = socket(AF_LOCAL, SOCK_DGRAM | SOCK_CLOEXEC, 0);
 533	if (fd < 0)
 534		return -errno;
 535
 536	ifr.ifr_data = (void *)&channels;
 537	bpf_strlcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ);
 538	err = ioctl(fd, SIOCETHTOOL, &ifr);
 539	if (err && errno != EOPNOTSUPP) {
 540		ret = -errno;
 541		goto out;
 542	}
 543
 544	if (err) {
 545		/* If the device says it has no channels, then all traffic
 546		 * is sent to a single stream, so max queues = 1.
 547		 */
 548		ret = 1;
 549	} else {
 550		/* Take the max of rx, tx, combined. Drivers return
 551		 * the number of channels in different ways.
 552		 */
 553		ret = max(channels.max_rx, channels.max_tx);
 554		ret = max(ret, (int)channels.max_combined);
 555	}
 556
 557out:
 558	close(fd);
 559	return ret;
 560}
 561
 562static int xsk_create_bpf_maps(struct xsk_socket *xsk)
 563{
 564	struct xsk_ctx *ctx = xsk->ctx;
 565	int max_queues;
 566	int fd;
 567
 568	max_queues = xsk_get_max_queues(xsk);
 569	if (max_queues < 0)
 570		return max_queues;
 571
 572	fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, "xsks_map",
 573			    sizeof(int), sizeof(int), max_queues, NULL);
 574	if (fd < 0)
 575		return fd;
 576
 577	ctx->xsks_map_fd = fd;
 578
 579	return 0;
 580}
 581
 582static void xsk_delete_bpf_maps(struct xsk_socket *xsk)
 583{
 584	struct xsk_ctx *ctx = xsk->ctx;
 585
 586	bpf_map_delete_elem(ctx->xsks_map_fd, &ctx->queue_id);
 587	close(ctx->xsks_map_fd);
 588}
 589
 590static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
 591{
 592	__u32 i, *map_ids, num_maps, prog_len = sizeof(struct bpf_prog_info);
 593	__u32 map_len = sizeof(struct bpf_map_info);
 594	struct bpf_prog_info prog_info = {};
 595	struct xsk_ctx *ctx = xsk->ctx;
 596	struct bpf_map_info map_info;
 597	int fd, err;
 598
 599	err = bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len);
 600	if (err)
 601		return err;
 602
 603	num_maps = prog_info.nr_map_ids;
 604
 605	map_ids = calloc(prog_info.nr_map_ids, sizeof(*map_ids));
 606	if (!map_ids)
 607		return -ENOMEM;
 608
 609	memset(&prog_info, 0, prog_len);
 610	prog_info.nr_map_ids = num_maps;
 611	prog_info.map_ids = (__u64)(unsigned long)map_ids;
 612
 613	err = bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len);
 614	if (err)
 615		goto out_map_ids;
 616
 617	ctx->xsks_map_fd = -1;
 618
 619	for (i = 0; i < prog_info.nr_map_ids; i++) {
 620		fd = bpf_map_get_fd_by_id(map_ids[i]);
 621		if (fd < 0)
 622			continue;
 623
 624		memset(&map_info, 0, map_len);
 625		err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len);
 626		if (err) {
 627			close(fd);
 628			continue;
 629		}
 630
 631		if (!strncmp(map_info.name, "xsks_map", sizeof(map_info.name))) {
 632			ctx->xsks_map_fd = fd;
 633			break;
 634		}
 635
 636		close(fd);
 637	}
 638
 639	if (ctx->xsks_map_fd == -1)
 640		err = -ENOENT;
 641
 642out_map_ids:
 643	free(map_ids);
 644	return err;
 645}
 646
 647static int xsk_set_bpf_maps(struct xsk_socket *xsk)
 648{
 649	struct xsk_ctx *ctx = xsk->ctx;
 650
 651	return bpf_map_update_elem(ctx->xsks_map_fd, &ctx->queue_id,
 652				   &xsk->fd, 0);
 653}
 654
 655static int xsk_link_lookup(int ifindex, __u32 *prog_id, int *link_fd)
 656{
 657	struct bpf_link_info link_info;
 658	__u32 link_len;
 659	__u32 id = 0;
 660	int err;
 661	int fd;
 662
 663	while (true) {
 664		err = bpf_link_get_next_id(id, &id);
 665		if (err) {
 666			if (errno == ENOENT) {
 667				err = 0;
 668				break;
 669			}
 670			pr_warn("can't get next link: %s\n", strerror(errno));
 671			break;
 672		}
 673
 674		fd = bpf_link_get_fd_by_id(id);
 675		if (fd < 0) {
 676			if (errno == ENOENT)
 677				continue;
 678			pr_warn("can't get link by id (%u): %s\n", id, strerror(errno));
 679			err = -errno;
 680			break;
 681		}
 682
 683		link_len = sizeof(struct bpf_link_info);
 684		memset(&link_info, 0, link_len);
 685		err = bpf_obj_get_info_by_fd(fd, &link_info, &link_len);
 686		if (err) {
 687			pr_warn("can't get link info: %s\n", strerror(errno));
 688			close(fd);
 689			break;
 690		}
 691		if (link_info.type == BPF_LINK_TYPE_XDP) {
 692			if (link_info.xdp.ifindex == ifindex) {
 693				*link_fd = fd;
 694				if (prog_id)
 695					*prog_id = link_info.prog_id;
 696				break;
 697			}
 698		}
 699		close(fd);
 700	}
 701
 702	return err;
 703}
 704
 705static bool xsk_probe_bpf_link(void)
 706{
 707	LIBBPF_OPTS(bpf_link_create_opts, opts, .flags = XDP_FLAGS_SKB_MODE);
 708	struct bpf_insn insns[2] = {
 709		BPF_MOV64_IMM(BPF_REG_0, XDP_PASS),
 710		BPF_EXIT_INSN()
 711	};
 712	int prog_fd, link_fd = -1, insn_cnt = ARRAY_SIZE(insns);
 713	int ifindex_lo = 1;
 714	bool ret = false;
 715	int err;
 716
 717	err = xsk_link_lookup(ifindex_lo, NULL, &link_fd);
 718	if (err)
 719		return ret;
 720
 721	if (link_fd >= 0)
 722		return true;
 723
 724	prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL);
 725	if (prog_fd < 0)
 726		return ret;
 727
 728	link_fd = bpf_link_create(prog_fd, ifindex_lo, BPF_XDP, &opts);
 729	close(prog_fd);
 730
 731	if (link_fd >= 0) {
 732		ret = true;
 733		close(link_fd);
 734	}
 735
 736	return ret;
 737}
 738
 739static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk)
 740{
 741	char ifname[IFNAMSIZ];
 742	struct xsk_ctx *ctx;
 743	char *interface;
 744
 745	ctx = calloc(1, sizeof(*ctx));
 746	if (!ctx)
 747		return -ENOMEM;
 748
 749	interface = if_indextoname(ifindex, &ifname[0]);
 750	if (!interface) {
 751		free(ctx);
 752		return -errno;
 753	}
 754
 755	ctx->ifindex = ifindex;
 756	bpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ);
 757
 758	xsk->ctx = ctx;
 759	xsk->ctx->has_bpf_link = xsk_probe_bpf_link();
 760
 761	return 0;
 762}
 763
 764static int xsk_init_xdp_res(struct xsk_socket *xsk,
 765			    int *xsks_map_fd)
 766{
 767	struct xsk_ctx *ctx = xsk->ctx;
 768	int err;
 769
 770	err = xsk_create_bpf_maps(xsk);
 771	if (err)
 772		return err;
 773
 774	err = xsk_load_xdp_prog(xsk);
 775	if (err)
 776		goto err_load_xdp_prog;
 777
 778	if (ctx->has_bpf_link)
 779		err = xsk_create_bpf_link(xsk);
 780	else
 781		err = bpf_xdp_attach(xsk->ctx->ifindex, ctx->prog_fd,
 782				     xsk->config.xdp_flags, NULL);
 783
 784	if (err)
 785		goto err_attach_xdp_prog;
 786
 787	if (!xsk->rx)
 788		return err;
 789
 790	err = xsk_set_bpf_maps(xsk);
 791	if (err)
 792		goto err_set_bpf_maps;
 793
 794	return err;
 795
 796err_set_bpf_maps:
 797	if (ctx->has_bpf_link)
 798		close(ctx->link_fd);
 799	else
 800		bpf_xdp_detach(ctx->ifindex, 0, NULL);
 801err_attach_xdp_prog:
 802	close(ctx->prog_fd);
 803err_load_xdp_prog:
 804	xsk_delete_bpf_maps(xsk);
 805	return err;
 806}
 807
 808static int xsk_lookup_xdp_res(struct xsk_socket *xsk, int *xsks_map_fd, int prog_id)
 809{
 810	struct xsk_ctx *ctx = xsk->ctx;
 811	int err;
 812
 813	ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id);
 814	if (ctx->prog_fd < 0) {
 815		err = -errno;
 816		goto err_prog_fd;
 817	}
 818	err = xsk_lookup_bpf_maps(xsk);
 819	if (err)
 820		goto err_lookup_maps;
 821
 822	if (!xsk->rx)
 823		return err;
 824
 825	err = xsk_set_bpf_maps(xsk);
 826	if (err)
 827		goto err_set_maps;
 828
 829	return err;
 830
 831err_set_maps:
 832	close(ctx->xsks_map_fd);
 833err_lookup_maps:
 834	close(ctx->prog_fd);
 835err_prog_fd:
 836	if (ctx->has_bpf_link)
 837		close(ctx->link_fd);
 838	return err;
 839}
 840
 841static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp, int *xsks_map_fd)
 842{
 843	struct xsk_socket *xsk = _xdp;
 844	struct xsk_ctx *ctx = xsk->ctx;
 845	__u32 prog_id = 0;
 846	int err;
 847
 848	if (ctx->has_bpf_link)
 849		err = xsk_link_lookup(ctx->ifindex, &prog_id, &ctx->link_fd);
 850	else
 851		err = bpf_xdp_query_id(ctx->ifindex, xsk->config.xdp_flags, &prog_id);
 852
 853	if (err)
 854		return err;
 855
 856	err = !prog_id ? xsk_init_xdp_res(xsk, xsks_map_fd) :
 857			 xsk_lookup_xdp_res(xsk, xsks_map_fd, prog_id);
 858
 859	if (!err && xsks_map_fd)
 860		*xsks_map_fd = ctx->xsks_map_fd;
 861
 862	return err;
 863}
 864
 865int xsk_setup_xdp_prog_xsk(struct xsk_socket *xsk, int *xsks_map_fd)
 866{
 867	return __xsk_setup_xdp_prog(xsk, xsks_map_fd);
 868}
 869
 870static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex,
 871				   __u32 queue_id)
 872{
 873	struct xsk_ctx *ctx;
 874
 875	if (list_empty(&umem->ctx_list))
 876		return NULL;
 877
 878	list_for_each_entry(ctx, &umem->ctx_list, list) {
 879		if (ctx->ifindex == ifindex && ctx->queue_id == queue_id) {
 880			ctx->refcount++;
 881			return ctx;
 882		}
 883	}
 884
 885	return NULL;
 886}
 887
 888static void xsk_put_ctx(struct xsk_ctx *ctx, bool unmap)
 889{
 890	struct xsk_umem *umem = ctx->umem;
 891	struct xdp_mmap_offsets off;
 892	int err;
 893
 894	if (--ctx->refcount)
 895		return;
 896
 897	if (!unmap)
 898		goto out_free;
 899
 900	err = xsk_get_mmap_offsets(umem->fd, &off);
 901	if (err)
 902		goto out_free;
 903
 904	munmap(ctx->fill->ring - off.fr.desc, off.fr.desc + umem->config.fill_size *
 905	       sizeof(__u64));
 906	munmap(ctx->comp->ring - off.cr.desc, off.cr.desc + umem->config.comp_size *
 907	       sizeof(__u64));
 908
 909out_free:
 910	list_del(&ctx->list);
 911	free(ctx);
 912}
 913
 914static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk,
 915				      struct xsk_umem *umem, int ifindex,
 916				      const char *ifname, __u32 queue_id,
 917				      struct xsk_ring_prod *fill,
 918				      struct xsk_ring_cons *comp)
 919{
 920	struct xsk_ctx *ctx;
 921	int err;
 922
 923	ctx = calloc(1, sizeof(*ctx));
 924	if (!ctx)
 925		return NULL;
 926
 927	if (!umem->fill_save) {
 928		err = xsk_create_umem_rings(umem, xsk->fd, fill, comp);
 929		if (err) {
 930			free(ctx);
 931			return NULL;
 932		}
 933	} else if (umem->fill_save != fill || umem->comp_save != comp) {
 934		/* Copy over rings to new structs. */
 935		memcpy(fill, umem->fill_save, sizeof(*fill));
 936		memcpy(comp, umem->comp_save, sizeof(*comp));
 937	}
 938
 939	ctx->ifindex = ifindex;
 940	ctx->refcount = 1;
 941	ctx->umem = umem;
 942	ctx->queue_id = queue_id;
 943	bpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ);
 944
 945	ctx->fill = fill;
 946	ctx->comp = comp;
 947	list_add(&ctx->list, &umem->ctx_list);
 948	ctx->has_bpf_link = xsk_probe_bpf_link();
 949	return ctx;
 950}
 951
 952static void xsk_destroy_xsk_struct(struct xsk_socket *xsk)
 953{
 954	free(xsk->ctx);
 955	free(xsk);
 956}
 957
 958int xsk_socket__update_xskmap(struct xsk_socket *xsk, int fd)
 959{
 960	xsk->ctx->xsks_map_fd = fd;
 961	return xsk_set_bpf_maps(xsk);
 962}
 963
 964int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd)
 965{
 966	struct xsk_socket *xsk;
 967	int res;
 968
 969	xsk = calloc(1, sizeof(*xsk));
 970	if (!xsk)
 971		return -ENOMEM;
 972
 973	res = xsk_create_xsk_struct(ifindex, xsk);
 974	if (res) {
 975		free(xsk);
 976		return -EINVAL;
 977	}
 978
 979	res = __xsk_setup_xdp_prog(xsk, xsks_map_fd);
 980
 981	xsk_destroy_xsk_struct(xsk);
 982
 983	return res;
 984}
 985
 986int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
 987			      const char *ifname,
 988			      __u32 queue_id, struct xsk_umem *umem,
 989			      struct xsk_ring_cons *rx,
 990			      struct xsk_ring_prod *tx,
 991			      struct xsk_ring_prod *fill,
 992			      struct xsk_ring_cons *comp,
 993			      const struct xsk_socket_config *usr_config)
 994{
 995	bool unmap, rx_setup_done = false, tx_setup_done = false;
 996	void *rx_map = NULL, *tx_map = NULL;
 997	struct sockaddr_xdp sxdp = {};
 998	struct xdp_mmap_offsets off;
 999	struct xsk_socket *xsk;
1000	struct xsk_ctx *ctx;
1001	int err, ifindex;
1002
1003	if (!umem || !xsk_ptr || !(rx || tx))
1004		return -EFAULT;
1005
1006	unmap = umem->fill_save != fill;
1007
1008	xsk = calloc(1, sizeof(*xsk));
1009	if (!xsk)
1010		return -ENOMEM;
1011
1012	err = xsk_set_xdp_socket_config(&xsk->config, usr_config);
1013	if (err)
1014		goto out_xsk_alloc;
1015
1016	xsk->outstanding_tx = 0;
1017	ifindex = if_nametoindex(ifname);
1018	if (!ifindex) {
1019		err = -errno;
1020		goto out_xsk_alloc;
1021	}
1022
1023	if (umem->refcount++ > 0) {
1024		xsk->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0);
1025		if (xsk->fd < 0) {
1026			err = -errno;
1027			goto out_xsk_alloc;
1028		}
1029	} else {
1030		xsk->fd = umem->fd;
1031		rx_setup_done = umem->rx_ring_setup_done;
1032		tx_setup_done = umem->tx_ring_setup_done;
1033	}
1034
1035	ctx = xsk_get_ctx(umem, ifindex, queue_id);
1036	if (!ctx) {
1037		if (!fill || !comp) {
1038			err = -EFAULT;
1039			goto out_socket;
1040		}
1041
1042		ctx = xsk_create_ctx(xsk, umem, ifindex, ifname, queue_id,
1043				     fill, comp);
1044		if (!ctx) {
1045			err = -ENOMEM;
1046			goto out_socket;
1047		}
1048	}
1049	xsk->ctx = ctx;
1050
1051	if (rx && !rx_setup_done) {
1052		err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
1053				 &xsk->config.rx_size,
1054				 sizeof(xsk->config.rx_size));
1055		if (err) {
1056			err = -errno;
1057			goto out_put_ctx;
1058		}
1059		if (xsk->fd == umem->fd)
1060			umem->rx_ring_setup_done = true;
1061	}
1062	if (tx && !tx_setup_done) {
1063		err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING,
1064				 &xsk->config.tx_size,
1065				 sizeof(xsk->config.tx_size));
1066		if (err) {
1067			err = -errno;
1068			goto out_put_ctx;
1069		}
1070		if (xsk->fd == umem->fd)
1071			umem->tx_ring_setup_done = true;
1072	}
1073
1074	err = xsk_get_mmap_offsets(xsk->fd, &off);
1075	if (err) {
1076		err = -errno;
1077		goto out_put_ctx;
1078	}
1079
1080	if (rx) {
1081		rx_map = mmap(NULL, off.rx.desc +
1082			      xsk->config.rx_size * sizeof(struct xdp_desc),
1083			      PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
1084			      xsk->fd, XDP_PGOFF_RX_RING);
1085		if (rx_map == MAP_FAILED) {
1086			err = -errno;
1087			goto out_put_ctx;
1088		}
1089
1090		rx->mask = xsk->config.rx_size - 1;
1091		rx->size = xsk->config.rx_size;
1092		rx->producer = rx_map + off.rx.producer;
1093		rx->consumer = rx_map + off.rx.consumer;
1094		rx->flags = rx_map + off.rx.flags;
1095		rx->ring = rx_map + off.rx.desc;
1096		rx->cached_prod = *rx->producer;
1097		rx->cached_cons = *rx->consumer;
1098	}
1099	xsk->rx = rx;
1100
1101	if (tx) {
1102		tx_map = mmap(NULL, off.tx.desc +
1103			      xsk->config.tx_size * sizeof(struct xdp_desc),
1104			      PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
1105			      xsk->fd, XDP_PGOFF_TX_RING);
1106		if (tx_map == MAP_FAILED) {
1107			err = -errno;
1108			goto out_mmap_rx;
1109		}
1110
1111		tx->mask = xsk->config.tx_size - 1;
1112		tx->size = xsk->config.tx_size;
1113		tx->producer = tx_map + off.tx.producer;
1114		tx->consumer = tx_map + off.tx.consumer;
1115		tx->flags = tx_map + off.tx.flags;
1116		tx->ring = tx_map + off.tx.desc;
1117		tx->cached_prod = *tx->producer;
1118		/* cached_cons is r->size bigger than the real consumer pointer
1119		 * See xsk_prod_nb_free
1120		 */
1121		tx->cached_cons = *tx->consumer + xsk->config.tx_size;
1122	}
1123	xsk->tx = tx;
1124
1125	sxdp.sxdp_family = PF_XDP;
1126	sxdp.sxdp_ifindex = ctx->ifindex;
1127	sxdp.sxdp_queue_id = ctx->queue_id;
1128	if (umem->refcount > 1) {
1129		sxdp.sxdp_flags |= XDP_SHARED_UMEM;
1130		sxdp.sxdp_shared_umem_fd = umem->fd;
1131	} else {
1132		sxdp.sxdp_flags = xsk->config.bind_flags;
1133	}
1134
1135	err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
1136	if (err) {
1137		err = -errno;
1138		goto out_mmap_tx;
1139	}
1140
1141	if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
1142		err = __xsk_setup_xdp_prog(xsk, NULL);
1143		if (err)
1144			goto out_mmap_tx;
1145	}
1146
1147	*xsk_ptr = xsk;
1148	umem->fill_save = NULL;
1149	umem->comp_save = NULL;
1150	return 0;
1151
1152out_mmap_tx:
1153	if (tx)
1154		munmap(tx_map, off.tx.desc +
1155		       xsk->config.tx_size * sizeof(struct xdp_desc));
1156out_mmap_rx:
1157	if (rx)
1158		munmap(rx_map, off.rx.desc +
1159		       xsk->config.rx_size * sizeof(struct xdp_desc));
1160out_put_ctx:
1161	xsk_put_ctx(ctx, unmap);
1162out_socket:
1163	if (--umem->refcount)
1164		close(xsk->fd);
1165out_xsk_alloc:
1166	free(xsk);
1167	return err;
1168}
1169
1170int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
1171		       __u32 queue_id, struct xsk_umem *umem,
1172		       struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
1173		       const struct xsk_socket_config *usr_config)
1174{
1175	if (!umem)
1176		return -EFAULT;
1177
1178	return xsk_socket__create_shared(xsk_ptr, ifname, queue_id, umem,
1179					 rx, tx, umem->fill_save,
1180					 umem->comp_save, usr_config);
1181}
1182
1183int xsk_umem__delete(struct xsk_umem *umem)
1184{
1185	struct xdp_mmap_offsets off;
1186	int err;
1187
1188	if (!umem)
1189		return 0;
1190
1191	if (umem->refcount)
1192		return -EBUSY;
1193
1194	err = xsk_get_mmap_offsets(umem->fd, &off);
1195	if (!err && umem->fill_save && umem->comp_save) {
1196		munmap(umem->fill_save->ring - off.fr.desc,
1197		       off.fr.desc + umem->config.fill_size * sizeof(__u64));
1198		munmap(umem->comp_save->ring - off.cr.desc,
1199		       off.cr.desc + umem->config.comp_size * sizeof(__u64));
1200	}
1201
1202	close(umem->fd);
1203	free(umem);
1204
1205	return 0;
1206}
1207
1208void xsk_socket__delete(struct xsk_socket *xsk)
1209{
1210	size_t desc_sz = sizeof(struct xdp_desc);
1211	struct xdp_mmap_offsets off;
1212	struct xsk_umem *umem;
1213	struct xsk_ctx *ctx;
1214	int err;
1215
1216	if (!xsk)
1217		return;
1218
1219	ctx = xsk->ctx;
1220	umem = ctx->umem;
1221
1222	if (ctx->refcount == 1) {
1223		xsk_delete_bpf_maps(xsk);
1224		close(ctx->prog_fd);
1225		if (ctx->has_bpf_link)
1226			close(ctx->link_fd);
1227	}
1228
1229	xsk_put_ctx(ctx, true);
1230
1231	err = xsk_get_mmap_offsets(xsk->fd, &off);
1232	if (!err) {
1233		if (xsk->rx) {
1234			munmap(xsk->rx->ring - off.rx.desc,
1235			       off.rx.desc + xsk->config.rx_size * desc_sz);
1236		}
1237		if (xsk->tx) {
1238			munmap(xsk->tx->ring - off.tx.desc,
1239			       off.tx.desc + xsk->config.tx_size * desc_sz);
1240		}
1241	}
1242
1243	umem->refcount--;
1244	/* Do not close an fd that also has an associated umem connected
1245	 * to it.
1246	 */
1247	if (xsk->fd != umem->fd)
1248		close(xsk->fd);
1249	free(xsk);
1250}
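
Compared with the newer listing, the v6.2 variant above differs at the call site in two visible ways: xsk_socket__create() takes an interface name rather than an ifindex, and unless XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD is set in libbpf_flags, socket creation also loads and attaches the built-in XDP program and xsks_map through __xsk_setup_xdp_prog(). Below is a minimal sketch of that call under stated assumptions: the umem and xsks_map_fd arguments are presumed to come from the caller (for example from xsk_umem__create() and xsk_setup_xdp_prog() above), and the interface name "eth0" and queue 0 are only examples.

#include <linux/if_link.h>
#include <linux/if_xdp.h>
#include "xsk.h"

/* Hypothetical helper: rings, umem and xsks_map_fd are owned by the caller. */
static int xsk_example_bind_v6_2(struct xsk_umem *umem, int xsks_map_fd,
				 struct xsk_ring_cons *rx,
				 struct xsk_ring_prod *tx,
				 struct xsk_socket **xsk)
{
	struct xsk_socket_config cfg = {
		.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
		.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
		/* Inhibit the built-in XDP program; the caller manages the map. */
		.libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD,
		.xdp_flags = XDP_FLAGS_SKB_MODE,
		.bind_flags = XDP_COPY,
	};
	int err;

	err = xsk_socket__create(xsk, "eth0", 0, umem, rx, tx, &cfg);
	if (err)
		return err;

	/* Point queue 0 of the caller's xsks_map at this socket's fd. */
	return xsk_socket__update_xskmap(*xsk, xsks_map_fd);
}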