Loading...
1/* SPDX-License-Identifier: GPL-2.0 */
2/* AF_XDP internal functions
3 * Copyright(c) 2018 Intel Corporation.
4 */
5
6#ifndef _LINUX_XDP_SOCK_H
7#define _LINUX_XDP_SOCK_H
8
9#include <linux/bpf.h>
10#include <linux/workqueue.h>
11#include <linux/if_xdp.h>
12#include <linux/mutex.h>
13#include <linux/spinlock.h>
14#include <linux/mm.h>
15#include <net/sock.h>
16
/*
 * Kernel-internal umem flag, bit 1.
 * NOTE(review): presumably marks scatter-gather (multi-buffer) support,
 * judging by the name only - confirm against the users of this flag.
 */
#define XDP_UMEM_SG_FLAG (1 << 1)
18
/* Forward declarations: only pointers to these types are used below. */
struct net_device;
struct xsk_queue;
struct xdp_buff;
22
23struct xdp_umem {
24 void *addrs;
25 u64 size;
26 u32 headroom;
27 u32 chunk_size;
28 u32 chunks;
29 u32 npgs;
30 struct user_struct *user;
31 refcount_t users;
32 u8 flags;
33 u8 tx_metadata_len;
34 bool zc;
35 struct page **pgs;
36 int id;
37 struct list_head xsk_dma_list;
38 struct work_struct work;
39};
40
41struct xsk_map {
42 struct bpf_map map;
43 spinlock_t lock; /* Synchronize map updates */
44 atomic_t count;
45 struct xdp_sock __rcu *xsk_map[];
46};
47
48struct xdp_sock {
49 /* struct sock must be the first member of struct xdp_sock */
50 struct sock sk;
51 struct xsk_queue *rx ____cacheline_aligned_in_smp;
52 struct net_device *dev;
53 struct xdp_umem *umem;
54 struct list_head flush_node;
55 struct xsk_buff_pool *pool;
56 u16 queue_id;
57 bool zc;
58 bool sg;
59 enum {
60 XSK_READY = 0,
61 XSK_BOUND,
62 XSK_UNBOUND,
63 } state;
64
65 struct xsk_queue *tx ____cacheline_aligned_in_smp;
66 struct list_head tx_list;
67 /* record the number of tx descriptors sent by this xsk and
68 * when it exceeds MAX_PER_SOCKET_BUDGET, an opportunity needs
69 * to be given to other xsks for sending tx descriptors, thereby
70 * preventing other XSKs from being starved.
71 */
72 u32 tx_budget_spent;
73
74 /* Protects generic receive. */
75 spinlock_t rx_lock;
76
77 /* Statistics */
78 u64 rx_dropped;
79 u64 rx_queue_full;
80
81 /* When __xsk_generic_xmit() must return before it sees the EOP descriptor for the current
82 * packet, the partially built skb is saved here so that packet building can resume in next
83 * call of __xsk_generic_xmit().
84 */
85 struct sk_buff *skb;
86
87 struct list_head map_list;
88 /* Protects map_list */
89 spinlock_t map_list_lock;
90 /* Protects multiple processes in the control path */
91 struct mutex mutex;
92 struct xsk_queue *fq_tmp; /* Only as tmp storage before bind */
93 struct xsk_queue *cq_tmp; /* Only as tmp storage before bind */
94};
95
96/*
97 * AF_XDP TX metadata hooks for network devices.
98 * The following hooks can be defined; unless noted otherwise, they are
99 * optional and can be filled with a null pointer.
100 *
101 * void (*tmo_request_timestamp)(void *priv)
102 * Called when AF_XDP frame requested egress timestamp.
103 *
104 * u64 (*tmo_fill_timestamp)(void *priv)
105 * Called when AF_XDP frame, that had requested egress timestamp,
106 * received a completion. The hook needs to return the actual HW timestamp.
107 *
108 * void (*tmo_request_checksum)(u16 csum_start, u16 csum_offset, void *priv)
109 * Called when AF_XDP frame requested HW checksum offload. csum_start
110 * indicates position where checksumming should start.
111 * csum_offset indicates position where checksum should be stored.
112 *
113 */
114struct xsk_tx_metadata_ops {
115 void (*tmo_request_timestamp)(void *priv);
116 u64 (*tmo_fill_timestamp)(void *priv);
117 void (*tmo_request_checksum)(u16 csum_start, u16 csum_offset, void *priv);
118};
119
120#ifdef CONFIG_XDP_SOCKETS
121
/*
 * Declared here for CONFIG_XDP_SOCKETS builds; the #else branch of this
 * header provides inline stubs with the same signatures.
 */
int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp);
void __xsk_map_flush(struct list_head *flush_list);
125
126/**
127 * xsk_tx_metadata_to_compl - Save enough relevant metadata information
128 * to perform tx completion in the future.
129 * @meta: pointer to AF_XDP metadata area
130 * @compl: pointer to output struct xsk_tx_metadata_to_compl
131 *
132 * This function should be called by the networking device when
133 * it prepares AF_XDP egress packet. The value of @compl should be stored
134 * and passed to xsk_tx_metadata_complete upon TX completion.
135 */
136static inline void xsk_tx_metadata_to_compl(struct xsk_tx_metadata *meta,
137 struct xsk_tx_metadata_compl *compl)
138{
139 if (!meta)
140 return;
141
142 if (meta->flags & XDP_TXMD_FLAGS_TIMESTAMP)
143 compl->tx_timestamp = &meta->completion.tx_timestamp;
144 else
145 compl->tx_timestamp = NULL;
146}
147
148/**
149 * xsk_tx_metadata_request - Evaluate AF_XDP TX metadata at submission
150 * and call appropriate xsk_tx_metadata_ops operation.
151 * @meta: pointer to AF_XDP metadata area
152 * @ops: pointer to struct xsk_tx_metadata_ops
153 * @priv: pointer to driver-private aread
154 *
155 * This function should be called by the networking device when
156 * it prepares AF_XDP egress packet.
157 */
158static inline void xsk_tx_metadata_request(const struct xsk_tx_metadata *meta,
159 const struct xsk_tx_metadata_ops *ops,
160 void *priv)
161{
162 if (!meta)
163 return;
164
165 if (ops->tmo_request_timestamp)
166 if (meta->flags & XDP_TXMD_FLAGS_TIMESTAMP)
167 ops->tmo_request_timestamp(priv);
168
169 if (ops->tmo_request_checksum)
170 if (meta->flags & XDP_TXMD_FLAGS_CHECKSUM)
171 ops->tmo_request_checksum(meta->request.csum_start,
172 meta->request.csum_offset, priv);
173}
174
175/**
176 * xsk_tx_metadata_complete - Evaluate AF_XDP TX metadata at completion
177 * and call appropriate xsk_tx_metadata_ops operation.
178 * @compl: pointer to completion metadata produced from xsk_tx_metadata_to_compl
179 * @ops: pointer to struct xsk_tx_metadata_ops
180 * @priv: pointer to driver-private aread
181 *
182 * This function should be called by the networking device upon
183 * AF_XDP egress completion.
184 */
185static inline void xsk_tx_metadata_complete(struct xsk_tx_metadata_compl *compl,
186 const struct xsk_tx_metadata_ops *ops,
187 void *priv)
188{
189 if (!compl)
190 return;
191 if (!compl->tx_timestamp)
192 return;
193
194 *compl->tx_timestamp = ops->tmo_fill_timestamp(priv);
195}
196
197#else
198
199static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
200{
201 return -ENOTSUPP;
202}
203
/* Stub for builds without CONFIG_XDP_SOCKETS: always returns -EOPNOTSUPP. */
static inline int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
{
	return -EOPNOTSUPP;
}
208
/* Stub for builds without CONFIG_XDP_SOCKETS: nothing to flush. */
static inline void __xsk_map_flush(struct list_head *flush_list)
{
}
212
/* Stub for builds without CONFIG_XDP_SOCKETS: leaves @compl untouched. */
static inline void xsk_tx_metadata_to_compl(struct xsk_tx_metadata *meta,
					    struct xsk_tx_metadata_compl *compl)
{
}
217
/*
 * Stub for builds without CONFIG_XDP_SOCKETS: no hook is called.
 * @meta is const-qualified to match the CONFIG_XDP_SOCKETS variant, so a
 * caller holding a const pointer builds cleanly in both configurations.
 */
static inline void xsk_tx_metadata_request(const struct xsk_tx_metadata *meta,
					   const struct xsk_tx_metadata_ops *ops,
					   void *priv)
{
}
223
/* Stub for builds without CONFIG_XDP_SOCKETS: no hook is called. */
static inline void xsk_tx_metadata_complete(struct xsk_tx_metadata_compl *compl,
					    const struct xsk_tx_metadata_ops *ops,
					    void *priv)
{
}
229
230#endif /* CONFIG_XDP_SOCKETS */
231#endif /* _LINUX_XDP_SOCK_H */
1/* SPDX-License-Identifier: GPL-2.0 */
2/* AF_XDP internal functions
3 * Copyright(c) 2018 Intel Corporation.
4 */
5
6#ifndef _LINUX_XDP_SOCK_H
7#define _LINUX_XDP_SOCK_H
8
9#include <linux/workqueue.h>
10#include <linux/if_xdp.h>
11#include <linux/mutex.h>
12#include <linux/spinlock.h>
13#include <linux/mm.h>
14#include <net/sock.h>
15
16struct net_device;
17struct xsk_queue;
18
/* Masks for xdp_umem_page flags.
 * The low 12 bits of the addr will be 0 since this is the page address, so
 * we can use them for flags.
 */
#define XSK_NEXT_PG_CONTIG_SHIFT 0
#define XSK_NEXT_PG_CONTIG_MASK (1ULL << XSK_NEXT_PG_CONTIG_SHIFT)
25
26struct xdp_umem_page {
27 void *addr;
28 dma_addr_t dma;
29};
30
31struct xdp_umem_fq_reuse {
32 u32 nentries;
33 u32 length;
34 u64 handles[];
35};
36
/* Flags for the umem flags field.
 *
 * The NEED_WAKEUP flag is (1 << 1) due to the reuse of the flags field for
 * public flags. See include/uapi/linux/if_xdp.h.
 */
#define XDP_UMEM_USES_NEED_WAKEUP (1 << 1)
43
44struct xdp_umem {
45 struct xsk_queue *fq;
46 struct xsk_queue *cq;
47 struct xdp_umem_page *pages;
48 u64 chunk_mask;
49 u64 size;
50 u32 headroom;
51 u32 chunk_size_nohr;
52 struct user_struct *user;
53 unsigned long address;
54 refcount_t users;
55 struct work_struct work;
56 struct page **pgs;
57 u32 npgs;
58 u16 queue_id;
59 u8 need_wakeup;
60 u8 flags;
61 int id;
62 struct net_device *dev;
63 struct xdp_umem_fq_reuse *fq_reuse;
64 bool zc;
65 spinlock_t xsk_list_lock;
66 struct list_head xsk_list;
67};
68
69/* Nodes are linked in the struct xdp_sock map_list field, and used to
70 * track which maps a certain socket reside in.
71 */
72struct xsk_map;
73struct xsk_map_node {
74 struct list_head node;
75 struct xsk_map *map;
76 struct xdp_sock **map_entry;
77};
78
79struct xdp_sock {
80 /* struct sock must be the first member of struct xdp_sock */
81 struct sock sk;
82 struct xsk_queue *rx;
83 struct net_device *dev;
84 struct xdp_umem *umem;
85 struct list_head flush_node;
86 u16 queue_id;
87 bool zc;
88 enum {
89 XSK_READY = 0,
90 XSK_BOUND,
91 XSK_UNBOUND,
92 } state;
93 /* Protects multiple processes in the control path */
94 struct mutex mutex;
95 struct xsk_queue *tx ____cacheline_aligned_in_smp;
96 struct list_head list;
97 /* Mutual exclusion of NAPI TX thread and sendmsg error paths
98 * in the SKB destructor callback.
99 */
100 spinlock_t tx_completion_lock;
101 /* Protects generic receive. */
102 spinlock_t rx_lock;
103 u64 rx_dropped;
104 struct list_head map_list;
105 /* Protects map_list */
106 spinlock_t map_list_lock;
107};
108
/* Forward declaration: only pointers to struct xdp_buff are used below. */
struct xdp_buff;
110#ifdef CONFIG_XDP_SOCKETS
111int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
112int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
113void xsk_flush(struct xdp_sock *xs);
114bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs);
115/* Used from netdev driver */
116bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt);
117u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr);
118void xsk_umem_discard_addr(struct xdp_umem *umem);
119void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries);
120bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc);
121void xsk_umem_consume_tx_done(struct xdp_umem *umem);
122struct xdp_umem_fq_reuse *xsk_reuseq_prepare(u32 nentries);
123struct xdp_umem_fq_reuse *xsk_reuseq_swap(struct xdp_umem *umem,
124 struct xdp_umem_fq_reuse *newq);
125void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq);
126struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev, u16 queue_id);
127void xsk_set_rx_need_wakeup(struct xdp_umem *umem);
128void xsk_set_tx_need_wakeup(struct xdp_umem *umem);
129void xsk_clear_rx_need_wakeup(struct xdp_umem *umem);
130void xsk_clear_tx_need_wakeup(struct xdp_umem *umem);
131bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem);
132
133void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
134 struct xdp_sock **map_entry);
135int xsk_map_inc(struct xsk_map *map);
136void xsk_map_put(struct xsk_map *map);
137
138static inline u64 xsk_umem_extract_addr(u64 addr)
139{
140 return addr & XSK_UNALIGNED_BUF_ADDR_MASK;
141}
142
143static inline u64 xsk_umem_extract_offset(u64 addr)
144{
145 return addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT;
146}
147
148static inline u64 xsk_umem_add_offset_to_addr(u64 addr)
149{
150 return xsk_umem_extract_addr(addr) + xsk_umem_extract_offset(addr);
151}
152
153static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
154{
155 unsigned long page_addr;
156
157 addr = xsk_umem_add_offset_to_addr(addr);
158 page_addr = (unsigned long)umem->pages[addr >> PAGE_SHIFT].addr;
159
160 return (char *)(page_addr & PAGE_MASK) + (addr & ~PAGE_MASK);
161}
162
163static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr)
164{
165 addr = xsk_umem_add_offset_to_addr(addr);
166
167 return umem->pages[addr >> PAGE_SHIFT].dma + (addr & ~PAGE_MASK);
168}
169
170/* Reuse-queue aware version of FILL queue helpers */
171static inline bool xsk_umem_has_addrs_rq(struct xdp_umem *umem, u32 cnt)
172{
173 struct xdp_umem_fq_reuse *rq = umem->fq_reuse;
174
175 if (rq->length >= cnt)
176 return true;
177
178 return xsk_umem_has_addrs(umem, cnt - rq->length);
179}
180
181static inline u64 *xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr)
182{
183 struct xdp_umem_fq_reuse *rq = umem->fq_reuse;
184
185 if (!rq->length)
186 return xsk_umem_peek_addr(umem, addr);
187
188 *addr = rq->handles[rq->length - 1];
189 return addr;
190}
191
192static inline void xsk_umem_discard_addr_rq(struct xdp_umem *umem)
193{
194 struct xdp_umem_fq_reuse *rq = umem->fq_reuse;
195
196 if (!rq->length)
197 xsk_umem_discard_addr(umem);
198 else
199 rq->length--;
200}
201
202static inline void xsk_umem_fq_reuse(struct xdp_umem *umem, u64 addr)
203{
204 struct xdp_umem_fq_reuse *rq = umem->fq_reuse;
205
206 rq->handles[rq->length++] = addr;
207}
208
209/* Handle the offset appropriately depending on aligned or unaligned mode.
210 * For unaligned mode, we store the offset in the upper 16-bits of the address.
211 * For aligned mode, we simply add the offset to the address.
212 */
213static inline u64 xsk_umem_adjust_offset(struct xdp_umem *umem, u64 address,
214 u64 offset)
215{
216 if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG)
217 return address + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
218 else
219 return address + offset;
220}
221#else
222static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
223{
224 return -ENOTSUPP;
225}
226
227static inline int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
228{
229 return -ENOTSUPP;
230}
231
/* Stub for builds without CONFIG_XDP_SOCKETS: nothing to flush. */
static inline void xsk_flush(struct xdp_sock *xs)
{
}
235
/* Stub for builds without CONFIG_XDP_SOCKETS: always returns false. */
static inline bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
{
	return false;
}
240
241static inline bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt)
242{
243 return false;
244}
245
246static inline u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr)
247{
248 return NULL;
249}
250
/* Stub for builds without CONFIG_XDP_SOCKETS: does nothing. */
static inline void xsk_umem_discard_addr(struct xdp_umem *umem)
{
}
254
255static inline void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries)
256{
257}
258
/*
 * Stub for builds without CONFIG_XDP_SOCKETS: always returns false and
 * leaves @desc untouched.
 */
static inline bool xsk_umem_consume_tx(struct xdp_umem *umem,
				       struct xdp_desc *desc)
{
	return false;
}
264
/* Stub for builds without CONFIG_XDP_SOCKETS: does nothing. */
static inline void xsk_umem_consume_tx_done(struct xdp_umem *umem)
{
}
268
269static inline struct xdp_umem_fq_reuse *xsk_reuseq_prepare(u32 nentries)
270{
271 return NULL;
272}
273
274static inline struct xdp_umem_fq_reuse *xsk_reuseq_swap(
275 struct xdp_umem *umem,
276 struct xdp_umem_fq_reuse *newq)
277{
278 return NULL;
279}
/* Stub for builds without CONFIG_XDP_SOCKETS: does nothing. */
static inline void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq)
{
}
283
284static inline struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev,
285 u16 queue_id)
286{
287 return NULL;
288}
289
290static inline u64 xsk_umem_extract_addr(u64 addr)
291{
292 return 0;
293}
294
295static inline u64 xsk_umem_extract_offset(u64 addr)
296{
297 return 0;
298}
299
300static inline u64 xsk_umem_add_offset_to_addr(u64 addr)
301{
302 return 0;
303}
304
305static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
306{
307 return NULL;
308}
309
310static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr)
311{
312 return 0;
313}
314
315static inline bool xsk_umem_has_addrs_rq(struct xdp_umem *umem, u32 cnt)
316{
317 return false;
318}
319
320static inline u64 *xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr)
321{
322 return NULL;
323}
324
/* Stub for builds without CONFIG_XDP_SOCKETS: does nothing. */
static inline void xsk_umem_discard_addr_rq(struct xdp_umem *umem)
{
}
328
329static inline void xsk_umem_fq_reuse(struct xdp_umem *umem, u64 addr)
330{
331}
332
/* Stub for builds without CONFIG_XDP_SOCKETS: does nothing. */
static inline void xsk_set_rx_need_wakeup(struct xdp_umem *umem)
{
}
336
/* Stub for builds without CONFIG_XDP_SOCKETS: does nothing. */
static inline void xsk_set_tx_need_wakeup(struct xdp_umem *umem)
{
}
340
/* Stub for builds without CONFIG_XDP_SOCKETS: does nothing. */
static inline void xsk_clear_rx_need_wakeup(struct xdp_umem *umem)
{
}
344
/* Stub for builds without CONFIG_XDP_SOCKETS: does nothing. */
static inline void xsk_clear_tx_need_wakeup(struct xdp_umem *umem)
{
}
348
/* Stub for builds without CONFIG_XDP_SOCKETS: always returns false. */
static inline bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem)
{
	return false;
}
353
354static inline u64 xsk_umem_adjust_offset(struct xdp_umem *umem, u64 handle,
355 u64 offset)
356{
357 return 0;
358}
359
360#endif /* CONFIG_XDP_SOCKETS */
361
362#endif /* _LINUX_XDP_SOCK_H */