Linux Audio

Check our new training course

Loading...
Note: File does not exist in v5.4.
  1// SPDX-License-Identifier: GPL-2.0
  2
  3#include "io_uring.h"
  4#include "napi.h"
  5
  6#ifdef CONFIG_NET_RX_BUSY_POLL
  7
  8/* Timeout for cleanout of stale entries. */
  9#define NAPI_TIMEOUT		(60 * SEC_CONVERSION)
 10
 11struct io_napi_entry {
 12	unsigned int		napi_id;
 13	struct list_head	list;
 14
 15	unsigned long		timeout;
 16	struct hlist_node	node;
 17
 18	struct rcu_head		rcu;
 19};
 20
 21static struct io_napi_entry *io_napi_hash_find(struct hlist_head *hash_list,
 22					       unsigned int napi_id)
 23{
 24	struct io_napi_entry *e;
 25
 26	hlist_for_each_entry_rcu(e, hash_list, node) {
 27		if (e->napi_id != napi_id)
 28			continue;
 29		return e;
 30	}
 31
 32	return NULL;
 33}
 34
 35static inline ktime_t net_to_ktime(unsigned long t)
 36{
 37	/* napi approximating usecs, reverse busy_loop_current_time */
 38	return ns_to_ktime(t << 10);
 39}
 40
 41int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id)
 42{
 43	struct hlist_head *hash_list;
 44	struct io_napi_entry *e;
 45
 46	/* Non-NAPI IDs can be rejected. */
 47	if (napi_id < MIN_NAPI_ID)
 48		return -EINVAL;
 49
 50	hash_list = &ctx->napi_ht[hash_min(napi_id, HASH_BITS(ctx->napi_ht))];
 51
 52	scoped_guard(rcu) {
 53		e = io_napi_hash_find(hash_list, napi_id);
 54		if (e) {
 55			WRITE_ONCE(e->timeout, jiffies + NAPI_TIMEOUT);
 56			return -EEXIST;
 57		}
 58	}
 59
 60	e = kmalloc(sizeof(*e), GFP_NOWAIT);
 61	if (!e)
 62		return -ENOMEM;
 63
 64	e->napi_id = napi_id;
 65	e->timeout = jiffies + NAPI_TIMEOUT;
 66
 67	/*
 68	 * guard(spinlock) is not used to manually unlock it before calling
 69	 * kfree()
 70	 */
 71	spin_lock(&ctx->napi_lock);
 72	if (unlikely(io_napi_hash_find(hash_list, napi_id))) {
 73		spin_unlock(&ctx->napi_lock);
 74		kfree(e);
 75		return -EEXIST;
 76	}
 77
 78	hlist_add_tail_rcu(&e->node, hash_list);
 79	list_add_tail_rcu(&e->list, &ctx->napi_list);
 80	spin_unlock(&ctx->napi_lock);
 81	return 0;
 82}
 83
 84static int __io_napi_del_id(struct io_ring_ctx *ctx, unsigned int napi_id)
 85{
 86	struct hlist_head *hash_list;
 87	struct io_napi_entry *e;
 88
 89	/* Non-NAPI IDs can be rejected. */
 90	if (napi_id < MIN_NAPI_ID)
 91		return -EINVAL;
 92
 93	hash_list = &ctx->napi_ht[hash_min(napi_id, HASH_BITS(ctx->napi_ht))];
 94	guard(spinlock)(&ctx->napi_lock);
 95	e = io_napi_hash_find(hash_list, napi_id);
 96	if (!e)
 97		return -ENOENT;
 98
 99	list_del_rcu(&e->list);
100	hash_del_rcu(&e->node);
101	kfree_rcu(e, rcu);
102	return 0;
103}
104
105static void __io_napi_remove_stale(struct io_ring_ctx *ctx)
106{
107	struct io_napi_entry *e;
108
109	guard(spinlock)(&ctx->napi_lock);
110	/*
111	 * list_for_each_entry_safe() is not required as long as:
112	 * 1. list_del_rcu() does not reset the deleted node next pointer
113	 * 2. kfree_rcu() delays the memory freeing until the next quiescent
114	 *    state
115	 */
116	list_for_each_entry(e, &ctx->napi_list, list) {
117		if (time_after(jiffies, READ_ONCE(e->timeout))) {
118			list_del_rcu(&e->list);
119			hash_del_rcu(&e->node);
120			kfree_rcu(e, rcu);
121		}
122	}
123}
124
125static inline void io_napi_remove_stale(struct io_ring_ctx *ctx, bool is_stale)
126{
127	if (is_stale)
128		__io_napi_remove_stale(ctx);
129}
130
131static inline bool io_napi_busy_loop_timeout(ktime_t start_time,
132					     ktime_t bp)
133{
134	if (bp) {
135		ktime_t end_time = ktime_add(start_time, bp);
136		ktime_t now = net_to_ktime(busy_loop_current_time());
137
138		return ktime_after(now, end_time);
139	}
140
141	return true;
142}
143
144static bool io_napi_busy_loop_should_end(void *data,
145					 unsigned long start_time)
146{
147	struct io_wait_queue *iowq = data;
148
149	if (signal_pending(current))
150		return true;
151	if (io_should_wake(iowq) || io_has_work(iowq->ctx))
152		return true;
153	if (io_napi_busy_loop_timeout(net_to_ktime(start_time),
154				      iowq->napi_busy_poll_dt))
155		return true;
156
157	return false;
158}
159
160/*
161 * never report stale entries
162 */
163static bool static_tracking_do_busy_loop(struct io_ring_ctx *ctx,
164					 bool (*loop_end)(void *, unsigned long),
165					 void *loop_end_arg)
166{
167	struct io_napi_entry *e;
168
169	list_for_each_entry_rcu(e, &ctx->napi_list, list)
170		napi_busy_loop_rcu(e->napi_id, loop_end, loop_end_arg,
171				   ctx->napi_prefer_busy_poll, BUSY_POLL_BUDGET);
172	return false;
173}
174
175static bool
176dynamic_tracking_do_busy_loop(struct io_ring_ctx *ctx,
177			      bool (*loop_end)(void *, unsigned long),
178			      void *loop_end_arg)
179{
180	struct io_napi_entry *e;
181	bool is_stale = false;
182
183	list_for_each_entry_rcu(e, &ctx->napi_list, list) {
184		napi_busy_loop_rcu(e->napi_id, loop_end, loop_end_arg,
185				   ctx->napi_prefer_busy_poll, BUSY_POLL_BUDGET);
186
187		if (time_after(jiffies, READ_ONCE(e->timeout)))
188			is_stale = true;
189	}
190
191	return is_stale;
192}
193
194static inline bool
195__io_napi_do_busy_loop(struct io_ring_ctx *ctx,
196		       bool (*loop_end)(void *, unsigned long),
197		       void *loop_end_arg)
198{
199	if (READ_ONCE(ctx->napi_track_mode) == IO_URING_NAPI_TRACKING_STATIC)
200		return static_tracking_do_busy_loop(ctx, loop_end, loop_end_arg);
201	return dynamic_tracking_do_busy_loop(ctx, loop_end, loop_end_arg);
202}
203
204static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx,
205				       struct io_wait_queue *iowq)
206{
207	unsigned long start_time = busy_loop_current_time();
208	bool (*loop_end)(void *, unsigned long) = NULL;
209	void *loop_end_arg = NULL;
210	bool is_stale = false;
211
212	/* Singular lists use a different napi loop end check function and are
213	 * only executed once.
214	 */
215	if (list_is_singular(&ctx->napi_list)) {
216		loop_end = io_napi_busy_loop_should_end;
217		loop_end_arg = iowq;
218	}
219
220	scoped_guard(rcu) {
221		do {
222			is_stale = __io_napi_do_busy_loop(ctx, loop_end,
223							  loop_end_arg);
224		} while (!io_napi_busy_loop_should_end(iowq, start_time) &&
225			 !loop_end_arg);
226	}
227
228	io_napi_remove_stale(ctx, is_stale);
229}
230
231/*
232 * io_napi_init() - Init napi settings
233 * @ctx: pointer to io-uring context structure
234 *
235 * Init napi settings in the io-uring context.
236 */
237void io_napi_init(struct io_ring_ctx *ctx)
238{
239	u64 sys_dt = READ_ONCE(sysctl_net_busy_poll) * NSEC_PER_USEC;
240
241	INIT_LIST_HEAD(&ctx->napi_list);
242	spin_lock_init(&ctx->napi_lock);
243	ctx->napi_prefer_busy_poll = false;
244	ctx->napi_busy_poll_dt = ns_to_ktime(sys_dt);
245	ctx->napi_track_mode = IO_URING_NAPI_TRACKING_INACTIVE;
246}
247
248/*
249 * io_napi_free() - Deallocate napi
250 * @ctx: pointer to io-uring context structure
251 *
252 * Free the napi list and the hash table in the io-uring context.
253 */
254void io_napi_free(struct io_ring_ctx *ctx)
255{
256	struct io_napi_entry *e;
257
258	guard(spinlock)(&ctx->napi_lock);
259	list_for_each_entry(e, &ctx->napi_list, list) {
260		hash_del_rcu(&e->node);
261		kfree_rcu(e, rcu);
262	}
263	INIT_LIST_HEAD_RCU(&ctx->napi_list);
264}
265
266static int io_napi_register_napi(struct io_ring_ctx *ctx,
267				 struct io_uring_napi *napi)
268{
269	switch (napi->op_param) {
270	case IO_URING_NAPI_TRACKING_DYNAMIC:
271	case IO_URING_NAPI_TRACKING_STATIC:
272		break;
273	default:
274		return -EINVAL;
275	}
276	/* clean the napi list for new settings */
277	io_napi_free(ctx);
278	WRITE_ONCE(ctx->napi_track_mode, napi->op_param);
279	WRITE_ONCE(ctx->napi_busy_poll_dt, napi->busy_poll_to * NSEC_PER_USEC);
280	WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi->prefer_busy_poll);
281	return 0;
282}
283
284/*
285 * io_napi_register() - Register napi with io-uring
286 * @ctx: pointer to io-uring context structure
287 * @arg: pointer to io_uring_napi structure
288 *
289 * Register napi in the io-uring context.
290 */
291int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
292{
293	const struct io_uring_napi curr = {
294		.busy_poll_to 	  = ktime_to_us(ctx->napi_busy_poll_dt),
295		.prefer_busy_poll = ctx->napi_prefer_busy_poll,
296		.op_param	  = ctx->napi_track_mode
297	};
298	struct io_uring_napi napi;
299
300	if (ctx->flags & IORING_SETUP_IOPOLL)
301		return -EINVAL;
302	if (copy_from_user(&napi, arg, sizeof(napi)))
303		return -EFAULT;
304	if (napi.pad[0] || napi.pad[1] || napi.resv)
305		return -EINVAL;
306
307	if (copy_to_user(arg, &curr, sizeof(curr)))
308		return -EFAULT;
309
310	switch (napi.opcode) {
311	case IO_URING_NAPI_REGISTER_OP:
312		return io_napi_register_napi(ctx, &napi);
313	case IO_URING_NAPI_STATIC_ADD_ID:
314		if (curr.op_param != IO_URING_NAPI_TRACKING_STATIC)
315			return -EINVAL;
316		return __io_napi_add_id(ctx, napi.op_param);
317	case IO_URING_NAPI_STATIC_DEL_ID:
318		if (curr.op_param != IO_URING_NAPI_TRACKING_STATIC)
319			return -EINVAL;
320		return __io_napi_del_id(ctx, napi.op_param);
321	default:
322		return -EINVAL;
323	}
324}
325
326/*
327 * io_napi_unregister() - Unregister napi with io-uring
328 * @ctx: pointer to io-uring context structure
329 * @arg: pointer to io_uring_napi structure
330 *
331 * Unregister napi. If arg has been specified copy the busy poll timeout and
332 * prefer busy poll setting to the passed in structure.
333 */
334int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
335{
336	const struct io_uring_napi curr = {
337		.busy_poll_to 	  = ktime_to_us(ctx->napi_busy_poll_dt),
338		.prefer_busy_poll = ctx->napi_prefer_busy_poll
339	};
340
341	if (arg && copy_to_user(arg, &curr, sizeof(curr)))
342		return -EFAULT;
343
344	WRITE_ONCE(ctx->napi_busy_poll_dt, 0);
345	WRITE_ONCE(ctx->napi_prefer_busy_poll, false);
346	WRITE_ONCE(ctx->napi_track_mode, IO_URING_NAPI_TRACKING_INACTIVE);
347	return 0;
348}
349
350/*
351 * __io_napi_busy_loop() - execute busy poll loop
352 * @ctx: pointer to io-uring context structure
353 * @iowq: pointer to io wait queue
354 *
355 * Execute the busy poll loop and merge the spliced off list.
356 */
357void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq)
358{
359	if (ctx->flags & IORING_SETUP_SQPOLL)
360		return;
361
362	iowq->napi_busy_poll_dt = READ_ONCE(ctx->napi_busy_poll_dt);
363	if (iowq->timeout != KTIME_MAX) {
364		ktime_t dt = ktime_sub(iowq->timeout, io_get_time(ctx));
365
366		iowq->napi_busy_poll_dt = min_t(u64, iowq->napi_busy_poll_dt, dt);
367	}
368
369	iowq->napi_prefer_busy_poll = READ_ONCE(ctx->napi_prefer_busy_poll);
370	io_napi_blocking_busy_loop(ctx, iowq);
371}
372
373/*
374 * io_napi_sqpoll_busy_poll() - busy poll loop for sqpoll
375 * @ctx: pointer to io-uring context structure
376 *
377 * Splice of the napi list and execute the napi busy poll loop.
378 */
379int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx)
380{
381	bool is_stale = false;
382
383	if (!READ_ONCE(ctx->napi_busy_poll_dt))
384		return 0;
385	if (list_empty_careful(&ctx->napi_list))
386		return 0;
387
388	scoped_guard(rcu) {
389		is_stale = __io_napi_do_busy_loop(ctx, NULL, NULL);
390	}
391
392	io_napi_remove_stale(ctx, is_stale);
393	return 1;
394}
395
396#endif