Linux Audio

Check our new training course

Loading...
v6.2
  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 * Tag allocation using scalable bitmaps. Uses active queue tracking to support
  4 * fairer distribution of tags between multiple submitters when a shared tag map
  5 * is used.
  6 *
  7 * Copyright (C) 2013-2014 Jens Axboe
  8 */
  9#include <linux/kernel.h>
 10#include <linux/module.h>
 11
 12#include <linux/blk-mq.h>
 13#include <linux/delay.h>
 14#include "blk.h"
 15#include "blk-mq.h"
 16#include "blk-mq-sched.h"
 17#include "blk-mq-tag.h"
 18
 19/*
 20 * Recalculate wakeup batch when tag is shared by hctx.
 21 */
 22static void blk_mq_update_wake_batch(struct blk_mq_tags *tags,
 23		unsigned int users)
 24{
 25	if (!users)
 26		return;
 27
 28	sbitmap_queue_recalculate_wake_batch(&tags->bitmap_tags,
 29			users);
 30	sbitmap_queue_recalculate_wake_batch(&tags->breserved_tags,
 31			users);
 32}
 33
 34/*
 35 * If a previously inactive queue goes active, bump the active user count.
 36 * We need to do this before try to allocate driver tag, then even if fail
 37 * to get tag when first time, the other shared-tag users could reserve
 38 * budget for it.
 39 */
 40void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
 41{
 42	unsigned int users;
 
 
 43
 44	if (blk_mq_is_shared_tags(hctx->flags)) {
 45		struct request_queue *q = hctx->queue;
 46
 47		if (test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
 48			return;
 49		set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags);
 50	} else {
 51		if (test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
 52			return;
 53		set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state);
 54	}
 55
 56	users = atomic_inc_return(&hctx->tags->active_queues);
 57
 58	blk_mq_update_wake_batch(hctx->tags, users);
 59}
 60
 61/*
 62 * Wakeup all potentially sleeping on tags
 63 */
 64void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
 65{
 66	sbitmap_queue_wake_all(&tags->bitmap_tags);
 67	if (include_reserve)
 68		sbitmap_queue_wake_all(&tags->breserved_tags);
 69}
 70
 71/*
 72 * If a previously busy queue goes inactive, potential waiters could now
 73 * be allowed to queue. Wake them up and check.
 74 */
 75void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
 76{
 77	struct blk_mq_tags *tags = hctx->tags;
 78	unsigned int users;
 79
 80	if (blk_mq_is_shared_tags(hctx->flags)) {
 81		struct request_queue *q = hctx->queue;
 82
 83		if (!test_and_clear_bit(QUEUE_FLAG_HCTX_ACTIVE,
 84					&q->queue_flags))
 85			return;
 86	} else {
 87		if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
 88			return;
 89	}
 90
 91	users = atomic_dec_return(&tags->active_queues);
 92
 93	blk_mq_update_wake_batch(tags, users);
 94
 95	blk_mq_tag_wakeup_all(tags, false);
 96}
 97
 98static int __blk_mq_get_tag(struct blk_mq_alloc_data *data,
 99			    struct sbitmap_queue *bt)
 
 
 
 
100{
101	if (!data->q->elevator && !(data->flags & BLK_MQ_REQ_RESERVED) &&
102			!hctx_may_queue(data->hctx, bt))
103		return BLK_MQ_NO_TAG;
 
 
 
104
105	if (data->shallow_depth)
106		return sbitmap_queue_get_shallow(bt, data->shallow_depth);
107	else
108		return __sbitmap_queue_get(bt);
 
 
 
 
 
 
 
 
 
 
 
109}
110
111unsigned long blk_mq_get_tags(struct blk_mq_alloc_data *data, int nr_tags,
112			      unsigned int *offset)
113{
114	struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
115	struct sbitmap_queue *bt = &tags->bitmap_tags;
116	unsigned long ret;
117
118	if (data->shallow_depth ||data->flags & BLK_MQ_REQ_RESERVED ||
119	    data->hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)
120		return 0;
121	ret = __sbitmap_queue_get_batch(bt, nr_tags, offset);
122	*offset += tags->nr_reserved_tags;
123	return ret;
124}
125
126unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
 
127{
128	struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
129	struct sbitmap_queue *bt;
130	struct sbq_wait_state *ws;
131	DEFINE_SBQ_WAIT(wait);
132	unsigned int tag_offset;
133	int tag;
134
135	if (data->flags & BLK_MQ_REQ_RESERVED) {
136		if (unlikely(!tags->nr_reserved_tags)) {
137			WARN_ON_ONCE(1);
138			return BLK_MQ_NO_TAG;
139		}
140		bt = &tags->breserved_tags;
141		tag_offset = 0;
142	} else {
143		bt = &tags->bitmap_tags;
144		tag_offset = tags->nr_reserved_tags;
145	}
146
147	tag = __blk_mq_get_tag(data, bt);
148	if (tag != BLK_MQ_NO_TAG)
149		goto found_tag;
150
151	if (data->flags & BLK_MQ_REQ_NOWAIT)
152		return BLK_MQ_NO_TAG;
153
154	ws = bt_wait_ptr(bt, data->hctx);
155	do {
156		struct sbitmap_queue *bt_prev;
 
 
 
 
157
158		/*
159		 * We're out of tags on this hardware queue, kick any
160		 * pending IO submits before going to sleep waiting for
161		 * some to complete.
 
162		 */
163		blk_mq_run_hw_queue(data->hctx, false);
 
164
165		/*
166		 * Retry tag allocation after running the hardware queue,
167		 * as running the queue may also have found completions.
168		 */
169		tag = __blk_mq_get_tag(data, bt);
170		if (tag != BLK_MQ_NO_TAG)
171			break;
172
173		sbitmap_prepare_to_wait(bt, ws, &wait, TASK_UNINTERRUPTIBLE);
174
175		tag = __blk_mq_get_tag(data, bt);
176		if (tag != BLK_MQ_NO_TAG)
177			break;
178
179		bt_prev = bt;
180		io_schedule();
181
182		sbitmap_finish_wait(bt, ws, &wait);
183
184		data->ctx = blk_mq_get_ctx(data->q);
185		data->hctx = blk_mq_map_queue(data->q, data->cmd_flags,
186						data->ctx);
187		tags = blk_mq_tags_from_data(data);
188		if (data->flags & BLK_MQ_REQ_RESERVED)
189			bt = &tags->breserved_tags;
190		else
191			bt = &tags->bitmap_tags;
 
 
 
192
193		/*
194		 * If destination hw queue is changed, fake wake up on
195		 * previous queue for compensating the wake up miss, so
196		 * other allocations on previous queue won't be starved.
197		 */
198		if (bt != bt_prev)
199			sbitmap_queue_wake_up(bt_prev, 1);
200
201		ws = bt_wait_ptr(bt, data->hctx);
202	} while (1);
 
203
204	sbitmap_finish_wait(bt, ws, &wait);
 
 
 
205
206found_tag:
207	/*
208	 * Give up this allocation if the hctx is inactive.  The caller will
209	 * retry on an active hctx.
210	 */
211	if (unlikely(test_bit(BLK_MQ_S_INACTIVE, &data->hctx->state))) {
212		blk_mq_put_tag(tags, data->ctx, tag + tag_offset);
213		return BLK_MQ_NO_TAG;
 
 
214	}
215	return tag + tag_offset;
 
 
 
 
 
 
 
 
 
 
 
 
 
216}
217
218void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx,
219		    unsigned int tag)
220{
221	if (!blk_mq_tag_is_reserved(tags, tag)) {
 
 
222		const int real_tag = tag - tags->nr_reserved_tags;
223
224		BUG_ON(real_tag >= tags->nr_tags);
225		sbitmap_queue_clear(&tags->bitmap_tags, real_tag, ctx->cpu);
226	} else {
 
227		sbitmap_queue_clear(&tags->breserved_tags, tag, ctx->cpu);
228	}
229}
230
231void blk_mq_put_tags(struct blk_mq_tags *tags, int *tag_array, int nr_tags)
232{
233	sbitmap_queue_clear_batch(&tags->bitmap_tags, tags->nr_reserved_tags,
234					tag_array, nr_tags);
235}
236
237struct bt_iter_data {
238	struct blk_mq_hw_ctx *hctx;
239	struct request_queue *q;
240	busy_tag_iter_fn *fn;
241	void *data;
242	bool reserved;
243};
244
245static struct request *blk_mq_find_and_get_req(struct blk_mq_tags *tags,
246		unsigned int bitnr)
247{
248	struct request *rq;
249	unsigned long flags;
250
251	spin_lock_irqsave(&tags->lock, flags);
252	rq = tags->rqs[bitnr];
253	if (!rq || rq->tag != bitnr || !req_ref_inc_not_zero(rq))
254		rq = NULL;
255	spin_unlock_irqrestore(&tags->lock, flags);
256	return rq;
257}
258
259static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
260{
261	struct bt_iter_data *iter_data = data;
262	struct blk_mq_hw_ctx *hctx = iter_data->hctx;
263	struct request_queue *q = iter_data->q;
264	struct blk_mq_tag_set *set = q->tag_set;
265	struct blk_mq_tags *tags;
266	struct request *rq;
267	bool ret = true;
268
269	if (blk_mq_is_shared_tags(set->flags))
270		tags = set->shared_tags;
271	else
272		tags = hctx->tags;
273
274	if (!iter_data->reserved)
275		bitnr += tags->nr_reserved_tags;
276	/*
277	 * We can hit rq == NULL here, because the tagging functions
278	 * test and set the bit before assigning ->rqs[].
279	 */
280	rq = blk_mq_find_and_get_req(tags, bitnr);
281	if (!rq)
282		return true;
283
284	if (rq->q == q && (!hctx || rq->mq_hctx == hctx))
285		ret = iter_data->fn(rq, iter_data->data);
286	blk_mq_put_rq_ref(rq);
287	return ret;
288}
289
290/**
291 * bt_for_each - iterate over the requests associated with a hardware queue
292 * @hctx:	Hardware queue to examine.
293 * @q:		Request queue to examine.
294 * @bt:		sbitmap to examine. This is either the breserved_tags member
295 *		or the bitmap_tags member of struct blk_mq_tags.
296 * @fn:		Pointer to the function that will be called for each request
297 *		associated with @hctx that has been assigned a driver tag.
298 *		@fn will be called as follows: @fn(@hctx, rq, @data, @reserved)
299 *		where rq is a pointer to a request. Return true to continue
300 *		iterating tags, false to stop.
301 * @data:	Will be passed as third argument to @fn.
302 * @reserved:	Indicates whether @bt is the breserved_tags member or the
303 *		bitmap_tags member of struct blk_mq_tags.
304 */
305static void bt_for_each(struct blk_mq_hw_ctx *hctx, struct request_queue *q,
306			struct sbitmap_queue *bt, busy_tag_iter_fn *fn,
307			void *data, bool reserved)
308{
309	struct bt_iter_data iter_data = {
310		.hctx = hctx,
311		.fn = fn,
312		.data = data,
313		.reserved = reserved,
314		.q = q,
315	};
316
317	sbitmap_for_each_set(&bt->sb, bt_iter, &iter_data);
318}
319
320struct bt_tags_iter_data {
321	struct blk_mq_tags *tags;
322	busy_tag_iter_fn *fn;
323	void *data;
324	unsigned int flags;
325};
326
327#define BT_TAG_ITER_RESERVED		(1 << 0)
328#define BT_TAG_ITER_STARTED		(1 << 1)
329#define BT_TAG_ITER_STATIC_RQS		(1 << 2)
330
331static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
332{
333	struct bt_tags_iter_data *iter_data = data;
334	struct blk_mq_tags *tags = iter_data->tags;
 
335	struct request *rq;
336	bool ret = true;
337	bool iter_static_rqs = !!(iter_data->flags & BT_TAG_ITER_STATIC_RQS);
338
339	if (!(iter_data->flags & BT_TAG_ITER_RESERVED))
340		bitnr += tags->nr_reserved_tags;
 
341
342	/*
343	 * We can hit rq == NULL here, because the tagging functions
344	 * test and set the bit before assigning ->rqs[].
345	 */
346	if (iter_static_rqs)
347		rq = tags->static_rqs[bitnr];
348	else
349		rq = blk_mq_find_and_get_req(tags, bitnr);
350	if (!rq)
351		return true;
352
353	if (!(iter_data->flags & BT_TAG_ITER_STARTED) ||
354	    blk_mq_request_started(rq))
355		ret = iter_data->fn(rq, iter_data->data);
356	if (!iter_static_rqs)
357		blk_mq_put_rq_ref(rq);
358	return ret;
359}
360
361/**
362 * bt_tags_for_each - iterate over the requests in a tag map
363 * @tags:	Tag map to iterate over.
364 * @bt:		sbitmap to examine. This is either the breserved_tags member
365 *		or the bitmap_tags member of struct blk_mq_tags.
366 * @fn:		Pointer to the function that will be called for each started
367 *		request. @fn will be called as follows: @fn(rq, @data,
368 *		@reserved) where rq is a pointer to a request. Return true
369 *		to continue iterating tags, false to stop.
370 * @data:	Will be passed as second argument to @fn.
371 * @flags:	BT_TAG_ITER_*
372 */
373static void bt_tags_for_each(struct blk_mq_tags *tags, struct sbitmap_queue *bt,
374			     busy_tag_iter_fn *fn, void *data, unsigned int flags)
375{
376	struct bt_tags_iter_data iter_data = {
377		.tags = tags,
378		.fn = fn,
379		.data = data,
380		.flags = flags,
381	};
382
383	if (tags->rqs)
384		sbitmap_for_each_set(&bt->sb, bt_tags_iter, &iter_data);
385}
386
387static void __blk_mq_all_tag_iter(struct blk_mq_tags *tags,
388		busy_tag_iter_fn *fn, void *priv, unsigned int flags)
389{
390	WARN_ON_ONCE(flags & BT_TAG_ITER_RESERVED);
391
392	if (tags->nr_reserved_tags)
393		bt_tags_for_each(tags, &tags->breserved_tags, fn, priv,
394				 flags | BT_TAG_ITER_RESERVED);
395	bt_tags_for_each(tags, &tags->bitmap_tags, fn, priv, flags);
396}
397
398/**
399 * blk_mq_all_tag_iter - iterate over all requests in a tag map
400 * @tags:	Tag map to iterate over.
401 * @fn:		Pointer to the function that will be called for each
402 *		request. @fn will be called as follows: @fn(rq, @priv,
403 *		reserved) where rq is a pointer to a request. 'reserved'
404 *		indicates whether or not @rq is a reserved request. Return
405 *		true to continue iterating tags, false to stop.
406 * @priv:	Will be passed as second argument to @fn.
407 *
408 * Caller has to pass the tag map from which requests are allocated.
409 */
410void blk_mq_all_tag_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn,
411		void *priv)
412{
413	__blk_mq_all_tag_iter(tags, fn, priv, BT_TAG_ITER_STATIC_RQS);
414}
415
416/**
417 * blk_mq_tagset_busy_iter - iterate over all started requests in a tag set
418 * @tagset:	Tag set to iterate over.
419 * @fn:		Pointer to the function that will be called for each started
420 *		request. @fn will be called as follows: @fn(rq, @priv,
421 *		reserved) where rq is a pointer to a request. 'reserved'
422 *		indicates whether or not @rq is a reserved request. Return
423 *		true to continue iterating tags, false to stop.
424 * @priv:	Will be passed as second argument to @fn.
425 *
426 * We grab one request reference before calling @fn and release it after
427 * @fn returns.
428 */
429void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
430		busy_tag_iter_fn *fn, void *priv)
431{
432	unsigned int flags = tagset->flags;
433	int i, nr_tags;
434
435	nr_tags = blk_mq_is_shared_tags(flags) ? 1 : tagset->nr_hw_queues;
436
437	for (i = 0; i < nr_tags; i++) {
438		if (tagset->tags && tagset->tags[i])
439			__blk_mq_all_tag_iter(tagset->tags[i], fn, priv,
440					      BT_TAG_ITER_STARTED);
441	}
442}
443EXPORT_SYMBOL(blk_mq_tagset_busy_iter);
444
445static bool blk_mq_tagset_count_completed_rqs(struct request *rq, void *data)
446{
447	unsigned *count = data;
448
449	if (blk_mq_request_completed(rq))
450		(*count)++;
451	return true;
452}
453
454/**
455 * blk_mq_tagset_wait_completed_request - Wait until all scheduled request
456 * completions have finished.
457 * @tagset:	Tag set to drain completed request
458 *
459 * Note: This function has to be run after all IO queues are shutdown
460 */
461void blk_mq_tagset_wait_completed_request(struct blk_mq_tag_set *tagset)
462{
463	while (true) {
464		unsigned count = 0;
465
466		blk_mq_tagset_busy_iter(tagset,
467				blk_mq_tagset_count_completed_rqs, &count);
468		if (!count)
469			break;
470		msleep(5);
 
 
 
 
471	}
 
 
 
472}
473EXPORT_SYMBOL(blk_mq_tagset_wait_completed_request);
474
475/**
476 * blk_mq_queue_tag_busy_iter - iterate over all requests with a driver tag
477 * @q:		Request queue to examine.
478 * @fn:		Pointer to the function that will be called for each request
479 *		on @q. @fn will be called as follows: @fn(hctx, rq, @priv,
480 *		reserved) where rq is a pointer to a request and hctx points
481 *		to the hardware queue associated with the request. 'reserved'
482 *		indicates whether or not @rq is a reserved request.
483 * @priv:	Will be passed as third argument to @fn.
484 *
485 * Note: if @q->tag_set is shared with other request queues then @fn will be
486 * called for all requests on all queues that share that tag set and not only
487 * for requests associated with @q.
488 */
489void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_tag_iter_fn *fn,
490		void *priv)
491{
492	/*
493	 * __blk_mq_update_nr_hw_queues() updates nr_hw_queues and hctx_table
494	 * while the queue is frozen. So we can use q_usage_counter to avoid
495	 * racing with it.
496	 */
497	if (!percpu_ref_tryget(&q->q_usage_counter))
498		return;
499
500	if (blk_mq_is_shared_tags(q->tag_set->flags)) {
501		struct blk_mq_tags *tags = q->tag_set->shared_tags;
502		struct sbitmap_queue *bresv = &tags->breserved_tags;
503		struct sbitmap_queue *btags = &tags->bitmap_tags;
504
505		if (tags->nr_reserved_tags)
506			bt_for_each(NULL, q, bresv, fn, priv, true);
507		bt_for_each(NULL, q, btags, fn, priv, false);
508	} else {
509		struct blk_mq_hw_ctx *hctx;
510		unsigned long i;
511
512		queue_for_each_hw_ctx(q, hctx, i) {
513			struct blk_mq_tags *tags = hctx->tags;
514			struct sbitmap_queue *bresv = &tags->breserved_tags;
515			struct sbitmap_queue *btags = &tags->bitmap_tags;
516
517			/*
518			 * If no software queues are currently mapped to this
519			 * hardware queue, there's nothing to check
520			 */
521			if (!blk_mq_hw_queue_mapped(hctx))
522				continue;
523
524			if (tags->nr_reserved_tags)
525				bt_for_each(hctx, q, bresv, fn, priv, true);
526			bt_for_each(hctx, q, btags, fn, priv, false);
527		}
528	}
529	blk_queue_exit(q);
 
 
 
 
 
530}
531
532static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth,
533		    bool round_robin, int node)
534{
535	return sbitmap_queue_init_node(bt, depth, -1, round_robin, GFP_KERNEL,
536				       node);
537}
538
539int blk_mq_init_bitmaps(struct sbitmap_queue *bitmap_tags,
540			struct sbitmap_queue *breserved_tags,
541			unsigned int queue_depth, unsigned int reserved,
542			int node, int alloc_policy)
543{
544	unsigned int depth = queue_depth - reserved;
545	bool round_robin = alloc_policy == BLK_TAG_ALLOC_RR;
546
547	if (bt_alloc(bitmap_tags, depth, round_robin, node))
548		return -ENOMEM;
549	if (bt_alloc(breserved_tags, reserved, round_robin, node))
 
550		goto free_bitmap_tags;
551
552	return 0;
553
554free_bitmap_tags:
555	sbitmap_queue_free(bitmap_tags);
556	return -ENOMEM;
 
 
557}
558
559struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
560				     unsigned int reserved_tags,
561				     int node, int alloc_policy)
562{
563	struct blk_mq_tags *tags;
564
565	if (total_tags > BLK_MQ_TAG_MAX) {
566		pr_err("blk-mq: tag depth too large\n");
567		return NULL;
568	}
569
570	tags = kzalloc_node(sizeof(*tags), GFP_KERNEL, node);
571	if (!tags)
572		return NULL;
573
574	tags->nr_tags = total_tags;
575	tags->nr_reserved_tags = reserved_tags;
576	spin_lock_init(&tags->lock);
577
578	if (blk_mq_init_bitmaps(&tags->bitmap_tags, &tags->breserved_tags,
579				total_tags, reserved_tags, node,
580				alloc_policy) < 0) {
581		kfree(tags);
582		return NULL;
583	}
584	return tags;
585}
586
587void blk_mq_free_tags(struct blk_mq_tags *tags)
588{
589	sbitmap_queue_free(&tags->bitmap_tags);
590	sbitmap_queue_free(&tags->breserved_tags);
591	kfree(tags);
592}
593
594int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
595			    struct blk_mq_tags **tagsptr, unsigned int tdepth,
596			    bool can_grow)
597{
598	struct blk_mq_tags *tags = *tagsptr;
599
600	if (tdepth <= tags->nr_reserved_tags)
601		return -EINVAL;
602
603	/*
604	 * If we are allowed to grow beyond the original size, allocate
605	 * a new set of tags before freeing the old one.
606	 */
607	if (tdepth > tags->nr_tags) {
608		struct blk_mq_tag_set *set = hctx->queue->tag_set;
609		struct blk_mq_tags *new;
610
611		if (!can_grow)
612			return -EINVAL;
613
614		/*
615		 * We need some sort of upper limit, set it high enough that
616		 * no valid use cases should require more.
617		 */
618		if (tdepth > MAX_SCHED_RQ)
619			return -EINVAL;
620
621		/*
622		 * Only the sbitmap needs resizing since we allocated the max
623		 * initially.
624		 */
625		if (blk_mq_is_shared_tags(set->flags))
626			return 0;
627
628		new = blk_mq_alloc_map_and_rqs(set, hctx->queue_num, tdepth);
629		if (!new)
630			return -ENOMEM;
631
632		blk_mq_free_map_and_rqs(set, *tagsptr, hctx->queue_num);
633		*tagsptr = new;
634	} else {
635		/*
636		 * Don't need (or can't) update reserved tags here, they
637		 * remain static and should never need resizing.
638		 */
639		sbitmap_queue_resize(&tags->bitmap_tags,
640				tdepth - tags->nr_reserved_tags);
641	}
642
 
643	return 0;
644}
645
646void blk_mq_tag_resize_shared_tags(struct blk_mq_tag_set *set, unsigned int size)
647{
648	struct blk_mq_tags *tags = set->shared_tags;
649
650	sbitmap_queue_resize(&tags->bitmap_tags, size - set->reserved_tags);
651}
652
653void blk_mq_tag_update_sched_shared_tags(struct request_queue *q)
654{
655	sbitmap_queue_resize(&q->sched_shared_tags->bitmap_tags,
656			     q->nr_requests - q->tag_set->reserved_tags);
657}
658
659/**
660 * blk_mq_unique_tag() - return a tag that is unique queue-wide
661 * @rq: request for which to compute a unique tag
662 *
663 * The tag field in struct request is unique per hardware queue but not over
664 * all hardware queues. Hence this function that returns a tag with the
665 * hardware context index in the upper bits and the per hardware queue tag in
666 * the lower bits.
667 *
668 * Note: When called for a request that is queued on a non-multiqueue request
669 * queue, the hardware context index is set to zero.
670 */
671u32 blk_mq_unique_tag(struct request *rq)
672{
673	return (rq->mq_hctx->queue_num << BLK_MQ_UNIQUE_TAG_BITS) |
 
 
 
 
 
 
 
 
 
674		(rq->tag & BLK_MQ_UNIQUE_TAG_MASK);
675}
676EXPORT_SYMBOL(blk_mq_unique_tag);
v4.10.11
 
  1/*
  2 * Tag allocation using scalable bitmaps. Uses active queue tracking to support
  3 * fairer distribution of tags between multiple submitters when a shared tag map
  4 * is used.
  5 *
  6 * Copyright (C) 2013-2014 Jens Axboe
  7 */
  8#include <linux/kernel.h>
  9#include <linux/module.h>
 10
 11#include <linux/blk-mq.h>
 
 12#include "blk.h"
 13#include "blk-mq.h"
 
 14#include "blk-mq-tag.h"
 15
 16bool blk_mq_has_free_tags(struct blk_mq_tags *tags)
 
 
 
 
 17{
 18	if (!tags)
 19		return true;
 20
 21	return sbitmap_any_bit_clear(&tags->bitmap_tags.sb);
 
 
 
 22}
 23
 24/*
 25 * If a previously inactive queue goes active, bump the active user count.
 
 
 
 26 */
 27bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
 28{
 29	if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
 30	    !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
 31		atomic_inc(&hctx->tags->active_queues);
 32
 33	return true;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 34}
 35
 36/*
 37 * Wakeup all potentially sleeping on tags
 38 */
 39void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
 40{
 41	sbitmap_queue_wake_all(&tags->bitmap_tags);
 42	if (include_reserve)
 43		sbitmap_queue_wake_all(&tags->breserved_tags);
 44}
 45
 46/*
 47 * If a previously busy queue goes inactive, potential waiters could now
 48 * be allowed to queue. Wake them up and check.
 49 */
 50void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
 51{
 52	struct blk_mq_tags *tags = hctx->tags;
 
 53
 54	if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
 55		return;
 
 
 
 
 
 
 
 
 
 
 56
 57	atomic_dec(&tags->active_queues);
 58
 59	blk_mq_tag_wakeup_all(tags, false);
 60}
 61
 62/*
 63 * For shared tag users, we track the number of currently active users
 64 * and attempt to provide a fair share of the tag depth for each of them.
 65 */
 66static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
 67				  struct sbitmap_queue *bt)
 68{
 69	unsigned int depth, users;
 70
 71	if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_SHARED))
 72		return true;
 73	if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
 74		return true;
 75
 76	/*
 77	 * Don't try dividing an ant
 78	 */
 79	if (bt->sb.depth == 1)
 80		return true;
 81
 82	users = atomic_read(&hctx->tags->active_queues);
 83	if (!users)
 84		return true;
 85
 86	/*
 87	 * Allow at least some tags
 88	 */
 89	depth = max((bt->sb.depth + users - 1) / users, 4U);
 90	return atomic_read(&hctx->nr_active) < depth;
 91}
 92
 93static int __bt_get(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt)
 
 94{
 95	if (!hctx_may_queue(hctx, bt))
 96		return -1;
 97	return __sbitmap_queue_get(bt);
 
 
 
 
 
 
 
 98}
 99
100static int bt_get(struct blk_mq_alloc_data *data, struct sbitmap_queue *bt,
101		  struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags)
102{
 
 
103	struct sbq_wait_state *ws;
104	DEFINE_WAIT(wait);
 
105	int tag;
106
107	tag = __bt_get(hctx, bt);
108	if (tag != -1)
109		return tag;
 
 
 
 
 
 
 
 
 
 
 
 
110
111	if (data->flags & BLK_MQ_REQ_NOWAIT)
112		return -1;
113
114	ws = bt_wait_ptr(bt, hctx);
115	do {
116		prepare_to_wait(&ws->wait, &wait, TASK_UNINTERRUPTIBLE);
117
118		tag = __bt_get(hctx, bt);
119		if (tag != -1)
120			break;
121
122		/*
123		 * We're out of tags on this hardware queue, kick any
124		 * pending IO submits before going to sleep waiting for
125		 * some to complete. Note that hctx can be NULL here for
126		 * reserved tag allocation.
127		 */
128		if (hctx)
129			blk_mq_run_hw_queue(hctx, false);
130
131		/*
132		 * Retry tag allocation after running the hardware queue,
133		 * as running the queue may also have found completions.
134		 */
135		tag = __bt_get(hctx, bt);
136		if (tag != -1)
137			break;
138
139		blk_mq_put_ctx(data->ctx);
140
 
 
 
 
 
141		io_schedule();
142
 
 
143		data->ctx = blk_mq_get_ctx(data->q);
144		data->hctx = blk_mq_map_queue(data->q, data->ctx->cpu);
145		if (data->flags & BLK_MQ_REQ_RESERVED) {
146			bt = &data->hctx->tags->breserved_tags;
147		} else {
148			hctx = data->hctx;
149			bt = &hctx->tags->bitmap_tags;
150		}
151		finish_wait(&ws->wait, &wait);
152		ws = bt_wait_ptr(bt, hctx);
153	} while (1);
154
155	finish_wait(&ws->wait, &wait);
156	return tag;
157}
 
 
 
 
158
159static unsigned int __blk_mq_get_tag(struct blk_mq_alloc_data *data)
160{
161	int tag;
162
163	tag = bt_get(data, &data->hctx->tags->bitmap_tags, data->hctx,
164		     data->hctx->tags);
165	if (tag >= 0)
166		return tag + data->hctx->tags->nr_reserved_tags;
167
168	return BLK_MQ_TAG_FAIL;
169}
170
171static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_alloc_data *data)
172{
173	int tag;
174
175	if (unlikely(!data->hctx->tags->nr_reserved_tags)) {
176		WARN_ON_ONCE(1);
177		return BLK_MQ_TAG_FAIL;
178	}
179
180	tag = bt_get(data, &data->hctx->tags->breserved_tags, NULL,
181		     data->hctx->tags);
182	if (tag < 0)
183		return BLK_MQ_TAG_FAIL;
184
185	return tag;
186}
187
188unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
189{
190	if (data->flags & BLK_MQ_REQ_RESERVED)
191		return __blk_mq_get_reserved_tag(data);
192	return __blk_mq_get_tag(data);
193}
194
195void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
196		    unsigned int tag)
197{
198	struct blk_mq_tags *tags = hctx->tags;
199
200	if (tag >= tags->nr_reserved_tags) {
201		const int real_tag = tag - tags->nr_reserved_tags;
202
203		BUG_ON(real_tag >= tags->nr_tags);
204		sbitmap_queue_clear(&tags->bitmap_tags, real_tag, ctx->cpu);
205	} else {
206		BUG_ON(tag >= tags->nr_reserved_tags);
207		sbitmap_queue_clear(&tags->breserved_tags, tag, ctx->cpu);
208	}
209}
210
 
 
 
 
 
 
211struct bt_iter_data {
212	struct blk_mq_hw_ctx *hctx;
213	busy_iter_fn *fn;
 
214	void *data;
215	bool reserved;
216};
217
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
219{
220	struct bt_iter_data *iter_data = data;
221	struct blk_mq_hw_ctx *hctx = iter_data->hctx;
222	struct blk_mq_tags *tags = hctx->tags;
223	bool reserved = iter_data->reserved;
 
224	struct request *rq;
 
 
 
 
 
 
225
226	if (!reserved)
227		bitnr += tags->nr_reserved_tags;
228	rq = tags->rqs[bitnr];
 
 
 
 
 
 
229
230	if (rq->q == hctx->queue)
231		iter_data->fn(hctx, rq, iter_data->data, reserved);
232	return true;
 
233}
234
235static void bt_for_each(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt,
236			busy_iter_fn *fn, void *data, bool reserved)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237{
238	struct bt_iter_data iter_data = {
239		.hctx = hctx,
240		.fn = fn,
241		.data = data,
242		.reserved = reserved,
 
243	};
244
245	sbitmap_for_each_set(&bt->sb, bt_iter, &iter_data);
246}
247
248struct bt_tags_iter_data {
249	struct blk_mq_tags *tags;
250	busy_tag_iter_fn *fn;
251	void *data;
252	bool reserved;
253};
254
 
 
 
 
255static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
256{
257	struct bt_tags_iter_data *iter_data = data;
258	struct blk_mq_tags *tags = iter_data->tags;
259	bool reserved = iter_data->reserved;
260	struct request *rq;
 
 
261
262	if (!reserved)
263		bitnr += tags->nr_reserved_tags;
264	rq = tags->rqs[bitnr];
265
266	iter_data->fn(rq, iter_data->data, reserved);
267	return true;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268}
269
 
 
 
 
 
 
 
 
 
 
 
 
270static void bt_tags_for_each(struct blk_mq_tags *tags, struct sbitmap_queue *bt,
271			     busy_tag_iter_fn *fn, void *data, bool reserved)
272{
273	struct bt_tags_iter_data iter_data = {
274		.tags = tags,
275		.fn = fn,
276		.data = data,
277		.reserved = reserved,
278	};
279
280	if (tags->rqs)
281		sbitmap_for_each_set(&bt->sb, bt_tags_iter, &iter_data);
282}
283
284static void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags,
285		busy_tag_iter_fn *fn, void *priv)
286{
 
 
287	if (tags->nr_reserved_tags)
288		bt_tags_for_each(tags, &tags->breserved_tags, fn, priv, true);
289	bt_tags_for_each(tags, &tags->bitmap_tags, fn, priv, false);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
290}
291
 
 
 
 
 
 
 
 
 
 
 
 
 
292void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
293		busy_tag_iter_fn *fn, void *priv)
294{
295	int i;
 
296
297	for (i = 0; i < tagset->nr_hw_queues; i++) {
 
 
298		if (tagset->tags && tagset->tags[i])
299			blk_mq_all_tag_busy_iter(tagset->tags[i], fn, priv);
 
300	}
301}
302EXPORT_SYMBOL(blk_mq_tagset_busy_iter);
303
304int blk_mq_reinit_tagset(struct blk_mq_tag_set *set)
305{
306	int i, j, ret = 0;
307
308	if (!set->ops->reinit_request)
309		goto out;
 
 
310
311	for (i = 0; i < set->nr_hw_queues; i++) {
312		struct blk_mq_tags *tags = set->tags[i];
 
 
 
 
 
 
 
 
 
313
314		for (j = 0; j < tags->nr_tags; j++) {
315			if (!tags->rqs[j])
316				continue;
317
318			ret = set->ops->reinit_request(set->driver_data,
319						tags->rqs[j]);
320			if (ret)
321				goto out;
322		}
323	}
324
325out:
326	return ret;
327}
328EXPORT_SYMBOL_GPL(blk_mq_reinit_tagset);
329
330void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
331		void *priv)
332{
333	struct blk_mq_hw_ctx *hctx;
334	int i;
 
 
 
 
 
335
 
 
 
 
336
337	queue_for_each_hw_ctx(q, hctx, i) {
338		struct blk_mq_tags *tags = hctx->tags;
 
 
 
 
339
340		/*
341		 * If not software queues are currently mapped to this
342		 * hardware queue, there's nothing to check
343		 */
344		if (!blk_mq_hw_queue_mapped(hctx))
345			continue;
 
 
 
 
 
346
347		if (tags->nr_reserved_tags)
348			bt_for_each(hctx, &tags->breserved_tags, fn, priv, true);
349		bt_for_each(hctx, &tags->bitmap_tags, fn, priv, false);
 
350	}
351
352}
353
354static unsigned int bt_unused_tags(const struct sbitmap_queue *bt)
355{
356	return bt->sb.depth - sbitmap_weight(&bt->sb);
357}
358
359static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth,
360		    bool round_robin, int node)
361{
362	return sbitmap_queue_init_node(bt, depth, -1, round_robin, GFP_KERNEL,
363				       node);
364}
365
366static struct blk_mq_tags *blk_mq_init_bitmap_tags(struct blk_mq_tags *tags,
367						   int node, int alloc_policy)
 
 
368{
369	unsigned int depth = tags->nr_tags - tags->nr_reserved_tags;
370	bool round_robin = alloc_policy == BLK_TAG_ALLOC_RR;
371
372	if (bt_alloc(&tags->bitmap_tags, depth, round_robin, node))
373		goto free_tags;
374	if (bt_alloc(&tags->breserved_tags, tags->nr_reserved_tags, round_robin,
375		     node))
376		goto free_bitmap_tags;
377
378	return tags;
 
379free_bitmap_tags:
380	sbitmap_queue_free(&tags->bitmap_tags);
381free_tags:
382	kfree(tags);
383	return NULL;
384}
385
386struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
387				     unsigned int reserved_tags,
388				     int node, int alloc_policy)
389{
390	struct blk_mq_tags *tags;
391
392	if (total_tags > BLK_MQ_TAG_MAX) {
393		pr_err("blk-mq: tag depth too large\n");
394		return NULL;
395	}
396
397	tags = kzalloc_node(sizeof(*tags), GFP_KERNEL, node);
398	if (!tags)
399		return NULL;
400
401	tags->nr_tags = total_tags;
402	tags->nr_reserved_tags = reserved_tags;
 
403
404	return blk_mq_init_bitmap_tags(tags, node, alloc_policy);
 
 
 
 
 
 
405}
406
407void blk_mq_free_tags(struct blk_mq_tags *tags)
408{
409	sbitmap_queue_free(&tags->bitmap_tags);
410	sbitmap_queue_free(&tags->breserved_tags);
411	kfree(tags);
412}
413
414int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int tdepth)
 
 
415{
416	tdepth -= tags->nr_reserved_tags;
417	if (tdepth > tags->nr_tags)
 
418		return -EINVAL;
419
420	/*
421	 * Don't need (or can't) update reserved tags here, they remain
422	 * static and should never need resizing.
423	 */
424	sbitmap_queue_resize(&tags->bitmap_tags, tdepth);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
425
426	blk_mq_tag_wakeup_all(tags, false);
427	return 0;
428}
429
 
 
 
 
 
 
 
 
 
 
 
 
 
430/**
431 * blk_mq_unique_tag() - return a tag that is unique queue-wide
432 * @rq: request for which to compute a unique tag
433 *
434 * The tag field in struct request is unique per hardware queue but not over
435 * all hardware queues. Hence this function that returns a tag with the
436 * hardware context index in the upper bits and the per hardware queue tag in
437 * the lower bits.
438 *
439 * Note: When called for a request that is queued on a non-multiqueue request
440 * queue, the hardware context index is set to zero.
441 */
442u32 blk_mq_unique_tag(struct request *rq)
443{
444	struct request_queue *q = rq->q;
445	struct blk_mq_hw_ctx *hctx;
446	int hwq = 0;
447
448	if (q->mq_ops) {
449		hctx = blk_mq_map_queue(q, rq->mq_ctx->cpu);
450		hwq = hctx->queue_num;
451	}
452
453	return (hwq << BLK_MQ_UNIQUE_TAG_BITS) |
454		(rq->tag & BLK_MQ_UNIQUE_TAG_MASK);
455}
456EXPORT_SYMBOL(blk_mq_unique_tag);
457
458ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page)
459{
460	char *orig_page = page;
461	unsigned int free, res;
462
463	if (!tags)
464		return 0;
465
466	page += sprintf(page, "nr_tags=%u, reserved_tags=%u, "
467			"bits_per_word=%u\n",
468			tags->nr_tags, tags->nr_reserved_tags,
469			1U << tags->bitmap_tags.sb.shift);
470
471	free = bt_unused_tags(&tags->bitmap_tags);
472	res = bt_unused_tags(&tags->breserved_tags);
473
474	page += sprintf(page, "nr_free=%u, nr_reserved=%u\n", free, res);
475	page += sprintf(page, "active_queues=%u\n", atomic_read(&tags->active_queues));
476
477	return page - orig_page;
478}