/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include <linux/debugobjects.h>

#include "gt/intel_engine_pm.h"

#include "i915_drv.h"
#include "i915_active.h"
#include "i915_globals.h"

#define BKL(ref) (&(ref)->i915->drm.struct_mutex)

/*
 * Active refs memory management
 *
 * To be more economical with memory, we reap all the i915_active trees as
 * they idle (when we know the active requests are inactive) and allocate the
 * nodes from a local slab cache to hopefully reduce the fragmentation.
 */
static struct i915_global_active {
	struct i915_global base;
	struct kmem_cache *slab_cache;
} global;

struct active_node {
	struct i915_active_request base;
	struct i915_active *ref;
	struct rb_node node;
	u64 timeline;
};

static inline struct active_node *
node_from_active(struct i915_active_request *active)
{
	return container_of(active, struct active_node, base);
}

#define take_preallocated_barriers(x) llist_del_all(&(x)->preallocated_barriers)

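/*
 * Barrier proto-nodes are active_nodes that do not yet track a real request:
 * base.request holds an ERR_PTR (see is_barrier()) and, since the node is not
 * on any list, base.link.prev is reused to stash the owning engine while
 * base.link doubles as the llist entry queued on engine->barrier_tasks
 * (see i915_active_acquire_preallocate_barrier() below).
 */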
static inline bool is_barrier(const struct i915_active_request *active)
{
	return IS_ERR(rcu_access_pointer(active->request));
}

static inline struct llist_node *barrier_to_ll(struct active_node *node)
{
	GEM_BUG_ON(!is_barrier(&node->base));
	return (struct llist_node *)&node->base.link;
}

static inline struct intel_engine_cs *
__barrier_to_engine(struct active_node *node)
{
	return (struct intel_engine_cs *)READ_ONCE(node->base.link.prev);
}

static inline struct intel_engine_cs *
barrier_to_engine(struct active_node *node)
{
	GEM_BUG_ON(!is_barrier(&node->base));
	return __barrier_to_engine(node);
}

static inline struct active_node *barrier_from_ll(struct llist_node *x)
{
	return container_of((struct list_head *)x,
			    struct active_node, base.link);
}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS)

static void *active_debug_hint(void *addr)
{
	struct i915_active *ref = addr;

	return (void *)ref->active ?: (void *)ref->retire ?: (void *)ref;
}

static struct debug_obj_descr active_debug_desc = {
	.name = "i915_active",
	.debug_hint = active_debug_hint,
};

static void debug_active_init(struct i915_active *ref)
{
	debug_object_init(ref, &active_debug_desc);
}

static void debug_active_activate(struct i915_active *ref)
{
	debug_object_activate(ref, &active_debug_desc);
}

static void debug_active_deactivate(struct i915_active *ref)
{
	debug_object_deactivate(ref, &active_debug_desc);
}

static void debug_active_fini(struct i915_active *ref)
{
	debug_object_free(ref, &active_debug_desc);
}

static void debug_active_assert(struct i915_active *ref)
{
	debug_object_assert_init(ref, &active_debug_desc);
}

#else

static inline void debug_active_init(struct i915_active *ref) { }
static inline void debug_active_activate(struct i915_active *ref) { }
static inline void debug_active_deactivate(struct i915_active *ref) { }
static inline void debug_active_fini(struct i915_active *ref) { }
static inline void debug_active_assert(struct i915_active *ref) { }

#endif

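/*
 * Drop a reference; once the count hits zero the rbtree of nodes is reaped
 * back into the slab cache and the optional ref->retire() callback is
 * invoked. Callers must hold ref->mutex, which is dropped before returning.
 */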
static void
__active_retire(struct i915_active *ref)
{
	struct active_node *it, *n;
	struct rb_root root;
	bool retire = false;

	lockdep_assert_held(&ref->mutex);

	/* return the unused nodes to our slabcache -- flushing the allocator */
	if (atomic_dec_and_test(&ref->count)) {
		debug_active_deactivate(ref);
		root = ref->tree;
		ref->tree = RB_ROOT;
		ref->cache = NULL;
		retire = true;
	}

	mutex_unlock(&ref->mutex);
	if (!retire)
		return;

	rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
		GEM_BUG_ON(i915_active_request_isset(&it->base));
		kmem_cache_free(global.slab_cache, it);
	}

	/* After the final retire, the entire struct may be freed */
	if (ref->retire)
		ref->retire(ref);
}

static void
active_retire(struct i915_active *ref)
{
	GEM_BUG_ON(!atomic_read(&ref->count));
	if (atomic_add_unless(&ref->count, -1, 1))
		return;

	/* One active may be flushed from inside the acquire of another */
	mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING);
	__active_retire(ref);
}

static void
node_retire(struct i915_active_request *base, struct i915_request *rq)
{
	active_retire(node_from_active(base)->ref);
}

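/*
 * Look up (or allocate) the tracking slot for the given timeline, using
 * ref->cache to short-circuit the rbtree walk for the common case of
 * repeated use of the same timeline. Returns NULL if a new node is needed
 * but cannot be allocated.
 */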
static struct i915_active_request *
active_instance(struct i915_active *ref, struct intel_timeline *tl)
{
	struct active_node *node, *prealloc;
	struct rb_node **p, *parent;
	u64 idx = tl->fence_context;

	/*
	 * We track the most recently used timeline to skip an rbtree search
	 * for the common case; under typical loads we never need the rbtree
	 * at all. We can reuse the last slot if it is empty, that is
	 * after the previous activity has been retired, or if it matches the
	 * current timeline.
	 */
	node = READ_ONCE(ref->cache);
	if (node && node->timeline == idx)
		return &node->base;

	/* Preallocate a replacement, just in case */
	prealloc = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
	if (!prealloc)
		return NULL;

	mutex_lock(&ref->mutex);
	GEM_BUG_ON(i915_active_is_idle(ref));

	parent = NULL;
	p = &ref->tree.rb_node;
	while (*p) {
		parent = *p;

		node = rb_entry(parent, struct active_node, node);
		if (node->timeline == idx) {
			kmem_cache_free(global.slab_cache, prealloc);
			goto out;
		}

		if (node->timeline < idx)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}

	node = prealloc;
	i915_active_request_init(&node->base, &tl->mutex, NULL, node_retire);
	node->ref = ref;
	node->timeline = idx;

	rb_link_node(&node->node, parent, p);
	rb_insert_color(&node->node, &ref->tree);

out:
	ref->cache = node;
	mutex_unlock(&ref->mutex);

	BUILD_BUG_ON(offsetof(typeof(*node), base));
	return &node->base;
}

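/*
 * Rough usage sketch: an i915_active is embedded in a larger object and
 * initialised here (typically via an i915_active_init() wrapper that
 * supplies the lock_class_key); each request that uses the object is then
 * tracked with i915_active_ref() under its timeline mutex, and once the
 * last tracked request retires the optional retire() callback fires so the
 * owner may release the object.
 */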
void __i915_active_init(struct drm_i915_private *i915,
			struct i915_active *ref,
			int (*active)(struct i915_active *ref),
			void (*retire)(struct i915_active *ref),
			struct lock_class_key *key)
{
	debug_active_init(ref);

	ref->i915 = i915;
	ref->flags = 0;
	ref->active = active;
	ref->retire = retire;
	ref->tree = RB_ROOT;
	ref->cache = NULL;
	init_llist_head(&ref->preallocated_barriers);
	atomic_set(&ref->count, 0);
	__mutex_init(&ref->mutex, "i915_active", key);
}

static bool ____active_del_barrier(struct i915_active *ref,
				   struct active_node *node,
				   struct intel_engine_cs *engine)
{
	struct llist_node *head = NULL, *tail = NULL;
	struct llist_node *pos, *next;

	GEM_BUG_ON(node->timeline != engine->kernel_context->timeline->fence_context);

	/*
	 * Rebuild the llist excluding our node. We may perform this
	 * outside of the kernel_context timeline mutex and so someone
	 * else may be manipulating the engine->barrier_tasks, in
	 * which case either we or they will be upset :)
	 *
	 * A second __active_del_barrier() will report failure to claim
	 * the active_node and the caller will just shrug and know not to
	 * claim ownership of its node.
	 *
	 * A concurrent i915_request_add_active_barriers() will miss adding
	 * any of the tasks, but we will try again on the next -- and since
	 * we are actively using the barrier, we know that there will be
	 * at least another opportunity when we idle.
	 */
	llist_for_each_safe(pos, next, llist_del_all(&engine->barrier_tasks)) {
		if (node == barrier_from_ll(pos)) {
			node = NULL;
			continue;
		}

		pos->next = head;
		head = pos;
		if (!tail)
			tail = pos;
	}
	if (head)
		llist_add_batch(head, tail, &engine->barrier_tasks);

	return !node;
}

static bool
__active_del_barrier(struct i915_active *ref, struct active_node *node)
{
	return ____active_del_barrier(ref, node, barrier_to_engine(node));
}

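/*
 * Track rq as the most recent user of ref on the given timeline. The caller
 * must hold tl->mutex; the first request tracked on a timeline takes an
 * additional reference on ref, dropped again when that slot retires via
 * node_retire().
 */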
int i915_active_ref(struct i915_active *ref,
		    struct intel_timeline *tl,
		    struct i915_request *rq)
{
	struct i915_active_request *active;
	int err;

	lockdep_assert_held(&tl->mutex);

	/* Prevent reaping in case we malloc/wait while building the tree */
	err = i915_active_acquire(ref);
	if (err)
		return err;

	active = active_instance(ref, tl);
	if (!active) {
		err = -ENOMEM;
		goto out;
	}

	if (is_barrier(active)) { /* proto-node used by our idle barrier */
		/*
		 * This request is on the kernel_context timeline, and so
		 * we can use it to substitute for the pending idle-barrier
		 * request that we want to emit on the kernel_context.
		 */
		__active_del_barrier(ref, node_from_active(active));
		RCU_INIT_POINTER(active->request, NULL);
		INIT_LIST_HEAD(&active->link);
	} else {
		if (!i915_active_request_isset(active))
			atomic_inc(&ref->count);
	}
	GEM_BUG_ON(!atomic_read(&ref->count));
	__i915_active_request_set(active, rq);

out:
	i915_active_release(ref);
	return err;
}

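/*
 * Take a reference on ref so that it cannot be retired and reaped while the
 * caller is building up its tracking tree. The first acquisition also
 * invokes the optional ref->active() callback. Pair with
 * i915_active_release().
 */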
int i915_active_acquire(struct i915_active *ref)
{
	int err;

	debug_active_assert(ref);
	if (atomic_add_unless(&ref->count, 1, 0))
		return 0;

	err = mutex_lock_interruptible(&ref->mutex);
	if (err)
		return err;

	if (!atomic_read(&ref->count) && ref->active)
		err = ref->active(ref);
	if (!err) {
		debug_active_activate(ref);
		atomic_inc(&ref->count);
	}

	mutex_unlock(&ref->mutex);

	return err;
}

void i915_active_release(struct i915_active *ref)
{
	debug_active_assert(ref);
	active_retire(ref);
}

static void __active_ungrab(struct i915_active *ref)
{
	clear_and_wake_up_bit(I915_ACTIVE_GRAB_BIT, &ref->flags);
}

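/*
 * Try to take a reference on ref, but only if it is still active. The GRAB
 * bit excludes concurrent grabbers and is waited upon by i915_active_wait()
 * until i915_active_ungrab() releases it again.
 */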
bool i915_active_trygrab(struct i915_active *ref)
{
	debug_active_assert(ref);

	if (test_and_set_bit(I915_ACTIVE_GRAB_BIT, &ref->flags))
		return false;

	if (!atomic_add_unless(&ref->count, 1, 0)) {
		__active_ungrab(ref);
		return false;
	}

	return true;
}

void i915_active_ungrab(struct i915_active *ref)
{
	GEM_BUG_ON(!test_bit(I915_ACTIVE_GRAB_BIT, &ref->flags));

	active_retire(ref);
	__active_ungrab(ref);
}

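/*
 * Flush and retire all requests currently tracked by ref, waiting for it to
 * become idle. Returns 0 once idle, -EBUSY if unconnected idle barriers
 * remain in the tree, -EINTR if the killable wait is interrupted, or another
 * error from retiring a tracked request.
 */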
int i915_active_wait(struct i915_active *ref)
{
	struct active_node *it, *n;
	int err;

	might_sleep();
	might_lock(&ref->mutex);

	if (i915_active_is_idle(ref))
		return 0;

	err = mutex_lock_interruptible(&ref->mutex);
	if (err)
		return err;

	if (!atomic_add_unless(&ref->count, 1, 0)) {
		mutex_unlock(&ref->mutex);
		return 0;
	}

	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
		if (is_barrier(&it->base)) { /* unconnected idle-barrier */
			err = -EBUSY;
			break;
		}

		err = i915_active_request_retire(&it->base, BKL(ref));
		if (err)
			break;
	}

	__active_retire(ref);
	if (err)
		return err;

	if (wait_on_bit(&ref->flags, I915_ACTIVE_GRAB_BIT, TASK_KILLABLE))
		return -EINTR;

	if (!i915_active_is_idle(ref))
		return -EBUSY;

	return 0;
}

int i915_request_await_active_request(struct i915_request *rq,
				      struct i915_active_request *active)
{
	struct i915_request *barrier =
		i915_active_request_raw(active, &rq->i915->drm.struct_mutex);

	return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
}

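/*
 * Order rq after every request currently tracked by ref, so that rq executes
 * only after all of ref's tracked requests have completed.
 */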
int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
{
	struct active_node *it, *n;
	int err;

	if (RB_EMPTY_ROOT(&ref->tree))
		return 0;

	/* await allocates and so we need to avoid hitting the shrinker */
	err = i915_active_acquire(ref);
	if (err)
		return err;

	mutex_lock(&ref->mutex);
	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
		err = i915_request_await_active_request(rq, &it->base);
		if (err)
			break;
	}
	mutex_unlock(&ref->mutex);

	i915_active_release(ref);
	return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
void i915_active_fini(struct i915_active *ref)
{
	debug_active_fini(ref);
	GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
	GEM_BUG_ON(atomic_read(&ref->count));
	mutex_destroy(&ref->mutex);
}
#endif

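/*
 * An idle barrier is a node on the engine's kernel_context timeline that is
 * not currently tracking a request; such nodes can be plucked back out of
 * the tree and recycled for the next set of barriers.
 */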
static inline bool is_idle_barrier(struct active_node *node, u64 idx)
{
	return node->timeline == idx && !i915_active_request_isset(&node->base);
}

static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
{
	struct rb_node *prev, *p;

	if (RB_EMPTY_ROOT(&ref->tree))
		return NULL;

	mutex_lock(&ref->mutex);
	GEM_BUG_ON(i915_active_is_idle(ref));

	/*
	 * Try to reuse any existing barrier nodes already allocated for this
	 * i915_active; due to overlapping active phases there is likely a
	 * node kept alive (as we reuse before parking). We prefer to reuse
	 * completely idle barriers (less hassle in manipulating the llists),
	 * but otherwise any will do.
	 */
	if (ref->cache && is_idle_barrier(ref->cache, idx)) {
		p = &ref->cache->node;
		goto match;
	}

	prev = NULL;
	p = ref->tree.rb_node;
	while (p) {
		struct active_node *node =
			rb_entry(p, struct active_node, node);

		if (is_idle_barrier(node, idx))
			goto match;

		prev = p;
		if (node->timeline < idx)
			p = p->rb_right;
		else
			p = p->rb_left;
	}

	/*
	 * No quick match, but we did find the leftmost rb_node for the
	 * kernel_context. Walk the rb_tree in-order to see if there were
	 * any idle-barriers on this timeline that we missed, or just use
	 * the first pending barrier.
	 */
	for (p = prev; p; p = rb_next(p)) {
		struct active_node *node =
			rb_entry(p, struct active_node, node);
		struct intel_engine_cs *engine;

		if (node->timeline > idx)
			break;

		if (node->timeline < idx)
			continue;

		if (is_idle_barrier(node, idx))
			goto match;

		/*
		 * The list of pending barriers is protected by the
		 * kernel_context timeline, which notably we do not hold
		 * here. i915_request_add_active_barriers() may consume
		 * the barrier before we claim it, so we have to check
		 * for success.
		 */
		engine = __barrier_to_engine(node);
		smp_rmb(); /* serialise with add_active_barriers */
		if (is_barrier(&node->base) &&
		    ____active_del_barrier(ref, node, engine))
			goto match;
	}

	mutex_unlock(&ref->mutex);

	return NULL;

match:
	rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */
	if (p == &ref->cache->node)
		ref->cache = NULL;
	mutex_unlock(&ref->mutex);

	return rb_entry(p, struct active_node, node);
}

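/*
 * Preallocate (or recycle) one barrier node per physical engine backing
 * @engine and stash them on ref->preallocated_barriers, taking an engine-pm
 * reference for each. i915_active_acquire_barrier() later moves them into
 * the rbtree and onto engine->barrier_tasks.
 */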
int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
					    struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;
	intel_engine_mask_t tmp, mask = engine->mask;
	struct llist_node *pos, *next;
	int err;

	GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers));

	/*
	 * Preallocate a node for each physical engine supporting the target
	 * engine (remember virtual engines have more than one sibling).
	 * We can then use the preallocated nodes in
	 * i915_active_acquire_barrier()
	 */
	for_each_engine_masked(engine, i915, mask, tmp) {
		u64 idx = engine->kernel_context->timeline->fence_context;
		struct active_node *node;

		node = reuse_idle_barrier(ref, idx);
		if (!node) {
			node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
			if (!node) {
				err = -ENOMEM;
				goto unwind;
			}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
			node->base.lock =
				&engine->kernel_context->timeline->mutex;
#endif
			RCU_INIT_POINTER(node->base.request, NULL);
			node->base.retire = node_retire;
			node->timeline = idx;
			node->ref = ref;
		}

		if (!i915_active_request_isset(&node->base)) {
			/*
			 * Mark this as being *our* unconnected proto-node.
			 *
			 * Since this node is not in any list, and we have
			 * decoupled it from the rbtree, we can reuse the
			 * request to indicate this is an idle-barrier node
			 * and then we can use the rb_node and list pointers
			 * for our tracking of the pending barrier.
			 */
			RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN));
			node->base.link.prev = (void *)engine;
			atomic_inc(&ref->count);
		}

		GEM_BUG_ON(barrier_to_engine(node) != engine);
		llist_add(barrier_to_ll(node), &ref->preallocated_barriers);
		intel_engine_pm_get(engine);
	}

	return 0;

unwind:
	llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) {
		struct active_node *node = barrier_from_ll(pos);

		atomic_dec(&ref->count);
		intel_engine_pm_put(barrier_to_engine(node));

		kmem_cache_free(global.slab_cache, node);
	}
	return err;
}

void i915_active_acquire_barrier(struct i915_active *ref)
{
	struct llist_node *pos, *next;

	GEM_BUG_ON(i915_active_is_idle(ref));

	/*
	 * Transfer the list of preallocated barriers into the
	 * i915_active rbtree, but only as proto-nodes. They will be
	 * populated by i915_request_add_active_barriers() to point to the
	 * request that will eventually release them.
	 */
	mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING);
	llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) {
		struct active_node *node = barrier_from_ll(pos);
		struct intel_engine_cs *engine = barrier_to_engine(node);
		struct rb_node **p, *parent;

		parent = NULL;
		p = &ref->tree.rb_node;
		while (*p) {
			struct active_node *it;

			parent = *p;

			it = rb_entry(parent, struct active_node, node);
			if (it->timeline < node->timeline)
				p = &parent->rb_right;
			else
				p = &parent->rb_left;
		}
		rb_link_node(&node->node, parent, p);
		rb_insert_color(&node->node, &ref->tree);

		llist_add(barrier_to_ll(node), &engine->barrier_tasks);
		intel_engine_pm_put(engine);
	}
	mutex_unlock(&ref->mutex);
}

void i915_request_add_active_barriers(struct i915_request *rq)
{
	struct intel_engine_cs *engine = rq->engine;
	struct llist_node *node, *next;

	GEM_BUG_ON(intel_engine_is_virtual(engine));
	GEM_BUG_ON(rq->timeline != engine->kernel_context->timeline);

	/*
	 * Attach the list of proto-fences to the in-flight request such
	 * that the parent i915_active will be released when this request
	 * is retired.
	 */
	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
		RCU_INIT_POINTER(barrier_from_ll(node)->base.request, rq);
		smp_wmb(); /* serialise with reuse_idle_barrier */
		list_add_tail((struct list_head *)node, &rq->active_list);
	}
}

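/*
 * Update the active request slot to rq, first ordering rq after any request
 * previously occupying the slot so that the tracked slot retires in order.
 */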
int i915_active_request_set(struct i915_active_request *active,
			    struct i915_request *rq)
{
	int err;

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
	lockdep_assert_held(active->lock);
#endif

	/* Must maintain ordering wrt previous active requests */
	err = i915_request_await_active_request(rq, active);
	if (err)
		return err;

	__i915_active_request_set(active, rq);
	return 0;
}

void i915_active_retire_noop(struct i915_active_request *active,
			     struct i915_request *request)
{
	/* Space left intentionally blank */
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_active.c"
#endif

static void i915_global_active_shrink(void)
{
	kmem_cache_shrink(global.slab_cache);
}

static void i915_global_active_exit(void)
{
	kmem_cache_destroy(global.slab_cache);
}

static struct i915_global_active global = { {
	.shrink = i915_global_active_shrink,
	.exit = i915_global_active_exit,
} };

int __init i915_global_active_init(void)
{
	global.slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN);
	if (!global.slab_cache)
		return -ENOMEM;

	i915_global_register(&global.base);
	return 0;
}