intel_engine_pm.c - drivers/gpu/drm/i915/gt/intel_engine_pm.c - Linux source code v4.6

Note: File does not exist in v4.6.
  1/*
  2 * SPDX-License-Identifier: MIT
  3 *
  4 * Copyright © 2019 Intel Corporation
  5 */
  6
  7#include "i915_drv.h"
  8
  9#include "intel_context.h"
 10#include "intel_engine.h"
 11#include "intel_engine_heartbeat.h"
 12#include "intel_engine_pm.h"
 13#include "intel_gt.h"
 14#include "intel_gt_pm.h"
 15#include "intel_rc6.h"
 16#include "intel_ring.h"
 17#include "shmem_utils.h"
 18
 19static int __engine_unpark(struct intel_wakeref *wf)
 20{
 21	struct intel_engine_cs *engine =
 22		container_of(wf, typeof(*engine), wakeref);
 23	struct intel_context *ce;
 24
 25	ENGINE_TRACE(engine, "\n");
 26
 27	intel_gt_pm_get(engine->gt);
 28
 29	/* Discard stale context state from across idling */
 30	ce = engine->kernel_context;
 31	if (ce) {
 32		GEM_BUG_ON(test_bit(CONTEXT_VALID_BIT, &ce->flags));
 33
 34		/* First poison the image to verify we never fully trust it */
 35		if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && ce->state) {
 36			struct drm_i915_gem_object *obj = ce->state->obj;
 37			int type = i915_coherent_map_type(engine->i915);
 38			void *map;
 39
 40			map = i915_gem_object_pin_map(obj, type);
 41			if (!IS_ERR(map)) {
 42				memset(map, CONTEXT_REDZONE, obj->base.size);
 43				i915_gem_object_flush_map(obj);
 44				i915_gem_object_unpin_map(obj);
 45			}
 46		}
 47
 48		ce->ops->reset(ce);
 49	}
 50
 51	if (engine->unpark)
 52		engine->unpark(engine);
 53
 54	intel_engine_unpark_heartbeat(engine);
 55	return 0;
 56}
 57
 58#if IS_ENABLED(CONFIG_LOCKDEP)
 59
 60static inline unsigned long __timeline_mark_lock(struct intel_context *ce)
 61{
 62	unsigned long flags;
 63
 64	local_irq_save(flags);
 65	mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_);
 66
 67	return flags;
 68}
 69
 70static inline void __timeline_mark_unlock(struct intel_context *ce,
 71					  unsigned long flags)
 72{
 73	mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_);
 74	local_irq_restore(flags);
 75}
 76
 77#else
 78
 79static inline unsigned long __timeline_mark_lock(struct intel_context *ce)
 80{
 81	return 0;
 82}
 83
 84static inline void __timeline_mark_unlock(struct intel_context *ce,
 85					  unsigned long flags)
 86{
 87}
 88
 89#endif /* !IS_ENABLED(CONFIG_LOCKDEP) */
 90
 91static void duration(struct dma_fence *fence, struct dma_fence_cb *cb)
 92{
 93	struct i915_request *rq = to_request(fence);
 94
 95	ewma__engine_latency_add(&rq->engine->latency,
 96				 ktime_us_delta(rq->fence.timestamp,
 97						rq->duration.emitted));
 98}
 99
100static void
101__queue_and_release_pm(struct i915_request *rq,
102		       struct intel_timeline *tl,
103		       struct intel_engine_cs *engine)
104{
105	struct intel_gt_timelines *timelines = &engine->gt->timelines;
106
107	ENGINE_TRACE(engine, "parking\n");
108
109	/*
110	 * We have to serialise all potential retirement paths with our
111	 * submission, as we don't want to underflow either the
112	 * engine->wakeref.counter or our timeline->active_count.
113	 *
114	 * Equally, we cannot allow a new submission to start until
115	 * after we finish queueing, nor could we allow that submitter
116	 * to retire us before we are ready!
117	 */
118	spin_lock(&timelines->lock);
119
120	/* Let intel_gt_retire_requests() retire us (acquired under lock) */
121	if (!atomic_fetch_inc(&tl->active_count))
122		list_add_tail(&tl->link, &timelines->active_list);
123
124	/* Hand the request over to HW and so engine_retire() */
125	__i915_request_queue(rq, NULL);
126
127	/* Let new submissions commence (and maybe retire this timeline) */
128	__intel_wakeref_defer_park(&engine->wakeref);
129
130	spin_unlock(&timelines->lock);
131}
132
133static bool switch_to_kernel_context(struct intel_engine_cs *engine)
134{
135	struct intel_context *ce = engine->kernel_context;
136	struct i915_request *rq;
137	unsigned long flags;
138	bool result = true;
139
140	/* GPU is pointing to the void, as good as in the kernel context. */
141	if (intel_gt_is_wedged(engine->gt))
142		return true;
143
144	GEM_BUG_ON(!intel_context_is_barrier(ce));
145	GEM_BUG_ON(ce->timeline->hwsp_ggtt != engine->status_page.vma);
146
147	/* Already inside the kernel context, safe to power down. */
148	if (engine->wakeref_serial == engine->serial)
149		return true;
150
151	/*
152	 * Note, we do this without taking the timeline->mutex. We cannot
153	 * as we may be called while retiring the kernel context and so
154	 * already underneath the timeline->mutex. Instead we rely on the
155	 * exclusive property of the __engine_park that prevents anyone
156	 * else from creating a request on this engine. This also requires
157	 * that the ring is empty and we avoid any waits while constructing
158	 * the context, as they assume protection by the timeline->mutex.
159	 * This should hold true as we can only park the engine after
160	 * retiring the last request, thus all rings should be empty and
161	 * all timelines idle.
162	 *
163	 * For unlocking, there are 2 other parties and the GPU who have a
164	 * stake here.
165	 *
166	 * A new gpu user will be waiting on the engine-pm to start their
167	 * engine_unpark. New waiters are predicated on engine->wakeref.count
168	 * and so intel_wakeref_defer_park() acts like a mutex_unlock of the
169	 * engine->wakeref.
170	 *
171	 * The other party is intel_gt_retire_requests(), which is walking the
172	 * list of active timelines looking for completions. Meanwhile as soon
173	 * as we call __i915_request_queue(), the GPU may complete our request.
174	 * Ergo, if we put ourselves on the timelines.active_list
175	 * (se intel_timeline_enter()) before we increment the
176	 * engine->wakeref.count, we may see the request completion and retire
177	 * it causing an underflow of the engine->wakeref.
178	 */
179	flags = __timeline_mark_lock(ce);
180	GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0);
181
182	rq = __i915_request_create(ce, GFP_NOWAIT);
183	if (IS_ERR(rq))
184		/* Context switch failed, hope for the best! Maybe reset? */
185		goto out_unlock;
186
187	/* Check again on the next retirement. */
188	engine->wakeref_serial = engine->serial + 1;
189	i915_request_add_active_barriers(rq);
190
191	/* Install ourselves as a preemption barrier */
192	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
193	if (likely(!__i915_request_commit(rq))) { /* engine should be idle! */
194		/*
195		 * Use an interrupt for precise measurement of duration,
196		 * otherwise we rely on someone else retiring all the requests
197		 * which may delay the signaling (i.e. we will likely wait
198		 * until the background request retirement running every
199		 * second or two).
200		 */
201		BUILD_BUG_ON(sizeof(rq->duration) > sizeof(rq->submitq));
202		dma_fence_add_callback(&rq->fence, &rq->duration.cb, duration);
203		rq->duration.emitted = ktime_get();
204	}
205
206	/* Expose ourselves to the world */
207	__queue_and_release_pm(rq, ce->timeline, engine);
208
209	result = false;
210out_unlock:
211	__timeline_mark_unlock(ce, flags);
212	return result;
213}
214
215static void call_idle_barriers(struct intel_engine_cs *engine)
216{
217	struct llist_node *node, *next;
218
219	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
220		struct dma_fence_cb *cb =
221			container_of((struct list_head *)node,
222				     typeof(*cb), node);
223
224		cb->func(ERR_PTR(-EAGAIN), cb);
225	}
226}
227
228static int __engine_park(struct intel_wakeref *wf)
229{
230	struct intel_engine_cs *engine =
231		container_of(wf, typeof(*engine), wakeref);
232
233	engine->saturated = 0;
234
235	/*
236	 * If one and only one request is completed between pm events,
237	 * we know that we are inside the kernel context and it is
238	 * safe to power down. (We are paranoid in case that runtime
239	 * suspend causes corruption to the active context image, and
240	 * want to avoid that impacting userspace.)
241	 */
242	if (!switch_to_kernel_context(engine))
243		return -EBUSY;
244
245	ENGINE_TRACE(engine, "parked\n");
246
247	call_idle_barriers(engine); /* cleanup after wedging */
248
249	intel_engine_park_heartbeat(engine);
250	intel_engine_disarm_breadcrumbs(engine);
251
252	/* Must be reset upon idling, or we may miss the busy wakeup. */
253	GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);
254
255	if (engine->park)
256		engine->park(engine);
257
258	engine->execlists.no_priolist = false;
259
260	/* While gt calls i915_vma_parked(), we have to break the lock cycle */
261	intel_gt_pm_put_async(engine->gt);
262	return 0;
263}
264
265static const struct intel_wakeref_ops wf_ops = {
266	.get = __engine_unpark,
267	.put = __engine_park,
268};
269
270void intel_engine_init__pm(struct intel_engine_cs *engine)
271{
272	struct intel_runtime_pm *rpm = engine->uncore->rpm;
273
274	intel_wakeref_init(&engine->wakeref, rpm, &wf_ops);
275	intel_engine_init_heartbeat(engine);
276}
277
278#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
279#include "selftest_engine_pm.c"
280#endif