Linux Audio

Check our new training course

Loading...
v5.14.15
  1// SPDX-License-Identifier: MIT
  2/*
 
 
  3 * Copyright © 2019 Intel Corporation
  4 */
  5
  6#include "i915_drv.h"
  7#include "i915_request.h"
  8
  9#include "intel_context.h"
 10#include "intel_engine_heartbeat.h"
 11#include "intel_engine_pm.h"
 12#include "intel_engine.h"
 13#include "intel_gt.h"
 14#include "intel_reset.h"
 15
 16/*
 17 * While the engine is active, we send a periodic pulse along the engine
 18 * to check on its health and to flush any idle-barriers. If that request
 19 * is stuck, and we fail to preempt it, we declare the engine hung and
 20 * issue a reset -- in the hope that restores progress.
 21 */
 22
 23static bool next_heartbeat(struct intel_engine_cs *engine)
 24{
 25	long delay;
 26
 27	delay = READ_ONCE(engine->props.heartbeat_interval_ms);
 28	if (!delay)
 29		return false;
 30
 31	delay = msecs_to_jiffies_timeout(delay);
 32	if (delay >= HZ)
 33		delay = round_jiffies_up_relative(delay);
 34	mod_delayed_work(system_highpri_wq, &engine->heartbeat.work, delay + 1);
 35
 36	return true;
 37}
 38
 39static struct i915_request *
 40heartbeat_create(struct intel_context *ce, gfp_t gfp)
 41{
 42	struct i915_request *rq;
 43
 44	intel_context_enter(ce);
 45	rq = __i915_request_create(ce, gfp);
 46	intel_context_exit(ce);
 47
 48	return rq;
 49}
 50
 51static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq)
 52{
 53	engine->wakeref_serial = READ_ONCE(engine->serial) + 1;
 54	i915_request_add_active_barriers(rq);
 55	if (!engine->heartbeat.systole && intel_engine_has_heartbeat(engine))
 56		engine->heartbeat.systole = i915_request_get(rq);
 57}
 58
 59static void heartbeat_commit(struct i915_request *rq,
 60			     const struct i915_sched_attr *attr)
 61{
 62	idle_pulse(rq->engine, rq);
 63
 64	__i915_request_commit(rq);
 65	__i915_request_queue(rq, attr);
 66}
 67
 68static void show_heartbeat(const struct i915_request *rq,
 69			   struct intel_engine_cs *engine)
 70{
 71	struct drm_printer p = drm_debug_printer("heartbeat");
 72
 73	intel_engine_dump(engine, &p,
 74			  "%s heartbeat {seqno:%llx:%lld, prio:%d} not ticking\n",
 75			  engine->name,
 76			  rq->fence.context,
 77			  rq->fence.seqno,
 78			  rq->sched.attr.priority);
 79}
 80
/*
 * heartbeat - delayed-work handler behind the per-engine heartbeat.
 * @wrk: the work_struct embedded in engine->heartbeat.work
 *
 * In order, each invocation:
 *  - flushes submission on the engine;
 *  - releases the tracked heartbeat request (systole) if it has completed;
 *  - returns immediately, without re-arming, if
 *    intel_engine_pm_get_if_awake() fails;
 *  - skips to the re-arm step if the GT is wedged;
 *  - if a heartbeat request is still outstanding: does nothing when the
 *    interval has not yet elapsed since it was emitted, otherwise either
 *    waits (request not yet submitted), raises its priority through
 *    engine->schedule(), or reports an error via intel_gt_handle_error();
 *  - otherwise, if engine->serial differs from wakeref_serial, tries to
 *    take the kernel context's timeline mutex and emits a new heartbeat
 *    request at I915_PRIORITY_MIN.
 *
 * At the end the worker is re-armed with next_heartbeat(); if hangcheck is
 * disabled or no interval is configured the tracked request is dropped
 * instead.
 */
  81static void heartbeat(struct work_struct *wrk)
  82{
  83	struct i915_sched_attr attr = { .priority = I915_PRIORITY_MIN };
 
 
  84	struct intel_engine_cs *engine =
  85		container_of(wrk, typeof(*engine), heartbeat.work.work);
  86	struct intel_context *ce = engine->kernel_context;
  87	struct i915_request *rq;
  88	unsigned long serial;
  89
  90	/* Just in case everything has gone horribly wrong, give it a kick */
  91	intel_engine_flush_submission(engine);
  92
	/* Drop our reference to the previous heartbeat once it has completed */
  93	rq = engine->heartbeat.systole;
  94	if (rq && i915_request_completed(rq)) {
  95		i915_request_put(rq);
  96		engine->heartbeat.systole = NULL;
  97	}
  98
	/* No pm reference means no work and no re-arm from this invocation */
  99	if (!intel_engine_pm_get_if_awake(engine))
 100		return;
 101
 102	if (intel_gt_is_wedged(engine->gt))
 103		goto out;
 104
	/* Non-NULL systole here is the same request as rq loaded above */
 105	if (engine->heartbeat.systole) {
 106		long delay = READ_ONCE(engine->props.heartbeat_interval_ms);
 107
 108		/* Safeguard against too-fast worker invocations */
 109		if (!time_after(jiffies,
 110				rq->emitted_jiffies + msecs_to_jiffies(delay)))
 111			goto out;
 112
 113		if (!i915_sw_fence_signaled(&rq->submit)) {
 114			/*
 115			 * Not yet submitted, system is stalled.
 116			 *
 117			 * This more often happens for ring submission,
 118			 * where all contexts are funnelled into a common
 119			 * ringbuffer. If one context is blocked on an
 120			 * external fence, not only is it not submitted,
 121			 * but all other contexts, including the kernel
 122			 * context are stuck waiting for the signal.
 123			 */
 124		} else if (engine->schedule &&
 125			   rq->sched.attr.priority < I915_PRIORITY_BARRIER) {
 126			/*
 127			 * Gradually raise the priority of the heartbeat to
 128			 * give high priority work [which presumably desires
 129			 * low latency and no jitter] the chance to naturally
 130			 * complete before being preempted.
 131			 */
			/* Steps: 0, then I915_PRIORITY_HEARTBEAT, then I915_PRIORITY_BARRIER */
 132			attr.priority = 0;
 133			if (rq->sched.attr.priority >= attr.priority)
 134				attr.priority = I915_PRIORITY_HEARTBEAT;
 135			if (rq->sched.attr.priority >= attr.priority)
 136				attr.priority = I915_PRIORITY_BARRIER;
 137
 138			local_bh_disable();
 139			engine->schedule(rq, &attr);
 140			local_bh_enable();
 141		} else {
			/* Submitted, and no further priority bump is available */
 142			if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
 143				show_heartbeat(rq, engine);
 144
 145			intel_gt_handle_error(engine->gt, engine->mask,
 146					      I915_ERROR_CAPTURE,
 147					      "stopped heartbeat on %s",
 148					      engine->name);
 149		}
 150
		/* Restart the interval measured by the too-fast check above */
 151		rq->emitted_jiffies = jiffies;
 152		goto out;
 153	}
 154
	/*
	 * idle_pulse() stores serial + 1 in wakeref_serial; a match here skips
	 * emitting a new heartbeat (presumably nothing else was submitted
	 * since the last pulse - TODO confirm).
	 */
 155	serial = READ_ONCE(engine->serial);
 156	if (engine->wakeref_serial == serial)
 157		goto out;
 158
 159	if (!mutex_trylock(&ce->timeline->mutex)) {
 160		/* Unable to lock the kernel timeline, is the engine stuck? */
		/* Only report if the previously recorded blocked serial is this one */
 161		if (xchg(&engine->heartbeat.blocked, serial) == serial)
 162			intel_gt_handle_error(engine->gt, engine->mask,
 163					      I915_ERROR_CAPTURE,
 164					      "no heartbeat on %s",
 165					      engine->name);
 166		goto out;
 167	}
 168
	/* Allocation failure is not an error here: just unlock and re-arm */
 169	rq = heartbeat_create(ce, GFP_NOWAIT | __GFP_NOWARN);
 
 
 170	if (IS_ERR(rq))
 171		goto unlock;
 172
 173	heartbeat_commit(rq, &attr);
 
 
 
 
 
 174
 175unlock:
 176	mutex_unlock(&ce->timeline->mutex);
 177out:
	/* Hangcheck disabled or nothing re-armed: stop tracking the request */
 178	if (!engine->i915->params.enable_hangcheck || !next_heartbeat(engine))
 179		i915_request_put(fetch_and_zero(&engine->heartbeat.systole));
 180	intel_engine_pm_put(engine);
 181}
182
183void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine)
184{
185	if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL))
186		return;
187
188	next_heartbeat(engine);
189}
190
191void intel_engine_park_heartbeat(struct intel_engine_cs *engine)
192{
193	if (cancel_delayed_work(&engine->heartbeat.work))
194		i915_request_put(fetch_and_zero(&engine->heartbeat.systole));
195}
196
/*
 * Initialise @engine's heartbeat delayed work with heartbeat() as its
 * handler. Queueing of the work is done elsewhere, by next_heartbeat().
 */
 197void intel_engine_init_heartbeat(struct intel_engine_cs *engine)
 198{
 199	INIT_DELAYED_WORK(&engine->heartbeat.work, heartbeat);
 200}
201
202static int __intel_engine_pulse(struct intel_engine_cs *engine)
203{
204	struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER };
205	struct intel_context *ce = engine->kernel_context;
206	struct i915_request *rq;
207
208	lockdep_assert_held(&ce->timeline->mutex);
209	GEM_BUG_ON(!intel_engine_has_preemption(engine));
210	GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
211
212	rq = heartbeat_create(ce, GFP_NOWAIT | __GFP_NOWARN);
213	if (IS_ERR(rq))
214		return PTR_ERR(rq);
215
216	__set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags);
217
218	heartbeat_commit(rq, &attr);
219	GEM_BUG_ON(rq->sched.attr.priority < I915_PRIORITY_BARRIER);
220
221	return 0;
222}
223
224static unsigned long set_heartbeat(struct intel_engine_cs *engine,
225				   unsigned long delay)
226{
227	unsigned long old;
228
229	old = xchg(&engine->props.heartbeat_interval_ms, delay);
230	if (delay)
231		intel_engine_unpark_heartbeat(engine);
232	else
233		intel_engine_park_heartbeat(engine);
234
235	return old;
236}
237
238int intel_engine_set_heartbeat(struct intel_engine_cs *engine,
239			       unsigned long delay)
240{
241	struct intel_context *ce = engine->kernel_context;
242	int err = 0;
243
244	if (!delay && !intel_engine_has_preempt_reset(engine))
245		return -ENODEV;
246
247	intel_engine_pm_get(engine);
248
249	err = mutex_lock_interruptible(&ce->timeline->mutex);
250	if (err)
251		goto out_rpm;
 
 
 
252
253	if (delay != engine->props.heartbeat_interval_ms) {
254		unsigned long saved = set_heartbeat(engine, delay);
255
256		/* recheck current execution */
257		if (intel_engine_has_preemption(engine)) {
258			err = __intel_engine_pulse(engine);
259			if (err)
260				set_heartbeat(engine, saved);
261		}
262	}
263
264	mutex_unlock(&ce->timeline->mutex);
265
266out_rpm:
267	intel_engine_pm_put(engine);
268	return err;
269}
270
271int intel_engine_pulse(struct intel_engine_cs *engine)
272{
 
273	struct intel_context *ce = engine->kernel_context;
 
274	int err;
275
276	if (!intel_engine_has_preemption(engine))
277		return -ENODEV;
278
279	if (!intel_engine_pm_get_if_awake(engine))
280		return 0;
281
282	err = -EINTR;
283	if (!mutex_lock_interruptible(&ce->timeline->mutex)) {
284		err = __intel_engine_pulse(engine);
285		mutex_unlock(&ce->timeline->mutex);
286	}
287
288	intel_engine_flush_submission(engine);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289	intel_engine_pm_put(engine);
290	return err;
291}
292
293int intel_engine_flush_barriers(struct intel_engine_cs *engine)
294{
295	struct i915_sched_attr attr = { .priority = I915_PRIORITY_MIN };
296	struct intel_context *ce = engine->kernel_context;
297	struct i915_request *rq;
298	int err;
299
300	if (llist_empty(&engine->barrier_tasks))
301		return 0;
302
303	if (!intel_engine_pm_get_if_awake(engine))
304		return 0;
305
306	if (mutex_lock_interruptible(&ce->timeline->mutex)) {
307		err = -EINTR;
308		goto out_rpm;
309	}
310
311	rq = heartbeat_create(ce, GFP_KERNEL);
312	if (IS_ERR(rq)) {
313		err = PTR_ERR(rq);
314		goto out_unlock;
315	}
316
317	heartbeat_commit(rq, &attr);
 
318
319	err = 0;
320out_unlock:
321	mutex_unlock(&ce->timeline->mutex);
322out_rpm:
323	intel_engine_pm_put(engine);
324	return err;
325}
326
327#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
328#include "selftest_engine_heartbeat.c"
329#endif
v5.9
 
  1/*
  2 * SPDX-License-Identifier: MIT
  3 *
  4 * Copyright © 2019 Intel Corporation
  5 */
  6
  7#include "i915_drv.h"
  8#include "i915_request.h"
  9
 10#include "intel_context.h"
 11#include "intel_engine_heartbeat.h"
 12#include "intel_engine_pm.h"
 13#include "intel_engine.h"
 14#include "intel_gt.h"
 15#include "intel_reset.h"
 16
 17/*
 18 * While the engine is active, we send a periodic pulse along the engine
 19 * to check on its health and to flush any idle-barriers. If that request
 20 * is stuck, and we fail to preempt it, we declare the engine hung and
 21 * issue a reset -- in the hope that restores progress.
 22 */
 23
 24static bool next_heartbeat(struct intel_engine_cs *engine)
 25{
 26	long delay;
 27
 28	delay = READ_ONCE(engine->props.heartbeat_interval_ms);
 29	if (!delay)
 30		return false;
 31
 32	delay = msecs_to_jiffies_timeout(delay);
 33	if (delay >= HZ)
 34		delay = round_jiffies_up_relative(delay);
 35	mod_delayed_work(system_highpri_wq, &engine->heartbeat.work, delay);
 36
 37	return true;
 38}
 39
 
 
 
 
 
 
 
 
 
 
 
 
 40static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq)
 41{
 42	engine->wakeref_serial = READ_ONCE(engine->serial) + 1;
 43	i915_request_add_active_barriers(rq);
 
 
 
 
 
 
 
 
 
 
 
 44}
 45
 46static void show_heartbeat(const struct i915_request *rq,
 47			   struct intel_engine_cs *engine)
 48{
 49	struct drm_printer p = drm_debug_printer("heartbeat");
 50
 51	intel_engine_dump(engine, &p,
 52			  "%s heartbeat {seqno:%llx:%lld, prio:%d} not ticking\n",
 53			  engine->name,
 54			  rq->fence.context,
 55			  rq->fence.seqno,
 56			  rq->sched.attr.priority);
 57}
 58
/*
 * heartbeat - delayed-work handler behind the per-engine heartbeat.
 * @wrk: the work_struct embedded in engine->heartbeat.work
 *
 * In order, each invocation:
 *  - flushes submission on the engine;
 *  - releases the tracked heartbeat request (systole) if it has completed;
 *  - returns immediately, without re-arming, if
 *    intel_engine_pm_get_if_awake() fails;
 *  - skips to the re-arm step if the GT is wedged;
 *  - if a heartbeat request is still outstanding: either waits (request
 *    not yet submitted), raises its priority through engine->schedule(),
 *    or reports an error via intel_gt_handle_error();
 *  - otherwise, if engine->serial differs from wakeref_serial, tries to
 *    take the kernel context's timeline mutex and emits a new heartbeat
 *    request, tracking it as systole only when hangcheck is enabled.
 *
 * At the end the worker is re-armed with next_heartbeat(); if that queues
 * nothing the tracked request is dropped.
 */
  59static void heartbeat(struct work_struct *wrk)
  60{
  61	struct i915_sched_attr attr = {
  62		.priority = I915_USER_PRIORITY(I915_PRIORITY_MIN),
  63	};
  64	struct intel_engine_cs *engine =
  65		container_of(wrk, typeof(*engine), heartbeat.work.work);
  66	struct intel_context *ce = engine->kernel_context;
  67	struct i915_request *rq;
  68	unsigned long serial;
  69
  70	/* Just in case everything has gone horribly wrong, give it a kick */
  71	intel_engine_flush_submission(engine);
  72
	/* Drop our reference to the previous heartbeat once it has completed */
  73	rq = engine->heartbeat.systole;
  74	if (rq && i915_request_completed(rq)) {
  75		i915_request_put(rq);
  76		engine->heartbeat.systole = NULL;
  77	}
  78
	/* No pm reference means no work and no re-arm from this invocation */
  79	if (!intel_engine_pm_get_if_awake(engine))
  80		return;
  81
  82	if (intel_gt_is_wedged(engine->gt))
  83		goto out;
  84
	/* Non-NULL systole here is the same request as rq loaded above */
  85	if (engine->heartbeat.systole) {
 
 
 
 
 
 
 
  86		if (!i915_sw_fence_signaled(&rq->submit)) {
  87			/*
  88			 * Not yet submitted, system is stalled.
  89			 *
  90			 * This more often happens for ring submission,
  91			 * where all contexts are funnelled into a common
  92			 * ringbuffer. If one context is blocked on an
  93			 * external fence, not only is it not submitted,
  94			 * but all other contexts, including the kernel
  95			 * context are stuck waiting for the signal.
  96			 */
  97		} else if (engine->schedule &&
  98			   rq->sched.attr.priority < I915_PRIORITY_BARRIER) {
  99			/*
 100			 * Gradually raise the priority of the heartbeat to
 101			 * give high priority work [which presumably desires
 102			 * low latency and no jitter] the chance to naturally
 103			 * complete before being preempted.
 104			 */
 105			attr.priority = I915_PRIORITY_MASK;
 106			if (rq->sched.attr.priority >= attr.priority)
 107				attr.priority |= I915_USER_PRIORITY(I915_PRIORITY_HEARTBEAT);
 108			if (rq->sched.attr.priority >= attr.priority)
 109				attr.priority = I915_PRIORITY_BARRIER;
 110
 111			local_bh_disable();
 112			engine->schedule(rq, &attr);
 113			local_bh_enable();
 114		} else {
			/* Submitted, and no further priority bump is available */
 115			if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
 116				show_heartbeat(rq, engine);
 117
 118			intel_gt_handle_error(engine->gt, engine->mask,
 119					      I915_ERROR_CAPTURE,
 120					      "stopped heartbeat on %s",
 121					      engine->name);
 122		}
 
 
 123		goto out;
 124	}
 125
	/*
	 * idle_pulse() stores serial + 1 in wakeref_serial; a match here skips
	 * emitting a new heartbeat (presumably nothing else was submitted
	 * since the last pulse - TODO confirm).
	 */
 126	serial = READ_ONCE(engine->serial);
 127	if (engine->wakeref_serial == serial)
 128		goto out;
 129
 130	if (!mutex_trylock(&ce->timeline->mutex)) {
 131		/* Unable to lock the kernel timeline, is the engine stuck? */
		/* Only report if the previously recorded blocked serial is this one */
 132		if (xchg(&engine->heartbeat.blocked, serial) == serial)
 133			intel_gt_handle_error(engine->gt, engine->mask,
 134					      I915_ERROR_CAPTURE,
 135					      "no heartbeat on %s",
 136					      engine->name);
 137		goto out;
 138	}
 139
	/* Allocation failure is not an error here: just unlock and re-arm */
 140	intel_context_enter(ce);
 141	rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
 142	intel_context_exit(ce);
 143	if (IS_ERR(rq))
 144		goto unlock;
 145
 146	idle_pulse(engine, rq);
	/* The request is only tracked for later checks when hangcheck is on */
 147	if (engine->i915->params.enable_hangcheck)
 148		engine->heartbeat.systole = i915_request_get(rq);
 149
 150	__i915_request_commit(rq);
 151	__i915_request_queue(rq, &attr);
 152
 153unlock:
 154	mutex_unlock(&ce->timeline->mutex);
 155out:
	/* Nothing re-armed (interval is zero): stop tracking the request */
 156	if (!next_heartbeat(engine))
 157		i915_request_put(fetch_and_zero(&engine->heartbeat.systole));
 158	intel_engine_pm_put(engine);
 159}
160
161void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine)
162{
163	if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL))
164		return;
165
166	next_heartbeat(engine);
167}
168
169void intel_engine_park_heartbeat(struct intel_engine_cs *engine)
170{
171	if (cancel_delayed_work(&engine->heartbeat.work))
172		i915_request_put(fetch_and_zero(&engine->heartbeat.systole));
173}
174
/*
 * Initialise @engine's heartbeat delayed work with heartbeat() as its
 * handler. Queueing of the work is done elsewhere, by next_heartbeat().
 */
 175void intel_engine_init_heartbeat(struct intel_engine_cs *engine)
 176{
 177	INIT_DELAYED_WORK(&engine->heartbeat.work, heartbeat);
 178}
179
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180int intel_engine_set_heartbeat(struct intel_engine_cs *engine,
181			       unsigned long delay)
182{
183	int err;
 
 
 
 
 
 
184
185	/* Send one last pulse before to cleanup persistent hogs */
186	if (!delay && IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) {
187		err = intel_engine_pulse(engine);
188		if (err)
189			return err;
190	}
191
192	WRITE_ONCE(engine->props.heartbeat_interval_ms, delay);
 
193
194	if (intel_engine_pm_get_if_awake(engine)) {
195		if (delay)
196			intel_engine_unpark_heartbeat(engine);
197		else
198			intel_engine_park_heartbeat(engine);
199		intel_engine_pm_put(engine);
200	}
201
202	return 0;
 
 
 
 
203}
204
205int intel_engine_pulse(struct intel_engine_cs *engine)
206{
207	struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER };
208	struct intel_context *ce = engine->kernel_context;
209	struct i915_request *rq;
210	int err;
211
212	if (!intel_engine_has_preemption(engine))
213		return -ENODEV;
214
215	if (!intel_engine_pm_get_if_awake(engine))
216		return 0;
217
218	if (mutex_lock_interruptible(&ce->timeline->mutex)) {
219		err = -EINTR;
220		goto out_rpm;
 
221	}
222
223	intel_context_enter(ce);
224	rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
225	intel_context_exit(ce);
226	if (IS_ERR(rq)) {
227		err = PTR_ERR(rq);
228		goto out_unlock;
229	}
230
231	__set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags);
232	idle_pulse(engine, rq);
233
234	__i915_request_commit(rq);
235	__i915_request_queue(rq, &attr);
236	GEM_BUG_ON(rq->sched.attr.priority < I915_PRIORITY_BARRIER);
237	err = 0;
238
239out_unlock:
240	mutex_unlock(&ce->timeline->mutex);
241out_rpm:
242	intel_engine_pm_put(engine);
243	return err;
244}
245
246int intel_engine_flush_barriers(struct intel_engine_cs *engine)
247{
 
 
248	struct i915_request *rq;
249	int err = 0;
250
251	if (llist_empty(&engine->barrier_tasks))
252		return 0;
253
254	if (!intel_engine_pm_get_if_awake(engine))
255		return 0;
256
257	rq = i915_request_create(engine->kernel_context);
 
 
 
 
 
258	if (IS_ERR(rq)) {
259		err = PTR_ERR(rq);
260		goto out_rpm;
261	}
262
263	idle_pulse(engine, rq);
264	i915_request_add(rq);
265
 
 
 
266out_rpm:
267	intel_engine_pm_put(engine);
268	return err;
269}
270
271#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
272#include "selftest_engine_heartbeat.c"
273#endif