1// SPDX-License-Identifier: MIT
2/*
3 * Copyright © 2018 Intel Corporation
4 */
5
6#include <linux/prime_numbers.h>
7
8#include "gem/i915_gem_internal.h"
9#include "gem/i915_gem_pm.h"
10#include "gt/intel_engine_heartbeat.h"
11#include "gt/intel_reset.h"
12#include "gt/selftest_engine_heartbeat.h"
13
14#include "i915_selftest.h"
15#include "selftests/i915_random.h"
16#include "selftests/igt_flush_test.h"
17#include "selftests/igt_live_test.h"
18#include "selftests/igt_spinner.h"
19#include "selftests/lib_sw_fence.h"
20
21#include "gem/selftests/igt_gem_utils.h"
22#include "gem/selftests/mock_context.h"
23
24#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
25#define NUM_GPR 16
26#define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */
27
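/*
 * Consider a request in flight once it has been submitted to the HW,
 * parked on the hold list, or has already begun executing.
 */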
28static bool is_active(struct i915_request *rq)
29{
30 if (i915_request_is_active(rq))
31 return true;
32
33 if (i915_request_on_hold(rq))
34 return true;
35
36 if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
37 return true;
38
39 return false;
40}
41
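/*
 * Poll, kicking the submission tasklet, until the HW has acknowledged the
 * request (or it has already completed); returns -ETIME on timeout.
 */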
42static int wait_for_submit(struct intel_engine_cs *engine,
43 struct i915_request *rq,
44 unsigned long timeout)
45{
46 /* Ignore our own attempts to suppress excess tasklets */
47 tasklet_hi_schedule(&engine->sched_engine->tasklet);
48
49 timeout += jiffies;
50 do {
51 bool done = time_after(jiffies, timeout);
52
53 if (i915_request_completed(rq)) /* that was quick! */
54 return 0;
55
56 /* Wait until the HW has acknowledged the submission (or err) */
57 intel_engine_flush_submission(engine);
58 if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
59 return 0;
60
61 if (done)
62 return -ETIME;
63
64 cond_resched();
65 } while (1);
66}
67
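/*
 * Wait for the engine reset to mark the hanging request with -EIO, and then
 * for the request itself to complete.
 */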
68static int wait_for_reset(struct intel_engine_cs *engine,
69 struct i915_request *rq,
70 unsigned long timeout)
71{
72 timeout += jiffies;
73
74 do {
75 cond_resched();
76 intel_engine_flush_submission(engine);
77
78 if (READ_ONCE(engine->execlists.pending[0]))
79 continue;
80
81 if (i915_request_completed(rq))
82 break;
83
84 if (READ_ONCE(rq->fence.error))
85 break;
86 } while (time_before(jiffies, timeout));
87
88 if (rq->fence.error != -EIO) {
89 pr_err("%s: hanging request %llx:%lld not reset\n",
90 engine->name,
91 rq->fence.context,
92 rq->fence.seqno);
93 return -EINVAL;
94 }
95
96 /* Give the request a jiffie to complete after flushing the worker */
97 if (i915_request_wait(rq, 0,
98 max(0l, (long)(timeout - jiffies)) + 1) < 0) {
99 pr_err("%s: hanging request %llx:%lld did not complete\n",
100 engine->name,
101 rq->fence.context,
102 rq->fence.seqno);
103 return -ETIME;
104 }
105
106 return 0;
107}
108
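/* Smoke test: run and stop a spinner on each engine using a fresh context. */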
109static int live_sanitycheck(void *arg)
110{
111 struct intel_gt *gt = arg;
112 struct intel_engine_cs *engine;
113 enum intel_engine_id id;
114 struct igt_spinner spin;
115 int err = 0;
116
117 if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
118 return 0;
119
120 if (igt_spinner_init(&spin, gt))
121 return -ENOMEM;
122
123 for_each_engine(engine, gt, id) {
124 struct intel_context *ce;
125 struct i915_request *rq;
126
127 ce = intel_context_create(engine);
128 if (IS_ERR(ce)) {
129 err = PTR_ERR(ce);
130 break;
131 }
132
133 rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
134 if (IS_ERR(rq)) {
135 err = PTR_ERR(rq);
136 goto out_ctx;
137 }
138
139 i915_request_add(rq);
140 if (!igt_wait_for_spinner(&spin, rq)) {
141 GEM_TRACE("spinner failed to start\n");
142 GEM_TRACE_DUMP();
143 intel_gt_set_wedged(gt);
144 err = -EIO;
145 goto out_ctx;
146 }
147
148 igt_spinner_end(&spin);
149 if (igt_flush_test(gt->i915)) {
150 err = -EIO;
151 goto out_ctx;
152 }
153
154out_ctx:
155 intel_context_put(ce);
156 if (err)
157 break;
158 }
159
160 igt_spinner_fini(&spin);
161 return err;
162}
163
164static int live_unlite_restore(struct intel_gt *gt, int prio)
165{
166 struct intel_engine_cs *engine;
167 enum intel_engine_id id;
168 struct igt_spinner spin;
169 int err = -ENOMEM;
170
171 /*
172 * Check that we can correctly context switch between 2 instances
173 * on the same engine from the same parent context.
174 */
175
176 if (igt_spinner_init(&spin, gt))
177 return err;
178
179 err = 0;
180 for_each_engine(engine, gt, id) {
181 struct intel_context *ce[2] = {};
182 struct i915_request *rq[2];
183 struct igt_live_test t;
184 int n;
185
186 if (prio && !intel_engine_has_preemption(engine))
187 continue;
188
189 if (!intel_engine_can_store_dword(engine))
190 continue;
191
192 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
193 err = -EIO;
194 break;
195 }
196 st_engine_heartbeat_disable(engine);
197
198 for (n = 0; n < ARRAY_SIZE(ce); n++) {
199 struct intel_context *tmp;
200
201 tmp = intel_context_create(engine);
202 if (IS_ERR(tmp)) {
203 err = PTR_ERR(tmp);
204 goto err_ce;
205 }
206
207 err = intel_context_pin(tmp);
208 if (err) {
209 intel_context_put(tmp);
210 goto err_ce;
211 }
212
213 /*
214 * Setup the pair of contexts such that if we
215 * lite-restore using the RING_TAIL from ce[1] it
216 * will execute garbage from ce[0]->ring.
217 */
218 memset(tmp->ring->vaddr,
219 POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
220 tmp->ring->vma->size);
221
222 ce[n] = tmp;
223 }
224 GEM_BUG_ON(!ce[1]->ring->size);
225 intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
226 lrc_update_regs(ce[1], engine, ce[1]->ring->head);
227
228 rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
229 if (IS_ERR(rq[0])) {
230 err = PTR_ERR(rq[0]);
231 goto err_ce;
232 }
233
234 i915_request_get(rq[0]);
235 i915_request_add(rq[0]);
236 GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
237
238 if (!igt_wait_for_spinner(&spin, rq[0])) {
239 i915_request_put(rq[0]);
240 goto err_ce;
241 }
242
243 rq[1] = i915_request_create(ce[1]);
244 if (IS_ERR(rq[1])) {
245 err = PTR_ERR(rq[1]);
246 i915_request_put(rq[0]);
247 goto err_ce;
248 }
249
250 if (!prio) {
251 /*
252 * Ensure we do the switch to ce[1] on completion.
253 *
254 * rq[0] is already submitted, so this should reduce
255 * to a no-op (a wait on a request on the same engine
256 * uses the submit fence, not the completion fence),
257 * but it will install a dependency on rq[1] for rq[0]
258 * that will prevent the pair being reordered by
259 * timeslicing.
260 */
261 i915_request_await_dma_fence(rq[1], &rq[0]->fence);
262 }
263
264 i915_request_get(rq[1]);
265 i915_request_add(rq[1]);
266 GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
267 i915_request_put(rq[0]);
268
269 if (prio) {
270 struct i915_sched_attr attr = {
271 .priority = prio,
272 };
273
274 /* Alternatively preempt the spinner with ce[1] */
275 engine->sched_engine->schedule(rq[1], &attr);
276 }
277
278 /* And switch back to ce[0] for good measure */
279 rq[0] = i915_request_create(ce[0]);
280 if (IS_ERR(rq[0])) {
281 err = PTR_ERR(rq[0]);
282 i915_request_put(rq[1]);
283 goto err_ce;
284 }
285
286 i915_request_await_dma_fence(rq[0], &rq[1]->fence);
287 i915_request_get(rq[0]);
288 i915_request_add(rq[0]);
289 GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
290 i915_request_put(rq[1]);
291 i915_request_put(rq[0]);
292
293err_ce:
294 intel_engine_flush_submission(engine);
295 igt_spinner_end(&spin);
296 for (n = 0; n < ARRAY_SIZE(ce); n++) {
297 if (IS_ERR_OR_NULL(ce[n]))
298 break;
299
300 intel_context_unpin(ce[n]);
301 intel_context_put(ce[n]);
302 }
303
304 st_engine_heartbeat_enable(engine);
305 if (igt_live_test_end(&t))
306 err = -EIO;
307 if (err)
308 break;
309 }
310
311 igt_spinner_fini(&spin);
312 return err;
313}
314
315static int live_unlite_switch(void *arg)
316{
317 return live_unlite_restore(arg, 0);
318}
319
320static int live_unlite_preempt(void *arg)
321{
322 return live_unlite_restore(arg, I915_PRIORITY_MAX);
323}
324
325static int live_unlite_ring(void *arg)
326{
327 struct intel_gt *gt = arg;
328 struct intel_engine_cs *engine;
329 struct igt_spinner spin;
330 enum intel_engine_id id;
331 int err = 0;
332
333 /*
334 * Setup a preemption event that will cause almost the entire ring
335 * to be unwound, potentially fooling our intel_ring_direction()
336 * into emitting a forward lite-restore instead of the rollback.
337 */
338
339 if (igt_spinner_init(&spin, gt))
340 return -ENOMEM;
341
342 for_each_engine(engine, gt, id) {
343 struct intel_context *ce[2] = {};
344 struct i915_request *rq;
345 struct igt_live_test t;
346 int n;
347
348 if (!intel_engine_has_preemption(engine))
349 continue;
350
351 if (!intel_engine_can_store_dword(engine))
352 continue;
353
354 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
355 err = -EIO;
356 break;
357 }
358 st_engine_heartbeat_disable(engine);
359
360 for (n = 0; n < ARRAY_SIZE(ce); n++) {
361 struct intel_context *tmp;
362
363 tmp = intel_context_create(engine);
364 if (IS_ERR(tmp)) {
365 err = PTR_ERR(tmp);
366 goto err_ce;
367 }
368
369 err = intel_context_pin(tmp);
370 if (err) {
371 intel_context_put(tmp);
372 goto err_ce;
373 }
374
375 memset32(tmp->ring->vaddr,
376 0xdeadbeef, /* trigger a hang if executed */
377 tmp->ring->vma->size / sizeof(u32));
378
379 ce[n] = tmp;
380 }
381
382 /* Create max prio spinner, followed by N low prio nops */
383 rq = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
384 if (IS_ERR(rq)) {
385 err = PTR_ERR(rq);
386 goto err_ce;
387 }
388
389 i915_request_get(rq);
390 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
391 i915_request_add(rq);
392
393 if (!igt_wait_for_spinner(&spin, rq)) {
394 intel_gt_set_wedged(gt);
395 i915_request_put(rq);
396 err = -ETIME;
397 goto err_ce;
398 }
399
400 /* Fill the ring until we cause a wrap */
401 n = 0;
402 while (intel_ring_direction(ce[0]->ring,
403 rq->wa_tail,
404 ce[0]->ring->tail) <= 0) {
405 struct i915_request *tmp;
406
407 tmp = intel_context_create_request(ce[0]);
408 if (IS_ERR(tmp)) {
409 err = PTR_ERR(tmp);
410 i915_request_put(rq);
411 goto err_ce;
412 }
413
414 i915_request_add(tmp);
415 intel_engine_flush_submission(engine);
416 n++;
417 }
418 intel_engine_flush_submission(engine);
419 pr_debug("%s: Filled ring with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
420 engine->name, n,
421 ce[0]->ring->size,
422 ce[0]->ring->tail,
423 ce[0]->ring->emit,
424 rq->tail);
425 GEM_BUG_ON(intel_ring_direction(ce[0]->ring,
426 rq->tail,
427 ce[0]->ring->tail) <= 0);
428 i915_request_put(rq);
429
430 /* Create a second ring to preempt the first ring after rq[0] */
431 rq = intel_context_create_request(ce[1]);
432 if (IS_ERR(rq)) {
433 err = PTR_ERR(rq);
434 goto err_ce;
435 }
436
437 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
438 i915_request_get(rq);
439 i915_request_add(rq);
440
441 err = wait_for_submit(engine, rq, HZ / 2);
442 i915_request_put(rq);
443 if (err) {
444 pr_err("%s: preemption request was not submitted\n",
445 engine->name);
446 err = -ETIME;
447 }
448
449 pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
450 engine->name,
451 ce[0]->ring->tail, ce[0]->ring->emit,
452 ce[1]->ring->tail, ce[1]->ring->emit);
453
454err_ce:
455 intel_engine_flush_submission(engine);
456 igt_spinner_end(&spin);
457 for (n = 0; n < ARRAY_SIZE(ce); n++) {
458 if (IS_ERR_OR_NULL(ce[n]))
459 break;
460
461 intel_context_unpin(ce[n]);
462 intel_context_put(ce[n]);
463 }
464 st_engine_heartbeat_enable(engine);
465 if (igt_live_test_end(&t))
466 err = -EIO;
467 if (err)
468 break;
469 }
470
471 igt_spinner_fini(&spin);
472 return err;
473}
474
475static int live_pin_rewind(void *arg)
476{
477 struct intel_gt *gt = arg;
478 struct intel_engine_cs *engine;
479 enum intel_engine_id id;
480 int err = 0;
481
482 /*
483 * We have to be careful not to trust intel_ring too much; for example,
484 * ring->head is updated upon retire, which is out of sync with pinning
485 * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
486 * or else we risk writing an older, stale value.
487 *
488 * To simulate this, let's apply a bit of deliberate sabotage.
489 */
490
491 for_each_engine(engine, gt, id) {
492 struct intel_context *ce;
493 struct i915_request *rq;
494 struct intel_ring *ring;
495 struct igt_live_test t;
496
497 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
498 err = -EIO;
499 break;
500 }
501
502 ce = intel_context_create(engine);
503 if (IS_ERR(ce)) {
504 err = PTR_ERR(ce);
505 break;
506 }
507
508 err = intel_context_pin(ce);
509 if (err) {
510 intel_context_put(ce);
511 break;
512 }
513
514 /* Keep the context awake while we play games */
515 err = i915_active_acquire(&ce->active);
516 if (err) {
517 intel_context_unpin(ce);
518 intel_context_put(ce);
519 break;
520 }
521 ring = ce->ring;
522
523 /* Poison the ring, and offset the next request from HEAD */
524 memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
525 ring->emit = ring->size / 2;
526 ring->tail = ring->emit;
527 GEM_BUG_ON(ring->head);
528
529 intel_context_unpin(ce);
530
531 /* Submit a simple nop request */
532 GEM_BUG_ON(intel_context_is_pinned(ce));
533 rq = intel_context_create_request(ce);
534 i915_active_release(&ce->active); /* e.g. async retire */
535 intel_context_put(ce);
536 if (IS_ERR(rq)) {
537 err = PTR_ERR(rq);
538 break;
539 }
540 GEM_BUG_ON(!rq->head);
541 i915_request_add(rq);
542
543 /* Expect not to hang! */
544 if (igt_live_test_end(&t)) {
545 err = -EIO;
546 break;
547 }
548 }
549
550 return err;
551}
552
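/*
 * Disable the submission tasklet and claim the per-engine reset bit so the
 * test can drive the reset by hand; wedge the GT if the bit is already held.
 */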
553static int engine_lock_reset_tasklet(struct intel_engine_cs *engine)
554{
555 tasklet_disable(&engine->sched_engine->tasklet);
556 local_bh_disable();
557
558 if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
559 &engine->gt->reset.flags)) {
560 local_bh_enable();
561 tasklet_enable(&engine->sched_engine->tasklet);
562
563 intel_gt_set_wedged(engine->gt);
564 return -EBUSY;
565 }
566
567 return 0;
568}
569
570static void engine_unlock_reset_tasklet(struct intel_engine_cs *engine)
571{
572 clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id,
573 &engine->gt->reset.flags);
574
575 local_bh_enable();
576 tasklet_enable(&engine->sched_engine->tasklet);
577}
578
579static int live_hold_reset(void *arg)
580{
581 struct intel_gt *gt = arg;
582 struct intel_engine_cs *engine;
583 enum intel_engine_id id;
584 struct igt_spinner spin;
585 int err = 0;
586
587 /*
588 * In order to support offline error capture for fast preempt reset,
589 * we need to decouple the guilty request and ensure that it and its
590 * descendants are not executed while the capture is in progress.
591 */
592
593 if (!intel_has_reset_engine(gt))
594 return 0;
595
596 if (igt_spinner_init(&spin, gt))
597 return -ENOMEM;
598
599 for_each_engine(engine, gt, id) {
600 struct intel_context *ce;
601 struct i915_request *rq;
602
603 ce = intel_context_create(engine);
604 if (IS_ERR(ce)) {
605 err = PTR_ERR(ce);
606 break;
607 }
608
609 st_engine_heartbeat_disable(engine);
610
611 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
612 if (IS_ERR(rq)) {
613 err = PTR_ERR(rq);
614 goto out;
615 }
616 i915_request_add(rq);
617
618 if (!igt_wait_for_spinner(&spin, rq)) {
619 intel_gt_set_wedged(gt);
620 err = -ETIME;
621 goto out;
622 }
623
624 /* We have our request executing, now remove it and reset */
625
626 err = engine_lock_reset_tasklet(engine);
627 if (err)
628 goto out;
629
630 engine->sched_engine->tasklet.callback(&engine->sched_engine->tasklet);
631 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
632
633 i915_request_get(rq);
634 execlists_hold(engine, rq);
635 GEM_BUG_ON(!i915_request_on_hold(rq));
636
637 __intel_engine_reset_bh(engine, NULL);
638 GEM_BUG_ON(rq->fence.error != -EIO);
639
640 engine_unlock_reset_tasklet(engine);
641
642 /* Check that we do not resubmit the held request */
643 if (!i915_request_wait(rq, 0, HZ / 5)) {
644 pr_err("%s: on hold request completed!\n",
645 engine->name);
646 i915_request_put(rq);
647 err = -EIO;
648 goto out;
649 }
650 GEM_BUG_ON(!i915_request_on_hold(rq));
651
652 /* But is resubmitted on release */
653 execlists_unhold(engine, rq);
654 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
655 pr_err("%s: held request did not complete!\n",
656 engine->name);
657 intel_gt_set_wedged(gt);
658 err = -ETIME;
659 }
660 i915_request_put(rq);
661
662out:
663 st_engine_heartbeat_enable(engine);
664 intel_context_put(ce);
665 if (err)
666 break;
667 }
668
669 igt_spinner_fini(&spin);
670 return err;
671}
672
673static const char *error_repr(int err)
674{
675 return err ? "bad" : "good";
676}
677
678static int live_error_interrupt(void *arg)
679{
680 static const struct error_phase {
681 enum { GOOD = 0, BAD = -EIO } error[2];
682 } phases[] = {
683 { { BAD, GOOD } },
684 { { BAD, BAD } },
685 { { BAD, GOOD } },
686 { { GOOD, GOOD } }, /* sentinel */
687 };
688 struct intel_gt *gt = arg;
689 struct intel_engine_cs *engine;
690 enum intel_engine_id id;
691
692 /*
693 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
694 * of invalid commands in user batches that will cause a GPU hang.
695 * This is a faster mechanism than using hangcheck/heartbeats, but
696 * only detects problems the HW knows about -- it will not warn when
697 * we kill the HW!
698 *
699 * To verify our detection and reset, we throw some invalid commands
700 * at the HW and wait for the interrupt.
701 */
702
703 if (!intel_has_reset_engine(gt))
704 return 0;
705
706 for_each_engine(engine, gt, id) {
707 const struct error_phase *p;
708 int err = 0;
709
710 st_engine_heartbeat_disable(engine);
711
712 for (p = phases; p->error[0] != GOOD; p++) {
713 struct i915_request *client[ARRAY_SIZE(phases->error)];
714 u32 *cs;
715 int i;
716
717 memset(client, 0, sizeof(client)); /* zero all slots for the cleanup path */
718 for (i = 0; i < ARRAY_SIZE(client); i++) {
719 struct intel_context *ce;
720 struct i915_request *rq;
721
722 ce = intel_context_create(engine);
723 if (IS_ERR(ce)) {
724 err = PTR_ERR(ce);
725 goto out;
726 }
727
728 rq = intel_context_create_request(ce);
729 intel_context_put(ce);
730 if (IS_ERR(rq)) {
731 err = PTR_ERR(rq);
732 goto out;
733 }
734
735 if (rq->engine->emit_init_breadcrumb) {
736 err = rq->engine->emit_init_breadcrumb(rq);
737 if (err) {
738 i915_request_add(rq);
739 goto out;
740 }
741 }
742
743 cs = intel_ring_begin(rq, 2);
744 if (IS_ERR(cs)) {
745 i915_request_add(rq);
746 err = PTR_ERR(cs);
747 goto out;
748 }
749
750 if (p->error[i]) {
751 *cs++ = 0xdeadbeef;
752 *cs++ = 0xdeadbeef;
753 } else {
754 *cs++ = MI_NOOP;
755 *cs++ = MI_NOOP;
756 }
757
758 client[i] = i915_request_get(rq);
759 i915_request_add(rq);
760 }
761
762 err = wait_for_submit(engine, client[0], HZ / 2);
763 if (err) {
764 pr_err("%s: first request did not start within time!\n",
765 engine->name);
766 err = -ETIME;
767 goto out;
768 }
769
770 for (i = 0; i < ARRAY_SIZE(client); i++) {
771 if (i915_request_wait(client[i], 0, HZ / 5) < 0)
772 pr_debug("%s: %s request incomplete!\n",
773 engine->name,
774 error_repr(p->error[i]));
775
776 if (!i915_request_started(client[i])) {
777 pr_err("%s: %s request not started!\n",
778 engine->name,
779 error_repr(p->error[i]));
780 err = -ETIME;
781 goto out;
782 }
783
784 /* Kick the tasklet to process the error */
785 intel_engine_flush_submission(engine);
786 if (client[i]->fence.error != p->error[i]) {
787 pr_err("%s: %s request (%s) with wrong error code: %d\n",
788 engine->name,
789 error_repr(p->error[i]),
790 i915_request_completed(client[i]) ? "completed" : "running",
791 client[i]->fence.error);
792 err = -EINVAL;
793 goto out;
794 }
795 }
796
797out:
798 for (i = 0; i < ARRAY_SIZE(client); i++)
799 if (client[i])
800 i915_request_put(client[i]);
801 if (err) {
802 pr_err("%s: failed at phase[%zd] { %d, %d }\n",
803 engine->name, p - phases,
804 p->error[0], p->error[1]);
805 break;
806 }
807 }
808
809 st_engine_heartbeat_enable(engine);
810 if (err) {
811 intel_gt_set_wedged(gt);
812 return err;
813 }
814 }
815
816 return 0;
817}
818
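/*
 * Emit a wait on semaphore slot idx followed by a write releasing slot
 * idx - 1, so the chain unwinds from the last element back to the first.
 */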
819static int
820emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
821{
822 u32 *cs;
823
824 cs = intel_ring_begin(rq, 10);
825 if (IS_ERR(cs))
826 return PTR_ERR(cs);
827
828 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
829
830 *cs++ = MI_SEMAPHORE_WAIT |
831 MI_SEMAPHORE_GLOBAL_GTT |
832 MI_SEMAPHORE_POLL |
833 MI_SEMAPHORE_SAD_NEQ_SDD;
834 *cs++ = 0;
835 *cs++ = i915_ggtt_offset(vma) + 4 * idx;
836 *cs++ = 0;
837
838 if (idx > 0) {
839 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
840 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
841 *cs++ = 0;
842 *cs++ = 1;
843 } else {
844 *cs++ = MI_NOOP;
845 *cs++ = MI_NOOP;
846 *cs++ = MI_NOOP;
847 *cs++ = MI_NOOP;
848 }
849
850 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
851
852 intel_ring_advance(rq, cs);
853 return 0;
854}
855
856static struct i915_request *
857semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
858{
859 struct intel_context *ce;
860 struct i915_request *rq;
861 int err;
862
863 ce = intel_context_create(engine);
864 if (IS_ERR(ce))
865 return ERR_CAST(ce);
866
867 rq = intel_context_create_request(ce);
868 if (IS_ERR(rq))
869 goto out_ce;
870
871 err = 0;
872 if (rq->engine->emit_init_breadcrumb)
873 err = rq->engine->emit_init_breadcrumb(rq);
874 if (err == 0)
875 err = emit_semaphore_chain(rq, vma, idx);
876 if (err == 0)
877 i915_request_get(rq);
878 i915_request_add(rq);
879 if (err)
880 rq = ERR_PTR(err);
881
882out_ce:
883 intel_context_put(ce);
884 return rq;
885}
886
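/*
 * Submit a kernel request at the given priority that writes to semaphore
 * slot idx - 1, releasing the corresponding semaphore_queue() request.
 */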
887static int
888release_queue(struct intel_engine_cs *engine,
889 struct i915_vma *vma,
890 int idx, int prio)
891{
892 struct i915_sched_attr attr = {
893 .priority = prio,
894 };
895 struct i915_request *rq;
896 u32 *cs;
897
898 rq = intel_engine_create_kernel_request(engine);
899 if (IS_ERR(rq))
900 return PTR_ERR(rq);
901
902 cs = intel_ring_begin(rq, 4);
903 if (IS_ERR(cs)) {
904 i915_request_add(rq);
905 return PTR_ERR(cs);
906 }
907
908 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
909 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
910 *cs++ = 0;
911 *cs++ = 1;
912
913 intel_ring_advance(rq, cs);
914
915 i915_request_get(rq);
916 i915_request_add(rq);
917
918 local_bh_disable();
919 engine->sched_engine->schedule(rq, &attr);
920 local_bh_enable(); /* kick tasklet */
921
922 i915_request_put(rq);
923
924 return 0;
925}
926
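/*
 * Queue a chain of semaphore waits across every engine, release the end of
 * the chain at barrier priority, and check that the head request on @outer
 * completes, which requires timeslicing along the chain.
 */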
927static int
928slice_semaphore_queue(struct intel_engine_cs *outer,
929 struct i915_vma *vma,
930 int count)
931{
932 struct intel_engine_cs *engine;
933 struct i915_request *head;
934 enum intel_engine_id id;
935 int err, i, n = 0;
936
937 head = semaphore_queue(outer, vma, n++);
938 if (IS_ERR(head))
939 return PTR_ERR(head);
940
941 for_each_engine(engine, outer->gt, id) {
942 if (!intel_engine_has_preemption(engine))
943 continue;
944
945 for (i = 0; i < count; i++) {
946 struct i915_request *rq;
947
948 rq = semaphore_queue(engine, vma, n++);
949 if (IS_ERR(rq)) {
950 err = PTR_ERR(rq);
951 goto out;
952 }
953
954 i915_request_put(rq);
955 }
956 }
957
958 err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER);
959 if (err)
960 goto out;
961
962 if (i915_request_wait(head, 0,
963 2 * outer->gt->info.num_engines * (count + 2) * (count + 3)) < 0) {
964 pr_err("%s: Failed to slice along semaphore chain of length (%d, %d)!\n",
965 outer->name, count, n);
966 GEM_TRACE_DUMP();
967 intel_gt_set_wedged(outer->gt);
968 err = -EIO;
969 }
970
971out:
972 i915_request_put(head);
973 return err;
974}
975
976static int live_timeslice_preempt(void *arg)
977{
978 struct intel_gt *gt = arg;
979 struct drm_i915_gem_object *obj;
980 struct intel_engine_cs *engine;
981 enum intel_engine_id id;
982 struct i915_vma *vma;
983 void *vaddr;
984 int err = 0;
985
986 /*
987 * If a request takes too long, we would like to give other users
988 * a fair go on the GPU. In particular, users may create batches
989 * that wait upon external input, where that input may even be
990 * supplied by another GPU job. To avoid blocking forever, we
991 * need to preempt the current task and replace it with another
992 * ready task.
993 */
994 if (!CONFIG_DRM_I915_TIMESLICE_DURATION)
995 return 0;
996
997 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
998 if (IS_ERR(obj))
999 return PTR_ERR(obj);
1000
1001 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1002 if (IS_ERR(vma)) {
1003 err = PTR_ERR(vma);
1004 goto err_obj;
1005 }
1006
1007 vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
1008 if (IS_ERR(vaddr)) {
1009 err = PTR_ERR(vaddr);
1010 goto err_obj;
1011 }
1012
1013 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1014 if (err)
1015 goto err_map;
1016
1017 err = i915_vma_sync(vma);
1018 if (err)
1019 goto err_pin;
1020
1021 for_each_engine(engine, gt, id) {
1022 if (!intel_engine_has_preemption(engine))
1023 continue;
1024
1025 memset(vaddr, 0, PAGE_SIZE);
1026
1027 st_engine_heartbeat_disable(engine);
1028 err = slice_semaphore_queue(engine, vma, 5);
1029 st_engine_heartbeat_enable(engine);
1030 if (err)
1031 goto err_pin;
1032
1033 if (igt_flush_test(gt->i915)) {
1034 err = -EIO;
1035 goto err_pin;
1036 }
1037 }
1038
1039err_pin:
1040 i915_vma_unpin(vma);
1041err_map:
1042 i915_gem_object_unpin_map(obj);
1043err_obj:
1044 i915_gem_object_put(obj);
1045 return err;
1046}
1047
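/*
 * Emit a request that waits for slot[0] to reach @idx, records RING_TIMESTAMP
 * into slot[idx], and then bumps slot[0] to idx + 1 for the next request.
 */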
1048static struct i915_request *
1049create_rewinder(struct intel_context *ce,
1050 struct i915_request *wait,
1051 void *slot, int idx)
1052{
1053 const u32 offset =
1054 i915_ggtt_offset(ce->engine->status_page.vma) +
1055 offset_in_page(slot);
1056 struct i915_request *rq;
1057 u32 *cs;
1058 int err;
1059
1060 rq = intel_context_create_request(ce);
1061 if (IS_ERR(rq))
1062 return rq;
1063
1064 if (wait) {
1065 err = i915_request_await_dma_fence(rq, &wait->fence);
1066 if (err)
1067 goto err;
1068 }
1069
1070 cs = intel_ring_begin(rq, 14);
1071 if (IS_ERR(cs)) {
1072 err = PTR_ERR(cs);
1073 goto err;
1074 }
1075
1076 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1077 *cs++ = MI_NOOP;
1078
1079 *cs++ = MI_SEMAPHORE_WAIT |
1080 MI_SEMAPHORE_GLOBAL_GTT |
1081 MI_SEMAPHORE_POLL |
1082 MI_SEMAPHORE_SAD_GTE_SDD;
1083 *cs++ = idx;
1084 *cs++ = offset;
1085 *cs++ = 0;
1086
1087 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
1088 *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
1089 *cs++ = offset + idx * sizeof(u32);
1090 *cs++ = 0;
1091
1092 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1093 *cs++ = offset;
1094 *cs++ = 0;
1095 *cs++ = idx + 1;
1096
1097 intel_ring_advance(rq, cs);
1098
1099 err = 0;
1100err:
1101 i915_request_get(rq);
1102 i915_request_add(rq);
1103 if (err) {
1104 i915_request_put(rq);
1105 return ERR_PTR(err);
1106 }
1107
1108 return rq;
1109}
1110
1111static int live_timeslice_rewind(void *arg)
1112{
1113 struct intel_gt *gt = arg;
1114 struct intel_engine_cs *engine;
1115 enum intel_engine_id id;
1116
1117 /*
1118 * The usual presumption on timeslice expiration is that we replace
1119 * the active context with another. However, given a chain of
1120 * dependencies we may end up replacing the context with itself, but
1121 * resubmitting only a few of its requests, forcing us to rewind the
1122 * RING_TAIL of the original request.
1123 */
1124 if (!CONFIG_DRM_I915_TIMESLICE_DURATION)
1125 return 0;
1126
1127 for_each_engine(engine, gt, id) {
1128 enum { A1, A2, B1 };
1129 enum { X = 1, Z, Y };
1130 struct i915_request *rq[3] = {};
1131 struct intel_context *ce;
1132 unsigned long timeslice;
1133 int i, err = 0;
1134 u32 *slot;
1135
1136 if (!intel_engine_has_timeslices(engine))
1137 continue;
1138
1139 /*
1140 * A:rq1 -- semaphore wait, timestamp X
1141 * A:rq2 -- write timestamp Y
1142 *
1143 * B:rq1 [await A:rq1] -- write timestamp Z
1144 *
1145 * Force timeslice, release semaphore.
1146 *
1147 * Expect execution/evaluation order XZY
1148 */
1149
1150 st_engine_heartbeat_disable(engine);
1151 timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1152
1153 slot = memset32(engine->status_page.addr + 1000, 0, 4);
1154
1155 ce = intel_context_create(engine);
1156 if (IS_ERR(ce)) {
1157 err = PTR_ERR(ce);
1158 goto err;
1159 }
1160
1161 rq[A1] = create_rewinder(ce, NULL, slot, X);
1162 if (IS_ERR(rq[A1])) {
1163 intel_context_put(ce);
1164 goto err;
1165 }
1166
1167 rq[A2] = create_rewinder(ce, NULL, slot, Y);
1168 intel_context_put(ce);
1169 if (IS_ERR(rq[A2]))
1170 goto err;
1171
1172 err = wait_for_submit(engine, rq[A2], HZ / 2);
1173 if (err) {
1174 pr_err("%s: failed to submit first context\n",
1175 engine->name);
1176 goto err;
1177 }
1178
1179 ce = intel_context_create(engine);
1180 if (IS_ERR(ce)) {
1181 err = PTR_ERR(ce);
1182 goto err;
1183 }
1184
1185 rq[B1] = create_rewinder(ce, rq[A1], slot, Z);
1186 intel_context_put(ce);
1187 if (IS_ERR(rq[2]))
1188 goto err;
1189
1190 err = wait_for_submit(engine, rq[B1], HZ / 2);
1191 if (err) {
1192 pr_err("%s: failed to submit second context\n",
1193 engine->name);
1194 goto err;
1195 }
1196
1197 /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
1198 ENGINE_TRACE(engine, "forcing tasklet for rewind\n");
1199 while (i915_request_is_active(rq[A2])) { /* semaphore yield! */
1200 /* Wait for the timeslice to kick in */
1201 del_timer(&engine->execlists.timer);
1202 tasklet_hi_schedule(&engine->sched_engine->tasklet);
1203 intel_engine_flush_submission(engine);
1204 }
1205 /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
1206 GEM_BUG_ON(!i915_request_is_active(rq[A1]));
1207 GEM_BUG_ON(!i915_request_is_active(rq[B1]));
1208 GEM_BUG_ON(i915_request_is_active(rq[A2]));
1209
1210 /* Release the hounds! */
1211 slot[0] = 1;
1212 wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */
1213
1214 for (i = 1; i <= 3; i++) {
1215 unsigned long timeout = jiffies + HZ / 2;
1216
1217 while (!READ_ONCE(slot[i]) &&
1218 time_before(jiffies, timeout))
1219 ;
1220
1221 if (!time_before(jiffies, timeout)) {
1222 pr_err("%s: rq[%d] timed out\n",
1223 engine->name, i - 1);
1224 err = -ETIME;
1225 goto err;
1226 }
1227
1228 pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
1229 }
1230
1231 /* XZY: XZ < XY */
1232 if (slot[Z] - slot[X] >= slot[Y] - slot[X]) {
1233 pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
1234 engine->name,
1235 slot[Z] - slot[X],
1236 slot[Y] - slot[X]);
1237 err = -EINVAL;
1238 }
1239
1240err:
1241 memset32(&slot[0], -1, 4);
1242 wmb();
1243
1244 engine->props.timeslice_duration_ms = timeslice;
1245 st_engine_heartbeat_enable(engine);
1246 for (i = 0; i < 3; i++)
1247 i915_request_put(rq[i]);
1248 if (igt_flush_test(gt->i915))
1249 err = -EIO;
1250 if (err)
1251 return err;
1252 }
1253
1254 return 0;
1255}
1256
1257static struct i915_request *nop_request(struct intel_engine_cs *engine)
1258{
1259 struct i915_request *rq;
1260
1261 rq = intel_engine_create_kernel_request(engine);
1262 if (IS_ERR(rq))
1263 return rq;
1264
1265 i915_request_get(rq);
1266 i915_request_add(rq);
1267
1268 return rq;
1269}
1270
1271static long slice_timeout(struct intel_engine_cs *engine)
1272{
1273 long timeout;
1274
1275 /* Enough time for a timeslice to kick in, and kick out */
1276 timeout = 2 * msecs_to_jiffies_timeout(timeslice(engine));
1277
1278 /* Enough time for the nop request to complete */
1279 timeout += HZ / 5;
1280
1281 return timeout + 1;
1282}
1283
1284static int live_timeslice_queue(void *arg)
1285{
1286 struct intel_gt *gt = arg;
1287 struct drm_i915_gem_object *obj;
1288 struct intel_engine_cs *engine;
1289 enum intel_engine_id id;
1290 struct i915_vma *vma;
1291 void *vaddr;
1292 int err = 0;
1293
1294 /*
1295 * Make sure that even if ELSP[0] and ELSP[1] are filled, with
1296 * timeslicing between them disabled, we *do* enable timeslicing
1297 * if the queue demands it. (Normally, we do not submit if
1298 * ELSP[1] is already occupied, so must rely on timeslicing to
1299 * eject ELSP[0] in favour of the queue.)
1300 */
1301 if (!CONFIG_DRM_I915_TIMESLICE_DURATION)
1302 return 0;
1303
1304 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1305 if (IS_ERR(obj))
1306 return PTR_ERR(obj);
1307
1308 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1309 if (IS_ERR(vma)) {
1310 err = PTR_ERR(vma);
1311 goto err_obj;
1312 }
1313
1314 vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
1315 if (IS_ERR(vaddr)) {
1316 err = PTR_ERR(vaddr);
1317 goto err_obj;
1318 }
1319
1320 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1321 if (err)
1322 goto err_map;
1323
1324 err = i915_vma_sync(vma);
1325 if (err)
1326 goto err_pin;
1327
1328 for_each_engine(engine, gt, id) {
1329 struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
1330 struct i915_request *rq, *nop;
1331
1332 if (!intel_engine_has_preemption(engine))
1333 continue;
1334
1335 st_engine_heartbeat_disable(engine);
1336 memset(vaddr, 0, PAGE_SIZE);
1337
1338 /* ELSP[0]: semaphore wait */
1339 rq = semaphore_queue(engine, vma, 0);
1340 if (IS_ERR(rq)) {
1341 err = PTR_ERR(rq);
1342 goto err_heartbeat;
1343 }
1344 engine->sched_engine->schedule(rq, &attr);
1345 err = wait_for_submit(engine, rq, HZ / 2);
1346 if (err) {
1347 pr_err("%s: Timed out trying to submit semaphores\n",
1348 engine->name);
1349 goto err_rq;
1350 }
1351
1352 /* ELSP[1]: nop request */
1353 nop = nop_request(engine);
1354 if (IS_ERR(nop)) {
1355 err = PTR_ERR(nop);
1356 goto err_rq;
1357 }
1358 err = wait_for_submit(engine, nop, HZ / 2);
1359 i915_request_put(nop);
1360 if (err) {
1361 pr_err("%s: Timed out trying to submit nop\n",
1362 engine->name);
1363 goto err_rq;
1364 }
1365
1366 GEM_BUG_ON(i915_request_completed(rq));
1367 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
1368
1369 /* Queue: semaphore signal, matching the priority of the semaphore */
1370 err = release_queue(engine, vma, 1, effective_prio(rq));
1371 if (err)
1372 goto err_rq;
1373
1374 /* Wait until we ack the release_queue and start timeslicing */
1375 do {
1376 cond_resched();
1377 intel_engine_flush_submission(engine);
1378 } while (READ_ONCE(engine->execlists.pending[0]));
1379
1380 /* Timeslice every jiffy, so within 2 we should signal */
1381 if (i915_request_wait(rq, 0, slice_timeout(engine)) < 0) {
1382 struct drm_printer p =
1383 drm_info_printer(gt->i915->drm.dev);
1384
1385 pr_err("%s: Failed to timeslice into queue\n",
1386 engine->name);
1387 intel_engine_dump(engine, &p,
1388 "%s\n", engine->name);
1389
1390 memset(vaddr, 0xff, PAGE_SIZE);
1391 err = -EIO;
1392 }
1393err_rq:
1394 i915_request_put(rq);
1395err_heartbeat:
1396 st_engine_heartbeat_enable(engine);
1397 if (err)
1398 break;
1399 }
1400
1401err_pin:
1402 i915_vma_unpin(vma);
1403err_map:
1404 i915_gem_object_unpin_map(obj);
1405err_obj:
1406 i915_gem_object_put(obj);
1407 return err;
1408}
1409
1410static int live_timeslice_nopreempt(void *arg)
1411{
1412 struct intel_gt *gt = arg;
1413 struct intel_engine_cs *engine;
1414 enum intel_engine_id id;
1415 struct igt_spinner spin;
1416 int err = 0;
1417
1418 /*
1419 * We should not timeslice into a request that is marked with
1420 * I915_REQUEST_NOPREEMPT.
1421 */
1422 if (!CONFIG_DRM_I915_TIMESLICE_DURATION)
1423 return 0;
1424
1425 if (igt_spinner_init(&spin, gt))
1426 return -ENOMEM;
1427
1428 for_each_engine(engine, gt, id) {
1429 struct intel_context *ce;
1430 struct i915_request *rq;
1431 unsigned long timeslice;
1432
1433 if (!intel_engine_has_preemption(engine))
1434 continue;
1435
1436 ce = intel_context_create(engine);
1437 if (IS_ERR(ce)) {
1438 err = PTR_ERR(ce);
1439 break;
1440 }
1441
1442 st_engine_heartbeat_disable(engine);
1443 timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1444
1445 /* Create an unpreemptible spinner */
1446
1447 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
1448 intel_context_put(ce);
1449 if (IS_ERR(rq)) {
1450 err = PTR_ERR(rq);
1451 goto out_heartbeat;
1452 }
1453
1454 i915_request_get(rq);
1455 i915_request_add(rq);
1456
1457 if (!igt_wait_for_spinner(&spin, rq)) {
1458 i915_request_put(rq);
1459 err = -ETIME;
1460 goto out_spin;
1461 }
1462
1463 set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags);
1464 i915_request_put(rq);
1465
1466 /* Followed by a maximum priority barrier (heartbeat) */
1467
1468 ce = intel_context_create(engine);
1469 if (IS_ERR(ce)) {
1470 err = PTR_ERR(ce);
1471 goto out_spin;
1472 }
1473
1474 rq = intel_context_create_request(ce);
1475 intel_context_put(ce);
1476 if (IS_ERR(rq)) {
1477 err = PTR_ERR(rq);
1478 goto out_spin;
1479 }
1480
1481 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
1482 i915_request_get(rq);
1483 i915_request_add(rq);
1484
1485 /*
1486 * Wait until the barrier is in ELSP, and we know timeslicing
1487 * will have been activated.
1488 */
1489 if (wait_for_submit(engine, rq, HZ / 2)) {
1490 i915_request_put(rq);
1491 err = -ETIME;
1492 goto out_spin;
1493 }
1494
1495 /*
1496 * Since the ELSP[0] request is unpreemptible, it should not
1497 * allow the maximum priority barrier through. Wait long
1498 * enough to see if it is timesliced in by mistake.
1499 */
1500 if (i915_request_wait(rq, 0, slice_timeout(engine)) >= 0) {
1501 pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n",
1502 engine->name);
1503 err = -EINVAL;
1504 }
1505 i915_request_put(rq);
1506
1507out_spin:
1508 igt_spinner_end(&spin);
1509out_heartbeat:
1510 xchg(&engine->props.timeslice_duration_ms, timeslice);
1511 st_engine_heartbeat_enable(engine);
1512 if (err)
1513 break;
1514
1515 if (igt_flush_test(gt->i915)) {
1516 err = -EIO;
1517 break;
1518 }
1519 }
1520
1521 igt_spinner_fini(&spin);
1522 return err;
1523}
1524
1525static int live_busywait_preempt(void *arg)
1526{
1527 struct intel_gt *gt = arg;
1528 struct i915_gem_context *ctx_hi, *ctx_lo;
1529 struct intel_engine_cs *engine;
1530 struct drm_i915_gem_object *obj;
1531 struct i915_vma *vma;
1532 enum intel_engine_id id;
1533 int err = -ENOMEM;
1534 u32 *map;
1535
1536 /*
1537 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
1538 * preempt the busywaits used to synchronise between rings.
1539 */
1540
1541 ctx_hi = kernel_context(gt->i915, NULL);
1542 if (!ctx_hi)
1543 return -ENOMEM;
1544 ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
1545
1546 ctx_lo = kernel_context(gt->i915, NULL);
1547 if (!ctx_lo)
1548 goto err_ctx_hi;
1549 ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
1550
1551 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1552 if (IS_ERR(obj)) {
1553 err = PTR_ERR(obj);
1554 goto err_ctx_lo;
1555 }
1556
1557 map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
1558 if (IS_ERR(map)) {
1559 err = PTR_ERR(map);
1560 goto err_obj;
1561 }
1562
1563 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1564 if (IS_ERR(vma)) {
1565 err = PTR_ERR(vma);
1566 goto err_map;
1567 }
1568
1569 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1570 if (err)
1571 goto err_map;
1572
1573 err = i915_vma_sync(vma);
1574 if (err)
1575 goto err_vma;
1576
1577 for_each_engine(engine, gt, id) {
1578 struct i915_request *lo, *hi;
1579 struct igt_live_test t;
1580 u32 *cs;
1581
1582 if (!intel_engine_has_preemption(engine))
1583 continue;
1584
1585 if (!intel_engine_can_store_dword(engine))
1586 continue;
1587
1588 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1589 err = -EIO;
1590 goto err_vma;
1591 }
1592
1593 /*
1594 * We create two requests. The low priority request
1595 * busywaits on a semaphore (inside the ringbuffer where
1596 * it should be preemptible) and the high priority request
1597 * uses a MI_STORE_DWORD_IMM to update the semaphore value,
1598 * allowing the first request to complete. If preemption
1599 * fails, we hang instead.
1600 */
1601
1602 lo = igt_request_alloc(ctx_lo, engine);
1603 if (IS_ERR(lo)) {
1604 err = PTR_ERR(lo);
1605 goto err_vma;
1606 }
1607
1608 cs = intel_ring_begin(lo, 8);
1609 if (IS_ERR(cs)) {
1610 err = PTR_ERR(cs);
1611 i915_request_add(lo);
1612 goto err_vma;
1613 }
1614
1615 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1616 *cs++ = i915_ggtt_offset(vma);
1617 *cs++ = 0;
1618 *cs++ = 1;
1619
1620 /* XXX Do we need a flush + invalidate here? */
1621
1622 *cs++ = MI_SEMAPHORE_WAIT |
1623 MI_SEMAPHORE_GLOBAL_GTT |
1624 MI_SEMAPHORE_POLL |
1625 MI_SEMAPHORE_SAD_EQ_SDD;
1626 *cs++ = 0;
1627 *cs++ = i915_ggtt_offset(vma);
1628 *cs++ = 0;
1629
1630 intel_ring_advance(lo, cs);
1631
1632 i915_request_get(lo);
1633 i915_request_add(lo);
1634
1635 if (wait_for(READ_ONCE(*map), 10)) {
1636 i915_request_put(lo);
1637 err = -ETIMEDOUT;
1638 goto err_vma;
1639 }
1640
1641 /* Low priority request should be busywaiting now */
1642 if (i915_request_wait(lo, 0, 1) != -ETIME) {
1643 i915_request_put(lo);
1644 pr_err("%s: Busywaiting request did not!\n",
1645 engine->name);
1646 err = -EIO;
1647 goto err_vma;
1648 }
1649
1650 hi = igt_request_alloc(ctx_hi, engine);
1651 if (IS_ERR(hi)) {
1652 err = PTR_ERR(hi);
1653 i915_request_put(lo);
1654 goto err_vma;
1655 }
1656
1657 cs = intel_ring_begin(hi, 4);
1658 if (IS_ERR(cs)) {
1659 err = PTR_ERR(cs);
1660 i915_request_add(hi);
1661 i915_request_put(lo);
1662 goto err_vma;
1663 }
1664
1665 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1666 *cs++ = i915_ggtt_offset(vma);
1667 *cs++ = 0;
1668 *cs++ = 0;
1669
1670 intel_ring_advance(hi, cs);
1671 i915_request_add(hi);
1672
1673 if (i915_request_wait(lo, 0, HZ / 5) < 0) {
1674 struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
1675
1676 pr_err("%s: Failed to preempt semaphore busywait!\n",
1677 engine->name);
1678
1679 intel_engine_dump(engine, &p, "%s\n", engine->name);
1680 GEM_TRACE_DUMP();
1681
1682 i915_request_put(lo);
1683 intel_gt_set_wedged(gt);
1684 err = -EIO;
1685 goto err_vma;
1686 }
1687 GEM_BUG_ON(READ_ONCE(*map));
1688 i915_request_put(lo);
1689
1690 if (igt_live_test_end(&t)) {
1691 err = -EIO;
1692 goto err_vma;
1693 }
1694 }
1695
1696 err = 0;
1697err_vma:
1698 i915_vma_unpin(vma);
1699err_map:
1700 i915_gem_object_unpin_map(obj);
1701err_obj:
1702 i915_gem_object_put(obj);
1703err_ctx_lo:
1704 kernel_context_close(ctx_lo);
1705err_ctx_hi:
1706 kernel_context_close(ctx_hi);
1707 return err;
1708}
1709
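/* Create a spinner request on @engine via the legacy engine map of @ctx. */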
1710static struct i915_request *
1711spinner_create_request(struct igt_spinner *spin,
1712 struct i915_gem_context *ctx,
1713 struct intel_engine_cs *engine,
1714 u32 arb)
1715{
1716 struct intel_context *ce;
1717 struct i915_request *rq;
1718
1719 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
1720 if (IS_ERR(ce))
1721 return ERR_CAST(ce);
1722
1723 rq = igt_spinner_create_request(spin, ce, arb);
1724 intel_context_put(ce);
1725 return rq;
1726}
1727
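/*
 * Check that a spinner from a maximum user priority context preempts a
 * spinner already running from a minimum priority context on each engine.
 */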
1728static int live_preempt(void *arg)
1729{
1730 struct intel_gt *gt = arg;
1731 struct i915_gem_context *ctx_hi, *ctx_lo;
1732 struct igt_spinner spin_hi, spin_lo;
1733 struct intel_engine_cs *engine;
1734 enum intel_engine_id id;
1735 int err = -ENOMEM;
1736
1737 ctx_hi = kernel_context(gt->i915, NULL);
1738 if (!ctx_hi)
1739 return -ENOMEM;
1740 ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
1741
1742 ctx_lo = kernel_context(gt->i915, NULL);
1743 if (!ctx_lo)
1744 goto err_ctx_hi;
1745 ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
1746
1747 if (igt_spinner_init(&spin_hi, gt))
1748 goto err_ctx_lo;
1749
1750 if (igt_spinner_init(&spin_lo, gt))
1751 goto err_spin_hi;
1752
1753 for_each_engine(engine, gt, id) {
1754 struct igt_live_test t;
1755 struct i915_request *rq;
1756
1757 if (!intel_engine_has_preemption(engine))
1758 continue;
1759
1760 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1761 err = -EIO;
1762 goto err_spin_lo;
1763 }
1764
1765 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1766 MI_ARB_CHECK);
1767 if (IS_ERR(rq)) {
1768 err = PTR_ERR(rq);
1769 goto err_spin_lo;
1770 }
1771
1772 i915_request_add(rq);
1773 if (!igt_wait_for_spinner(&spin_lo, rq)) {
1774 GEM_TRACE("lo spinner failed to start\n");
1775 GEM_TRACE_DUMP();
1776 intel_gt_set_wedged(gt);
1777 err = -EIO;
1778 goto err_spin_lo;
1779 }
1780
1781 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1782 MI_ARB_CHECK);
1783 if (IS_ERR(rq)) {
1784 igt_spinner_end(&spin_lo);
1785 err = PTR_ERR(rq);
1786 goto err_spin_lo;
1787 }
1788
1789 i915_request_add(rq);
1790 if (!igt_wait_for_spinner(&spin_hi, rq)) {
1791 GEM_TRACE("hi spinner failed to start\n");
1792 GEM_TRACE_DUMP();
1793 intel_gt_set_wedged(gt);
1794 err = -EIO;
1795 goto err_spin_lo;
1796 }
1797
1798 igt_spinner_end(&spin_hi);
1799 igt_spinner_end(&spin_lo);
1800
1801 if (igt_live_test_end(&t)) {
1802 err = -EIO;
1803 goto err_spin_lo;
1804 }
1805 }
1806
1807 err = 0;
1808err_spin_lo:
1809 igt_spinner_fini(&spin_lo);
1810err_spin_hi:
1811 igt_spinner_fini(&spin_hi);
1812err_ctx_lo:
1813 kernel_context_close(ctx_lo);
1814err_ctx_hi:
1815 kernel_context_close(ctx_hi);
1816 return err;
1817}
1818
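/*
 * As live_preempt, but raise the priority only after both requests have been
 * submitted, checking that the late priority bump still triggers preemption.
 */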
1819static int live_late_preempt(void *arg)
1820{
1821 struct intel_gt *gt = arg;
1822 struct i915_gem_context *ctx_hi, *ctx_lo;
1823 struct igt_spinner spin_hi, spin_lo;
1824 struct intel_engine_cs *engine;
1825 struct i915_sched_attr attr = {};
1826 enum intel_engine_id id;
1827 int err = -ENOMEM;
1828
1829 ctx_hi = kernel_context(gt->i915, NULL);
1830 if (!ctx_hi)
1831 return -ENOMEM;
1832
1833 ctx_lo = kernel_context(gt->i915, NULL);
1834 if (!ctx_lo)
1835 goto err_ctx_hi;
1836
1837 if (igt_spinner_init(&spin_hi, gt))
1838 goto err_ctx_lo;
1839
1840 if (igt_spinner_init(&spin_lo, gt))
1841 goto err_spin_hi;
1842
1843 /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
1844 ctx_lo->sched.priority = 1;
1845
1846 for_each_engine(engine, gt, id) {
1847 struct igt_live_test t;
1848 struct i915_request *rq;
1849
1850 if (!intel_engine_has_preemption(engine))
1851 continue;
1852
1853 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1854 err = -EIO;
1855 goto err_spin_lo;
1856 }
1857
1858 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1859 MI_ARB_CHECK);
1860 if (IS_ERR(rq)) {
1861 err = PTR_ERR(rq);
1862 goto err_spin_lo;
1863 }
1864
1865 i915_request_add(rq);
1866 if (!igt_wait_for_spinner(&spin_lo, rq)) {
1867 pr_err("First context failed to start\n");
1868 goto err_wedged;
1869 }
1870
1871 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1872 MI_NOOP);
1873 if (IS_ERR(rq)) {
1874 igt_spinner_end(&spin_lo);
1875 err = PTR_ERR(rq);
1876 goto err_spin_lo;
1877 }
1878
1879 i915_request_add(rq);
1880 if (igt_wait_for_spinner(&spin_hi, rq)) {
1881 pr_err("Second context overtook first?\n");
1882 goto err_wedged;
1883 }
1884
1885 attr.priority = I915_PRIORITY_MAX;
1886 engine->sched_engine->schedule(rq, &attr);
1887
1888 if (!igt_wait_for_spinner(&spin_hi, rq)) {
1889 pr_err("High priority context failed to preempt the low priority context\n");
1890 GEM_TRACE_DUMP();
1891 goto err_wedged;
1892 }
1893
1894 igt_spinner_end(&spin_hi);
1895 igt_spinner_end(&spin_lo);
1896
1897 if (igt_live_test_end(&t)) {
1898 err = -EIO;
1899 goto err_spin_lo;
1900 }
1901 }
1902
1903 err = 0;
1904err_spin_lo:
1905 igt_spinner_fini(&spin_lo);
1906err_spin_hi:
1907 igt_spinner_fini(&spin_hi);
1908err_ctx_lo:
1909 kernel_context_close(ctx_lo);
1910err_ctx_hi:
1911 kernel_context_close(ctx_hi);
1912 return err;
1913
1914err_wedged:
1915 igt_spinner_end(&spin_hi);
1916 igt_spinner_end(&spin_lo);
1917 intel_gt_set_wedged(gt);
1918 err = -EIO;
1919 goto err_spin_lo;
1920}
1921
1922struct preempt_client {
1923 struct igt_spinner spin;
1924 struct i915_gem_context *ctx;
1925};
1926
1927static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
1928{
1929 c->ctx = kernel_context(gt->i915, NULL);
1930 if (!c->ctx)
1931 return -ENOMEM;
1932
1933 if (igt_spinner_init(&c->spin, gt))
1934 goto err_ctx;
1935
1936 return 0;
1937
1938err_ctx:
1939 kernel_context_close(c->ctx);
1940 return -ENOMEM;
1941}
1942
1943static void preempt_client_fini(struct preempt_client *c)
1944{
1945 igt_spinner_fini(&c->spin);
1946 kernel_context_close(c->ctx);
1947}
1948
1949static int live_nopreempt(void *arg)
1950{
1951 struct intel_gt *gt = arg;
1952 struct intel_engine_cs *engine;
1953 struct preempt_client a, b;
1954 enum intel_engine_id id;
1955 int err = -ENOMEM;
1956
1957 /*
1958 * Verify that we can disable preemption for an individual request
1959 * that may be being observed and does not want to be interrupted.
1960 */
1961
1962 if (preempt_client_init(gt, &a))
1963 return -ENOMEM;
1964 if (preempt_client_init(gt, &b))
1965 goto err_client_a;
1966 b.ctx->sched.priority = I915_PRIORITY_MAX;
1967
1968 for_each_engine(engine, gt, id) {
1969 struct i915_request *rq_a, *rq_b;
1970
1971 if (!intel_engine_has_preemption(engine))
1972 continue;
1973
1974 engine->execlists.preempt_hang.count = 0;
1975
1976 rq_a = spinner_create_request(&a.spin,
1977 a.ctx, engine,
1978 MI_ARB_CHECK);
1979 if (IS_ERR(rq_a)) {
1980 err = PTR_ERR(rq_a);
1981 goto err_client_b;
1982 }
1983
1984 /* Low priority client, but unpreemptable! */
1985 __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
1986
1987 i915_request_add(rq_a);
1988 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
1989 pr_err("First client failed to start\n");
1990 goto err_wedged;
1991 }
1992
1993 rq_b = spinner_create_request(&b.spin,
1994 b.ctx, engine,
1995 MI_ARB_CHECK);
1996 if (IS_ERR(rq_b)) {
1997 err = PTR_ERR(rq_b);
1998 goto err_client_b;
1999 }
2000
2001 i915_request_add(rq_b);
2002
2003 /* B is much more important than A! (But A is unpreemptable.) */
2004 GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
2005
2006 /* Wait long enough for preemption and timeslicing */
2007 if (igt_wait_for_spinner(&b.spin, rq_b)) {
2008 pr_err("Second client started too early!\n");
2009 goto err_wedged;
2010 }
2011
2012 igt_spinner_end(&a.spin);
2013
2014 if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2015 pr_err("Second client failed to start\n");
2016 goto err_wedged;
2017 }
2018
2019 igt_spinner_end(&b.spin);
2020
2021 if (engine->execlists.preempt_hang.count) {
2022 pr_err("Preemption recorded x%d; should have been suppressed!\n",
2023 engine->execlists.preempt_hang.count);
2024 err = -EINVAL;
2025 goto err_wedged;
2026 }
2027
2028 if (igt_flush_test(gt->i915))
2029 goto err_wedged;
2030 }
2031
2032 err = 0;
2033err_client_b:
2034 preempt_client_fini(&b);
2035err_client_a:
2036 preempt_client_fini(&a);
2037 return err;
2038
2039err_wedged:
2040 igt_spinner_end(&b.spin);
2041 igt_spinner_end(&a.spin);
2042 intel_gt_set_wedged(gt);
2043 err = -EIO;
2044 goto err_client_b;
2045}
2046
2047struct live_preempt_cancel {
2048 struct intel_engine_cs *engine;
2049 struct preempt_client a, b;
2050};
2051
2052static int __cancel_active0(struct live_preempt_cancel *arg)
2053{
2054 struct i915_request *rq;
2055 struct igt_live_test t;
2056 int err;
2057
2058 /* Preempt cancel of ELSP0 */
2059 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2060 if (igt_live_test_begin(&t, arg->engine->i915,
2061 __func__, arg->engine->name))
2062 return -EIO;
2063
2064 rq = spinner_create_request(&arg->a.spin,
2065 arg->a.ctx, arg->engine,
2066 MI_ARB_CHECK);
2067 if (IS_ERR(rq))
2068 return PTR_ERR(rq);
2069
2070 clear_bit(CONTEXT_BANNED, &rq->context->flags);
2071 i915_request_get(rq);
2072 i915_request_add(rq);
2073 if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2074 err = -EIO;
2075 goto out;
2076 }
2077
2078 intel_context_ban(rq->context, rq);
2079 err = intel_engine_pulse(arg->engine);
2080 if (err)
2081 goto out;
2082
2083 err = wait_for_reset(arg->engine, rq, HZ / 2);
2084 if (err) {
2085 pr_err("Cancelled inflight0 request did not reset\n");
2086 goto out;
2087 }
2088
2089out:
2090 i915_request_put(rq);
2091 if (igt_live_test_end(&t))
2092 err = -EIO;
2093 return err;
2094}
2095
2096static int __cancel_active1(struct live_preempt_cancel *arg)
2097{
2098 struct i915_request *rq[2] = {};
2099 struct igt_live_test t;
2100 int err;
2101
2102 /* Preempt cancel of ELSP1 */
2103 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2104 if (igt_live_test_begin(&t, arg->engine->i915,
2105 __func__, arg->engine->name))
2106 return -EIO;
2107
2108 rq[0] = spinner_create_request(&arg->a.spin,
2109 arg->a.ctx, arg->engine,
2110 MI_NOOP); /* no preemption */
2111 if (IS_ERR(rq[0]))
2112 return PTR_ERR(rq[0]);
2113
2114 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2115 i915_request_get(rq[0]);
2116 i915_request_add(rq[0]);
2117 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2118 err = -EIO;
2119 goto out;
2120 }
2121
2122 rq[1] = spinner_create_request(&arg->b.spin,
2123 arg->b.ctx, arg->engine,
2124 MI_ARB_CHECK);
2125 if (IS_ERR(rq[1])) {
2126 err = PTR_ERR(rq[1]);
2127 goto out;
2128 }
2129
2130 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2131 i915_request_get(rq[1]);
2132 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2133 i915_request_add(rq[1]);
2134 if (err)
2135 goto out;
2136
2137 intel_context_ban(rq[1]->context, rq[1]);
2138 err = intel_engine_pulse(arg->engine);
2139 if (err)
2140 goto out;
2141
2142 igt_spinner_end(&arg->a.spin);
2143 err = wait_for_reset(arg->engine, rq[1], HZ / 2);
2144 if (err)
2145 goto out;
2146
2147 if (rq[0]->fence.error != 0) {
2148 pr_err("Normal inflight0 request did not complete\n");
2149 err = -EINVAL;
2150 goto out;
2151 }
2152
2153 if (rq[1]->fence.error != -EIO) {
2154 pr_err("Cancelled inflight1 request did not report -EIO\n");
2155 err = -EINVAL;
2156 goto out;
2157 }
2158
2159out:
2160 i915_request_put(rq[1]);
2161 i915_request_put(rq[0]);
2162 if (igt_live_test_end(&t))
2163 err = -EIO;
2164 return err;
2165}
2166
2167static int __cancel_queued(struct live_preempt_cancel *arg)
2168{
2169 struct i915_request *rq[3] = {};
2170 struct igt_live_test t;
2171 int err;
2172
2173 /* Full ELSP and one in the wings */
2174 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2175 if (igt_live_test_begin(&t, arg->engine->i915,
2176 __func__, arg->engine->name))
2177 return -EIO;
2178
2179 rq[0] = spinner_create_request(&arg->a.spin,
2180 arg->a.ctx, arg->engine,
2181 MI_ARB_CHECK);
2182 if (IS_ERR(rq[0]))
2183 return PTR_ERR(rq[0]);
2184
2185 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2186 i915_request_get(rq[0]);
2187 i915_request_add(rq[0]);
2188 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2189 err = -EIO;
2190 goto out;
2191 }
2192
2193 rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
2194 if (IS_ERR(rq[1])) {
2195 err = PTR_ERR(rq[1]);
2196 goto out;
2197 }
2198
2199 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2200 i915_request_get(rq[1]);
2201 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2202 i915_request_add(rq[1]);
2203 if (err)
2204 goto out;
2205
2206 rq[2] = spinner_create_request(&arg->b.spin,
2207 arg->a.ctx, arg->engine,
2208 MI_ARB_CHECK);
2209 if (IS_ERR(rq[2])) {
2210 err = PTR_ERR(rq[2]);
2211 goto out;
2212 }
2213
2214 i915_request_get(rq[2]);
2215 err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
2216 i915_request_add(rq[2]);
2217 if (err)
2218 goto out;
2219
2220 intel_context_ban(rq[2]->context, rq[2]);
2221 err = intel_engine_pulse(arg->engine);
2222 if (err)
2223 goto out;
2224
2225 err = wait_for_reset(arg->engine, rq[2], HZ / 2);
2226 if (err)
2227 goto out;
2228
2229 if (rq[0]->fence.error != -EIO) {
2230 pr_err("Cancelled inflight0 request did not report -EIO\n");
2231 err = -EINVAL;
2232 goto out;
2233 }
2234
2235 /*
2236 * The behavior differs with and without semaphores: with semaphores the
2237 * subsequent request is already on the hardware and is not cancelled,
2238 * while without them it is held in the driver and is cancelled.
2239 */
2240 if (intel_engine_has_semaphores(rq[1]->engine) &&
2241 rq[1]->fence.error != 0) {
2242 pr_err("Normal inflight1 request did not complete\n");
2243 err = -EINVAL;
2244 goto out;
2245 }
2246
2247 if (rq[2]->fence.error != -EIO) {
2248 pr_err("Cancelled queued request did not report -EIO\n");
2249 err = -EINVAL;
2250 goto out;
2251 }
2252
2253out:
2254 i915_request_put(rq[2]);
2255 i915_request_put(rq[1]);
2256 i915_request_put(rq[0]);
2257 if (igt_live_test_end(&t))
2258 err = -EIO;
2259 return err;
2260}
2261
2262static int __cancel_hostile(struct live_preempt_cancel *arg)
2263{
2264 struct i915_request *rq;
2265 int err;
2266
2267 /* Preempt cancel non-preemptible spinner in ELSP0 */
2268 if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
2269 return 0;
2270
2271 if (!intel_has_reset_engine(arg->engine->gt))
2272 return 0;
2273
2274 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2275 rq = spinner_create_request(&arg->a.spin,
2276 arg->a.ctx, arg->engine,
2277 MI_NOOP); /* preemption disabled */
2278 if (IS_ERR(rq))
2279 return PTR_ERR(rq);
2280
2281 clear_bit(CONTEXT_BANNED, &rq->context->flags);
2282 i915_request_get(rq);
2283 i915_request_add(rq);
2284 if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2285 err = -EIO;
2286 goto out;
2287 }
2288
2289 intel_context_ban(rq->context, rq);
2290 err = intel_engine_pulse(arg->engine); /* force reset */
2291 if (err)
2292 goto out;
2293
2294 err = wait_for_reset(arg->engine, rq, HZ / 2);
2295 if (err) {
2296 pr_err("Cancelled inflight0 request did not reset\n");
2297 goto out;
2298 }
2299
2300out:
2301 i915_request_put(rq);
2302 if (igt_flush_test(arg->engine->i915))
2303 err = -EIO;
2304 return err;
2305}
2306
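/* Arrange for the forthcoming preempt reset to fail (see __cancel_fail). */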
2307static void force_reset_timeout(struct intel_engine_cs *engine)
2308{
2309 engine->reset_timeout.probability = 999;
2310 atomic_set(&engine->reset_timeout.times, -1);
2311}
2312
2313static void cancel_reset_timeout(struct intel_engine_cs *engine)
2314{
2315 memset(&engine->reset_timeout, 0, sizeof(engine->reset_timeout));
2316}
2317
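/*
 * As __cancel_hostile, but with the preempt-timeout reset itself rigged to
 * fail: once the pulse is queued we force the fault injection on,
 * presumably make the pending preemption appear timed out by cancelling its
 * timer, and then rely on the heartbeat (interval dropped to 1ms) to
 * escalate to the reset that finally cancels the spinner.
 */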
2318static int __cancel_fail(struct live_preempt_cancel *arg)
2319{
2320 struct intel_engine_cs *engine = arg->engine;
2321 struct i915_request *rq;
2322 int err;
2323
2324 if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
2325 return 0;
2326
2327 if (!intel_has_reset_engine(engine->gt))
2328 return 0;
2329
2330 GEM_TRACE("%s(%s)\n", __func__, engine->name);
2331 rq = spinner_create_request(&arg->a.spin,
2332 arg->a.ctx, engine,
2333 MI_NOOP); /* preemption disabled */
2334 if (IS_ERR(rq))
2335 return PTR_ERR(rq);
2336
2337 clear_bit(CONTEXT_BANNED, &rq->context->flags);
2338 i915_request_get(rq);
2339 i915_request_add(rq);
2340 if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2341 err = -EIO;
2342 goto out;
2343 }
2344
2345 intel_context_set_banned(rq->context);
2346
2347 err = intel_engine_pulse(engine);
2348 if (err)
2349 goto out;
2350
2351 force_reset_timeout(engine);
2352
2353 /* force preempt reset [failure] */
2354 while (!engine->execlists.pending[0])
2355 intel_engine_flush_submission(engine);
2356 del_timer_sync(&engine->execlists.preempt);
2357 intel_engine_flush_submission(engine);
2358
2359 cancel_reset_timeout(engine);
2360
2361 /* after failure, require heartbeats to reset device */
2362 intel_engine_set_heartbeat(engine, 1);
2363 err = wait_for_reset(engine, rq, HZ / 2);
2364 intel_engine_set_heartbeat(engine,
2365 engine->defaults.heartbeat_interval_ms);
2366 if (err) {
2367 pr_err("Cancelled inflight0 request did not reset\n");
2368 goto out;
2369 }
2370
2371out:
2372 i915_request_put(rq);
2373 if (igt_flush_test(engine->i915))
2374 err = -EIO;
2375 return err;
2376}
2377
2378static int live_preempt_cancel(void *arg)
2379{
2380 struct intel_gt *gt = arg;
2381 struct live_preempt_cancel data;
2382 enum intel_engine_id id;
2383 int err = -ENOMEM;
2384
2385 /*
2386 * To cancel an inflight context, we need to first remove it from the
2387 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
2388 */
2389
2390 if (preempt_client_init(gt, &data.a))
2391 return -ENOMEM;
2392 if (preempt_client_init(gt, &data.b))
2393 goto err_client_a;
2394
2395 for_each_engine(data.engine, gt, id) {
2396 if (!intel_engine_has_preemption(data.engine))
2397 continue;
2398
2399 err = __cancel_active0(&data);
2400 if (err)
2401 goto err_wedged;
2402
2403 err = __cancel_active1(&data);
2404 if (err)
2405 goto err_wedged;
2406
2407 err = __cancel_queued(&data);
2408 if (err)
2409 goto err_wedged;
2410
2411 err = __cancel_hostile(&data);
2412 if (err)
2413 goto err_wedged;
2414
2415 err = __cancel_fail(&data);
2416 if (err)
2417 goto err_wedged;
2418 }
2419
2420 err = 0;
2421err_client_b:
2422 preempt_client_fini(&data.b);
2423err_client_a:
2424 preempt_client_fini(&data.a);
2425 return err;
2426
2427err_wedged:
2428 GEM_TRACE_DUMP();
2429 igt_spinner_end(&data.b.spin);
2430 igt_spinner_end(&data.a.spin);
2431 intel_gt_set_wedged(gt);
2432 goto err_client_b;
2433}
2434
2435static int live_suppress_self_preempt(void *arg)
2436{
2437 struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
2438 struct intel_gt *gt = arg;
2439 struct intel_engine_cs *engine;
2440 struct preempt_client a, b;
2441 enum intel_engine_id id;
2442 int err = -ENOMEM;
2443
2444 /*
2445 * Verify that if a preemption request does not cause a change in
2446 * the current execution order, the preempt-to-idle injection is
2447 * skipped and that we do not accidentally apply it after the CS
2448 * completion event.
2449 */
2450
2451	if (intel_uc_uses_guc_submission(&gt->uc))
2452		return 0; /* presume black box */
2453
2454 if (intel_vgpu_active(gt->i915))
2455 return 0; /* GVT forces single port & request submission */
2456
2457 if (preempt_client_init(gt, &a))
2458 return -ENOMEM;
2459 if (preempt_client_init(gt, &b))
2460 goto err_client_a;
2461
2462 for_each_engine(engine, gt, id) {
2463 struct i915_request *rq_a, *rq_b;
2464 int depth;
2465
2466 if (!intel_engine_has_preemption(engine))
2467 continue;
2468
2469 if (igt_flush_test(gt->i915))
2470 goto err_wedged;
2471
2472 st_engine_heartbeat_disable(engine);
2473 engine->execlists.preempt_hang.count = 0;
2474
2475 rq_a = spinner_create_request(&a.spin,
2476 a.ctx, engine,
2477 MI_NOOP);
2478 if (IS_ERR(rq_a)) {
2479 err = PTR_ERR(rq_a);
2480 st_engine_heartbeat_enable(engine);
2481 goto err_client_b;
2482 }
2483
2484 i915_request_add(rq_a);
2485 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
2486 pr_err("First client failed to start\n");
2487 st_engine_heartbeat_enable(engine);
2488 goto err_wedged;
2489 }
2490
2491 /* Keep postponing the timer to avoid premature slicing */
2492 mod_timer(&engine->execlists.timer, jiffies + HZ);
2493 for (depth = 0; depth < 8; depth++) {
2494 rq_b = spinner_create_request(&b.spin,
2495 b.ctx, engine,
2496 MI_NOOP);
2497 if (IS_ERR(rq_b)) {
2498 err = PTR_ERR(rq_b);
2499 st_engine_heartbeat_enable(engine);
2500 goto err_client_b;
2501 }
2502 i915_request_add(rq_b);
2503
2504 GEM_BUG_ON(i915_request_completed(rq_a));
2505 engine->sched_engine->schedule(rq_a, &attr);
2506 igt_spinner_end(&a.spin);
2507
2508 if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2509 pr_err("Second client failed to start\n");
2510 st_engine_heartbeat_enable(engine);
2511 goto err_wedged;
2512 }
2513
2514 swap(a, b);
2515 rq_a = rq_b;
2516 }
2517 igt_spinner_end(&a.spin);
2518
2519 if (engine->execlists.preempt_hang.count) {
2520 pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
2521 engine->name,
2522 engine->execlists.preempt_hang.count,
2523 depth);
2524 st_engine_heartbeat_enable(engine);
2525 err = -EINVAL;
2526 goto err_client_b;
2527 }
2528
2529 st_engine_heartbeat_enable(engine);
2530 if (igt_flush_test(gt->i915))
2531 goto err_wedged;
2532 }
2533
2534 err = 0;
2535err_client_b:
2536 preempt_client_fini(&b);
2537err_client_a:
2538 preempt_client_fini(&a);
2539 return err;
2540
2541err_wedged:
2542 igt_spinner_end(&b.spin);
2543 igt_spinner_end(&a.spin);
2544 intel_gt_set_wedged(gt);
2545 err = -EIO;
2546 goto err_client_b;
2547}
2548
2549static int live_chain_preempt(void *arg)
2550{
2551 struct intel_gt *gt = arg;
2552 struct intel_engine_cs *engine;
2553 struct preempt_client hi, lo;
2554 enum intel_engine_id id;
2555 int err = -ENOMEM;
2556
2557 /*
2558 * Build a chain AB...BA between two contexts (A, B) and request
2559 * preemption of the last request. It should then complete before
2560 * the previously submitted spinner in B.
2561 */
2562
2563 if (preempt_client_init(gt, &hi))
2564 return -ENOMEM;
2565
2566 if (preempt_client_init(gt, &lo))
2567 goto err_client_hi;
2568
2569 for_each_engine(engine, gt, id) {
2570 struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
2571 struct igt_live_test t;
2572 struct i915_request *rq;
2573 int ring_size, count, i;
2574
2575 if (!intel_engine_has_preemption(engine))
2576 continue;
2577
2578 rq = spinner_create_request(&lo.spin,
2579 lo.ctx, engine,
2580 MI_ARB_CHECK);
2581 if (IS_ERR(rq))
2582 goto err_wedged;
2583
2584 i915_request_get(rq);
2585 i915_request_add(rq);
2586
2587 ring_size = rq->wa_tail - rq->head;
2588 if (ring_size < 0)
2589 ring_size += rq->ring->size;
2590 ring_size = rq->ring->size / ring_size;
2591 pr_debug("%s(%s): Using maximum of %d requests\n",
2592 __func__, engine->name, ring_size);
2593
2594 igt_spinner_end(&lo.spin);
2595 if (i915_request_wait(rq, 0, HZ / 2) < 0) {
2596 pr_err("Timed out waiting to flush %s\n", engine->name);
2597 i915_request_put(rq);
2598 goto err_wedged;
2599 }
2600 i915_request_put(rq);
2601
2602 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
2603 err = -EIO;
2604 goto err_wedged;
2605 }
2606
2607 for_each_prime_number_from(count, 1, ring_size) {
2608 rq = spinner_create_request(&hi.spin,
2609 hi.ctx, engine,
2610 MI_ARB_CHECK);
2611 if (IS_ERR(rq))
2612 goto err_wedged;
2613 i915_request_add(rq);
2614 if (!igt_wait_for_spinner(&hi.spin, rq))
2615 goto err_wedged;
2616
2617 rq = spinner_create_request(&lo.spin,
2618 lo.ctx, engine,
2619 MI_ARB_CHECK);
2620 if (IS_ERR(rq))
2621 goto err_wedged;
2622 i915_request_add(rq);
2623
2624 for (i = 0; i < count; i++) {
2625 rq = igt_request_alloc(lo.ctx, engine);
2626 if (IS_ERR(rq))
2627 goto err_wedged;
2628 i915_request_add(rq);
2629 }
2630
2631 rq = igt_request_alloc(hi.ctx, engine);
2632 if (IS_ERR(rq))
2633 goto err_wedged;
2634
2635 i915_request_get(rq);
2636 i915_request_add(rq);
2637 engine->sched_engine->schedule(rq, &attr);
2638
2639 igt_spinner_end(&hi.spin);
2640 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2641 struct drm_printer p =
2642 drm_info_printer(gt->i915->drm.dev);
2643
2644 pr_err("Failed to preempt over chain of %d\n",
2645 count);
2646 intel_engine_dump(engine, &p,
2647 "%s\n", engine->name);
2648 i915_request_put(rq);
2649 goto err_wedged;
2650 }
2651 igt_spinner_end(&lo.spin);
2652 i915_request_put(rq);
2653
2654 rq = igt_request_alloc(lo.ctx, engine);
2655 if (IS_ERR(rq))
2656 goto err_wedged;
2657
2658 i915_request_get(rq);
2659 i915_request_add(rq);
2660
2661 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2662 struct drm_printer p =
2663 drm_info_printer(gt->i915->drm.dev);
2664
2665 pr_err("Failed to flush low priority chain of %d requests\n",
2666 count);
2667 intel_engine_dump(engine, &p,
2668 "%s\n", engine->name);
2669
2670 i915_request_put(rq);
2671 goto err_wedged;
2672 }
2673 i915_request_put(rq);
2674 }
2675
2676 if (igt_live_test_end(&t)) {
2677 err = -EIO;
2678 goto err_wedged;
2679 }
2680 }
2681
2682 err = 0;
2683err_client_lo:
2684 preempt_client_fini(&lo);
2685err_client_hi:
2686 preempt_client_fini(&hi);
2687 return err;
2688
2689err_wedged:
2690 igt_spinner_end(&hi.spin);
2691 igt_spinner_end(&lo.spin);
2692 intel_gt_set_wedged(gt);
2693 err = -EIO;
2694 goto err_client_lo;
2695}
2696
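/*
 * Each gang member is a fresh context whose batch enables arbitration and
 * then busy-waits (MI_SEMAPHORE_WAIT) on the first dword of its own batch
 * until it reads zero. Once released, it clears the semaphore of the
 * previously created (lower priority) member before ending, so terminating
 * the newest member cascades down the whole gang. The requests are chained
 * through rq->mock.link so the caller can walk them newest to oldest.
 */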
2697static int create_gang(struct intel_engine_cs *engine,
2698 struct i915_request **prev)
2699{
2700 struct drm_i915_gem_object *obj;
2701 struct intel_context *ce;
2702 struct i915_request *rq;
2703 struct i915_vma *vma;
2704 u32 *cs;
2705 int err;
2706
2707 ce = intel_context_create(engine);
2708 if (IS_ERR(ce))
2709 return PTR_ERR(ce);
2710
2711 obj = i915_gem_object_create_internal(engine->i915, 4096);
2712 if (IS_ERR(obj)) {
2713 err = PTR_ERR(obj);
2714 goto err_ce;
2715 }
2716
2717 vma = i915_vma_instance(obj, ce->vm, NULL);
2718 if (IS_ERR(vma)) {
2719 err = PTR_ERR(vma);
2720 goto err_obj;
2721 }
2722
2723 err = i915_vma_pin(vma, 0, 0, PIN_USER);
2724 if (err)
2725 goto err_obj;
2726
2727 cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
2728 if (IS_ERR(cs)) {
2729 err = PTR_ERR(cs);
2730 goto err_obj;
2731 }
2732
2733 /* Semaphore target: spin until zero */
2734 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2735
2736 *cs++ = MI_SEMAPHORE_WAIT |
2737 MI_SEMAPHORE_POLL |
2738 MI_SEMAPHORE_SAD_EQ_SDD;
2739 *cs++ = 0;
2740 *cs++ = lower_32_bits(vma->node.start);
2741 *cs++ = upper_32_bits(vma->node.start);
2742
2743 if (*prev) {
2744 u64 offset = (*prev)->batch->node.start;
2745
2746 /* Terminate the spinner in the next lower priority batch. */
2747 *cs++ = MI_STORE_DWORD_IMM_GEN4;
2748 *cs++ = lower_32_bits(offset);
2749 *cs++ = upper_32_bits(offset);
2750 *cs++ = 0;
2751 }
2752
2753 *cs++ = MI_BATCH_BUFFER_END;
2754 i915_gem_object_flush_map(obj);
2755 i915_gem_object_unpin_map(obj);
2756
2757 rq = intel_context_create_request(ce);
2758 if (IS_ERR(rq)) {
2759 err = PTR_ERR(rq);
2760 goto err_obj;
2761 }
2762
2763 rq->batch = i915_vma_get(vma);
2764 i915_request_get(rq);
2765
2766 i915_vma_lock(vma);
2767 err = i915_vma_move_to_active(vma, rq, 0);
2768 if (!err)
2769 err = rq->engine->emit_bb_start(rq,
2770 vma->node.start,
2771 PAGE_SIZE, 0);
2772 i915_vma_unlock(vma);
2773 i915_request_add(rq);
2774 if (err)
2775 goto err_rq;
2776
2777 i915_gem_object_put(obj);
2778 intel_context_put(ce);
2779
2780 rq->mock.link.next = &(*prev)->mock.link;
2781 *prev = rq;
2782 return 0;
2783
2784err_rq:
2785 i915_vma_put(rq->batch);
2786 i915_request_put(rq);
2787err_obj:
2788 i915_gem_object_put(obj);
2789err_ce:
2790 intel_context_put(ce);
2791 return err;
2792}
2793
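/*
 * Two small-ring contexts are pinned with their rings poisoned with
 * 0xdeadbeef (a hang if ever executed). A barrier-priority spinner is run on
 * ce[0] and its ring is then stuffed with nop requests until the tail has
 * advanced queue_sz bytes, before a second barrier-priority request on ce[1]
 * is submitted. The test only demands that the preempting request reaches
 * the HW promptly, i.e. that unwinding the partially consumed ring does not
 * get stuck.
 */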
2794static int __live_preempt_ring(struct intel_engine_cs *engine,
2795 struct igt_spinner *spin,
2796 int queue_sz, int ring_sz)
2797{
2798 struct intel_context *ce[2] = {};
2799 struct i915_request *rq;
2800 struct igt_live_test t;
2801 int err = 0;
2802 int n;
2803
2804 if (igt_live_test_begin(&t, engine->i915, __func__, engine->name))
2805 return -EIO;
2806
2807 for (n = 0; n < ARRAY_SIZE(ce); n++) {
2808 struct intel_context *tmp;
2809
2810 tmp = intel_context_create(engine);
2811 if (IS_ERR(tmp)) {
2812 err = PTR_ERR(tmp);
2813 goto err_ce;
2814 }
2815
2816 tmp->ring_size = ring_sz;
2817
2818 err = intel_context_pin(tmp);
2819 if (err) {
2820 intel_context_put(tmp);
2821 goto err_ce;
2822 }
2823
2824 memset32(tmp->ring->vaddr,
2825 0xdeadbeef, /* trigger a hang if executed */
2826 tmp->ring->vma->size / sizeof(u32));
2827
2828 ce[n] = tmp;
2829 }
2830
2831 rq = igt_spinner_create_request(spin, ce[0], MI_ARB_CHECK);
2832 if (IS_ERR(rq)) {
2833 err = PTR_ERR(rq);
2834 goto err_ce;
2835 }
2836
2837 i915_request_get(rq);
2838 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
2839 i915_request_add(rq);
2840
2841 if (!igt_wait_for_spinner(spin, rq)) {
2842 intel_gt_set_wedged(engine->gt);
2843 i915_request_put(rq);
2844 err = -ETIME;
2845 goto err_ce;
2846 }
2847
2848	/* Fill the ring until we cause a wrap */
2849 n = 0;
2850 while (ce[0]->ring->tail - rq->wa_tail <= queue_sz) {
2851 struct i915_request *tmp;
2852
2853 tmp = intel_context_create_request(ce[0]);
2854 if (IS_ERR(tmp)) {
2855 err = PTR_ERR(tmp);
2856 i915_request_put(rq);
2857 goto err_ce;
2858 }
2859
2860 i915_request_add(tmp);
2861 intel_engine_flush_submission(engine);
2862 n++;
2863 }
2864 intel_engine_flush_submission(engine);
2865 pr_debug("%s: Filled %d with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
2866 engine->name, queue_sz, n,
2867 ce[0]->ring->size,
2868 ce[0]->ring->tail,
2869 ce[0]->ring->emit,
2870 rq->tail);
2871 i915_request_put(rq);
2872
2873 /* Create a second request to preempt the first ring */
2874 rq = intel_context_create_request(ce[1]);
2875 if (IS_ERR(rq)) {
2876 err = PTR_ERR(rq);
2877 goto err_ce;
2878 }
2879
2880 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
2881 i915_request_get(rq);
2882 i915_request_add(rq);
2883
2884 err = wait_for_submit(engine, rq, HZ / 2);
2885 i915_request_put(rq);
2886 if (err) {
2887 pr_err("%s: preemption request was not submitted\n",
2888 engine->name);
2889 err = -ETIME;
2890 }
2891
2892 pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
2893 engine->name,
2894 ce[0]->ring->tail, ce[0]->ring->emit,
2895 ce[1]->ring->tail, ce[1]->ring->emit);
2896
2897err_ce:
2898 intel_engine_flush_submission(engine);
2899 igt_spinner_end(spin);
2900 for (n = 0; n < ARRAY_SIZE(ce); n++) {
2901 if (IS_ERR_OR_NULL(ce[n]))
2902 break;
2903
2904 intel_context_unpin(ce[n]);
2905 intel_context_put(ce[n]);
2906 }
2907 if (igt_live_test_end(&t))
2908 err = -EIO;
2909 return err;
2910}
2911
2912static int live_preempt_ring(void *arg)
2913{
2914 struct intel_gt *gt = arg;
2915 struct intel_engine_cs *engine;
2916 struct igt_spinner spin;
2917 enum intel_engine_id id;
2918 int err = 0;
2919
2920 /*
2921	 * Check that we roll back large chunks of a ring in order to do a
2922 * preemption event. Similar to live_unlite_ring, but looking at
2923 * ring size rather than the impact of intel_ring_direction().
2924 */
2925
2926 if (igt_spinner_init(&spin, gt))
2927 return -ENOMEM;
2928
2929 for_each_engine(engine, gt, id) {
2930 int n;
2931
2932 if (!intel_engine_has_preemption(engine))
2933 continue;
2934
2935 if (!intel_engine_can_store_dword(engine))
2936 continue;
2937
2938 st_engine_heartbeat_disable(engine);
2939
2940 for (n = 0; n <= 3; n++) {
2941 err = __live_preempt_ring(engine, &spin,
2942 n * SZ_4K / 4, SZ_4K);
2943 if (err)
2944 break;
2945 }
2946
2947 st_engine_heartbeat_enable(engine);
2948 if (err)
2949 break;
2950 }
2951
2952 igt_spinner_fini(&spin);
2953 return err;
2954}
2955
2956static int live_preempt_gang(void *arg)
2957{
2958 struct intel_gt *gt = arg;
2959 struct intel_engine_cs *engine;
2960 enum intel_engine_id id;
2961
2962 /*
2963 * Build as long a chain of preempters as we can, with each
2964 * request higher priority than the last. Once we are ready, we release
2965	 * the last batch which then percolates down the chain, each releasing
2966 * the next oldest in turn. The intent is to simply push as hard as we
2967 * can with the number of preemptions, trying to exceed narrow HW
2968 * limits. At a minimum, we insist that we can sort all the user
2969 * high priority levels into execution order.
2970 */
2971
2972 for_each_engine(engine, gt, id) {
2973 struct i915_request *rq = NULL;
2974 struct igt_live_test t;
2975 IGT_TIMEOUT(end_time);
2976 int prio = 0;
2977 int err = 0;
2978 u32 *cs;
2979
2980 if (!intel_engine_has_preemption(engine))
2981 continue;
2982
2983 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
2984 return -EIO;
2985
2986 do {
2987 struct i915_sched_attr attr = { .priority = prio++ };
2988
2989 err = create_gang(engine, &rq);
2990 if (err)
2991 break;
2992
2993 /* Submit each spinner at increasing priority */
2994 engine->sched_engine->schedule(rq, &attr);
2995 } while (prio <= I915_PRIORITY_MAX &&
2996 !__igt_timeout(end_time, NULL));
2997 pr_debug("%s: Preempt chain of %d requests\n",
2998 engine->name, prio);
2999
3000 /*
3001	 * The last spinner is thus the highest priority and should
3002	 * execute first. When that spinner completes,
3003 * it will terminate the next lowest spinner until there
3004 * are no more spinners and the gang is complete.
3005 */
3006 cs = i915_gem_object_pin_map_unlocked(rq->batch->obj, I915_MAP_WC);
3007 if (!IS_ERR(cs)) {
3008 *cs = 0;
3009 i915_gem_object_unpin_map(rq->batch->obj);
3010 } else {
3011 err = PTR_ERR(cs);
3012 intel_gt_set_wedged(gt);
3013 }
3014
3015 while (rq) { /* wait for each rq from highest to lowest prio */
3016 struct i915_request *n = list_next_entry(rq, mock.link);
3017
3018 if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
3019 struct drm_printer p =
3020 drm_info_printer(engine->i915->drm.dev);
3021
3022 pr_err("Failed to flush chain of %d requests, at %d\n",
3023 prio, rq_prio(rq));
3024 intel_engine_dump(engine, &p,
3025 "%s\n", engine->name);
3026
3027 err = -ETIME;
3028 }
3029
3030 i915_vma_put(rq->batch);
3031 i915_request_put(rq);
3032 rq = n;
3033 }
3034
3035 if (igt_live_test_end(&t))
3036 err = -EIO;
3037 if (err)
3038 return err;
3039 }
3040
3041 return 0;
3042}
3043
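/*
 * Build a user batch that sets GPR0 to 1 and then, for each remaining GPR,
 * increments it with MI_MATH, stores it into this client's slot of the
 * result buffer with MI_STORE_REGISTER_MEM and waits for the shared
 * semaphore (dword 0 of the result buffer) to reach the loop index. The
 * GPRs live in the context image, so replaying an already-executed section
 * after a preemption would leave a stored value greater than 1.
 */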
3044static struct i915_vma *
3045create_gpr_user(struct intel_engine_cs *engine,
3046 struct i915_vma *result,
3047 unsigned int offset)
3048{
3049 struct drm_i915_gem_object *obj;
3050 struct i915_vma *vma;
3051 u32 *cs;
3052 int err;
3053 int i;
3054
3055 obj = i915_gem_object_create_internal(engine->i915, 4096);
3056 if (IS_ERR(obj))
3057 return ERR_CAST(obj);
3058
3059 vma = i915_vma_instance(obj, result->vm, NULL);
3060 if (IS_ERR(vma)) {
3061 i915_gem_object_put(obj);
3062 return vma;
3063 }
3064
3065 err = i915_vma_pin(vma, 0, 0, PIN_USER);
3066 if (err) {
3067 i915_vma_put(vma);
3068 return ERR_PTR(err);
3069 }
3070
3071 cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
3072 if (IS_ERR(cs)) {
3073 i915_vma_put(vma);
3074 return ERR_CAST(cs);
3075 }
3076
3077	/* All GPRs are clear for new contexts. We use GPR(0) as a constant */
3078 *cs++ = MI_LOAD_REGISTER_IMM(1);
3079 *cs++ = CS_GPR(engine, 0);
3080 *cs++ = 1;
3081
3082 for (i = 1; i < NUM_GPR; i++) {
3083 u64 addr;
3084
3085 /*
3086 * Perform: GPR[i]++
3087 *
3088 * As we read and write into the context saved GPR[i], if
3089 * we restart this batch buffer from an earlier point, we
3090 * will repeat the increment and store a value > 1.
3091 */
3092 *cs++ = MI_MATH(4);
3093 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i));
3094 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0));
3095 *cs++ = MI_MATH_ADD;
3096 *cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU);
3097
3098 addr = result->node.start + offset + i * sizeof(*cs);
3099 *cs++ = MI_STORE_REGISTER_MEM_GEN8;
3100 *cs++ = CS_GPR(engine, 2 * i);
3101 *cs++ = lower_32_bits(addr);
3102 *cs++ = upper_32_bits(addr);
3103
3104 *cs++ = MI_SEMAPHORE_WAIT |
3105 MI_SEMAPHORE_POLL |
3106 MI_SEMAPHORE_SAD_GTE_SDD;
3107 *cs++ = i;
3108 *cs++ = lower_32_bits(result->node.start);
3109 *cs++ = upper_32_bits(result->node.start);
3110 }
3111
3112 *cs++ = MI_BATCH_BUFFER_END;
3113 i915_gem_object_flush_map(obj);
3114 i915_gem_object_unpin_map(obj);
3115
3116 return vma;
3117}
3118
3119static struct i915_vma *create_global(struct intel_gt *gt, size_t sz)
3120{
3121 struct drm_i915_gem_object *obj;
3122 struct i915_vma *vma;
3123 int err;
3124
3125 obj = i915_gem_object_create_internal(gt->i915, sz);
3126 if (IS_ERR(obj))
3127 return ERR_CAST(obj);
3128
3129	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
3130 if (IS_ERR(vma)) {
3131 i915_gem_object_put(obj);
3132 return vma;
3133 }
3134
3135 err = i915_ggtt_pin(vma, NULL, 0, 0);
3136 if (err) {
3137 i915_vma_put(vma);
3138 return ERR_PTR(err);
3139 }
3140
3141 return vma;
3142}
3143
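/*
 * Bind the shared result buffer into a fresh context, build the GPR batch
 * targeting this client's private range at @offset and submit it. On
 * success the request is returned with a reference held for the caller.
 */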
3144static struct i915_request *
3145create_gpr_client(struct intel_engine_cs *engine,
3146 struct i915_vma *global,
3147 unsigned int offset)
3148{
3149 struct i915_vma *batch, *vma;
3150 struct intel_context *ce;
3151 struct i915_request *rq;
3152 int err;
3153
3154 ce = intel_context_create(engine);
3155 if (IS_ERR(ce))
3156 return ERR_CAST(ce);
3157
3158 vma = i915_vma_instance(global->obj, ce->vm, NULL);
3159 if (IS_ERR(vma)) {
3160 err = PTR_ERR(vma);
3161 goto out_ce;
3162 }
3163
3164 err = i915_vma_pin(vma, 0, 0, PIN_USER);
3165 if (err)
3166 goto out_ce;
3167
3168 batch = create_gpr_user(engine, vma, offset);
3169 if (IS_ERR(batch)) {
3170 err = PTR_ERR(batch);
3171 goto out_vma;
3172 }
3173
3174 rq = intel_context_create_request(ce);
3175 if (IS_ERR(rq)) {
3176 err = PTR_ERR(rq);
3177 goto out_batch;
3178 }
3179
3180 i915_vma_lock(vma);
3181 err = i915_vma_move_to_active(vma, rq, 0);
3182 i915_vma_unlock(vma);
3183
3184 i915_vma_lock(batch);
3185 if (!err)
3186 err = i915_vma_move_to_active(batch, rq, 0);
3187 if (!err)
3188 err = rq->engine->emit_bb_start(rq,
3189 batch->node.start,
3190 PAGE_SIZE, 0);
3191 i915_vma_unlock(batch);
3192 i915_vma_unpin(batch);
3193
3194 if (!err)
3195 i915_request_get(rq);
3196 i915_request_add(rq);
3197
3198out_batch:
3199 i915_vma_put(batch);
3200out_vma:
3201 i915_vma_unpin(vma);
3202out_ce:
3203 intel_context_put(ce);
3204 return err ? ERR_PTR(err) : rq;
3205}
3206
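/*
 * Submit a maximum-priority kernel request that writes @id into dword 0 of
 * the global buffer, i.e. the semaphore every GPR client is polling. Each
 * call therefore preempts whichever client is currently running and also
 * advances the clients by one step.
 */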
3207static int preempt_user(struct intel_engine_cs *engine,
3208 struct i915_vma *global,
3209 int id)
3210{
3211 struct i915_sched_attr attr = {
3212 .priority = I915_PRIORITY_MAX
3213 };
3214 struct i915_request *rq;
3215 int err = 0;
3216 u32 *cs;
3217
3218 rq = intel_engine_create_kernel_request(engine);
3219 if (IS_ERR(rq))
3220 return PTR_ERR(rq);
3221
3222 cs = intel_ring_begin(rq, 4);
3223 if (IS_ERR(cs)) {
3224 i915_request_add(rq);
3225 return PTR_ERR(cs);
3226 }
3227
3228 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
3229 *cs++ = i915_ggtt_offset(global);
3230 *cs++ = 0;
3231 *cs++ = id;
3232
3233 intel_ring_advance(rq, cs);
3234
3235 i915_request_get(rq);
3236 i915_request_add(rq);
3237
3238 engine->sched_engine->schedule(rq, &attr);
3239
3240 if (i915_request_wait(rq, 0, HZ / 2) < 0)
3241 err = -ETIME;
3242 i915_request_put(rq);
3243
3244 return err;
3245}
3246
3247static int live_preempt_user(void *arg)
3248{
3249 struct intel_gt *gt = arg;
3250 struct intel_engine_cs *engine;
3251 struct i915_vma *global;
3252 enum intel_engine_id id;
3253 u32 *result;
3254 int err = 0;
3255
3256 /*
3257 * In our other tests, we look at preemption in carefully
3258 * controlled conditions in the ringbuffer. Since most of the
3259 * time is spent in user batches, most of our preemptions naturally
3260 * occur there. We want to verify that when we preempt inside a batch
3261 * we continue on from the current instruction and do not roll back
3262 * to the start, or another earlier arbitration point.
3263 *
3264 * To verify this, we create a batch which is a mixture of
3265	 * MI_MATH (gpr++), MI_SRM (gpr) and preemption points. Then with
3266 * a few preempting contexts thrown into the mix, we look for any
3267 * repeated instructions (which show up as incorrect values).
3268 */
3269
3270 global = create_global(gt, 4096);
3271 if (IS_ERR(global))
3272 return PTR_ERR(global);
3273
3274 result = i915_gem_object_pin_map_unlocked(global->obj, I915_MAP_WC);
3275 if (IS_ERR(result)) {
3276 i915_vma_unpin_and_release(&global, 0);
3277 return PTR_ERR(result);
3278 }
3279
3280 for_each_engine(engine, gt, id) {
3281 struct i915_request *client[3] = {};
3282 struct igt_live_test t;
3283 int i;
3284
3285 if (!intel_engine_has_preemption(engine))
3286 continue;
3287
3288 if (GRAPHICS_VER(gt->i915) == 8 && engine->class != RENDER_CLASS)
3289 continue; /* we need per-context GPR */
3290
3291 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
3292 err = -EIO;
3293 break;
3294 }
3295
3296 memset(result, 0, 4096);
3297
3298 for (i = 0; i < ARRAY_SIZE(client); i++) {
3299 struct i915_request *rq;
3300
3301 rq = create_gpr_client(engine, global,
3302 NUM_GPR * i * sizeof(u32));
3303 if (IS_ERR(rq)) {
3304 err = PTR_ERR(rq);
3305 goto end_test;
3306 }
3307
3308 client[i] = rq;
3309 }
3310
3311 /* Continuously preempt the set of 3 running contexts */
3312 for (i = 1; i <= NUM_GPR; i++) {
3313 err = preempt_user(engine, global, i);
3314 if (err)
3315 goto end_test;
3316 }
3317
3318 if (READ_ONCE(result[0]) != NUM_GPR) {
3319 pr_err("%s: Failed to release semaphore\n",
3320 engine->name);
3321 err = -EIO;
3322 goto end_test;
3323 }
3324
3325 for (i = 0; i < ARRAY_SIZE(client); i++) {
3326 int gpr;
3327
3328 if (i915_request_wait(client[i], 0, HZ / 2) < 0) {
3329 err = -ETIME;
3330 goto end_test;
3331 }
3332
3333 for (gpr = 1; gpr < NUM_GPR; gpr++) {
3334 if (result[NUM_GPR * i + gpr] != 1) {
3335 pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n",
3336 engine->name,
3337 i, gpr, result[NUM_GPR * i + gpr]);
3338 err = -EINVAL;
3339 goto end_test;
3340 }
3341 }
3342 }
3343
3344end_test:
3345 for (i = 0; i < ARRAY_SIZE(client); i++) {
3346 if (!client[i])
3347 break;
3348
3349 i915_request_put(client[i]);
3350 }
3351
3352 /* Flush the semaphores on error */
3353 smp_store_mb(result[0], -1);
3354 if (igt_live_test_end(&t))
3355 err = -EIO;
3356 if (err)
3357 break;
3358 }
3359
3360 i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP);
3361 return err;
3362}
3363
3364static int live_preempt_timeout(void *arg)
3365{
3366 struct intel_gt *gt = arg;
3367 struct i915_gem_context *ctx_hi, *ctx_lo;
3368 struct igt_spinner spin_lo;
3369 struct intel_engine_cs *engine;
3370 enum intel_engine_id id;
3371 int err = -ENOMEM;
3372
3373 /*
3374 * Check that we force preemption to occur by cancelling the previous
3375 * context if it refuses to yield the GPU.
3376 */
3377 if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
3378 return 0;
3379
3380 if (!intel_has_reset_engine(gt))
3381 return 0;
3382
3383 ctx_hi = kernel_context(gt->i915, NULL);
3384 if (!ctx_hi)
3385 return -ENOMEM;
3386 ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
3387
3388 ctx_lo = kernel_context(gt->i915, NULL);
3389 if (!ctx_lo)
3390 goto err_ctx_hi;
3391 ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
3392
3393 if (igt_spinner_init(&spin_lo, gt))
3394 goto err_ctx_lo;
3395
3396 for_each_engine(engine, gt, id) {
3397 unsigned long saved_timeout;
3398 struct i915_request *rq;
3399
3400 if (!intel_engine_has_preemption(engine))
3401 continue;
3402
3403 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
3404 MI_NOOP); /* preemption disabled */
3405 if (IS_ERR(rq)) {
3406 err = PTR_ERR(rq);
3407 goto err_spin_lo;
3408 }
3409
3410 i915_request_add(rq);
3411 if (!igt_wait_for_spinner(&spin_lo, rq)) {
3412 intel_gt_set_wedged(gt);
3413 err = -EIO;
3414 goto err_spin_lo;
3415 }
3416
3417 rq = igt_request_alloc(ctx_hi, engine);
3418 if (IS_ERR(rq)) {
3419 igt_spinner_end(&spin_lo);
3420 err = PTR_ERR(rq);
3421 goto err_spin_lo;
3422 }
3423
3424 /* Flush the previous CS ack before changing timeouts */
3425 while (READ_ONCE(engine->execlists.pending[0]))
3426 cpu_relax();
3427
3428 saved_timeout = engine->props.preempt_timeout_ms;
3429 engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */
3430
3431 i915_request_get(rq);
3432 i915_request_add(rq);
3433
3434 intel_engine_flush_submission(engine);
3435 engine->props.preempt_timeout_ms = saved_timeout;
3436
3437 if (i915_request_wait(rq, 0, HZ / 10) < 0) {
3438 intel_gt_set_wedged(gt);
3439 i915_request_put(rq);
3440 err = -ETIME;
3441 goto err_spin_lo;
3442 }
3443
3444 igt_spinner_end(&spin_lo);
3445 i915_request_put(rq);
3446 }
3447
3448 err = 0;
3449err_spin_lo:
3450 igt_spinner_fini(&spin_lo);
3451err_ctx_lo:
3452 kernel_context_close(ctx_lo);
3453err_ctx_hi:
3454 kernel_context_close(ctx_hi);
3455 return err;
3456}
3457
3458static int random_range(struct rnd_state *rnd, int min, int max)
3459{
3460 return i915_prandom_u32_max_state(max - min, rnd) + min;
3461}
3462
3463static int random_priority(struct rnd_state *rnd)
3464{
3465 return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
3466}
3467
3468struct preempt_smoke {
3469 struct intel_gt *gt;
3470 struct kthread_work work;
3471 struct i915_gem_context **contexts;
3472 struct intel_engine_cs *engine;
3473 struct drm_i915_gem_object *batch;
3474 unsigned int ncontext;
3475 struct rnd_state prng;
3476 unsigned long count;
3477 int result;
3478};
3479
3480static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
3481{
3482 return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
3483 &smoke->prng)];
3484}
3485
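/*
 * Submit a single request from @ctx at priority @prio. If a batch object is
 * supplied, it is bound into the context's address space and executed (a
 * page full of MI_ARB_CHECK, so the request is preemptible mid-batch);
 * otherwise the request carries no payload.
 */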
3486static int smoke_submit(struct preempt_smoke *smoke,
3487 struct i915_gem_context *ctx, int prio,
3488 struct drm_i915_gem_object *batch)
3489{
3490 struct i915_request *rq;
3491 struct i915_vma *vma = NULL;
3492 int err = 0;
3493
3494 if (batch) {
3495 struct i915_address_space *vm;
3496
3497 vm = i915_gem_context_get_eb_vm(ctx);
3498 vma = i915_vma_instance(batch, vm, NULL);
3499 i915_vm_put(vm);
3500 if (IS_ERR(vma))
3501 return PTR_ERR(vma);
3502
3503 err = i915_vma_pin(vma, 0, 0, PIN_USER);
3504 if (err)
3505 return err;
3506 }
3507
3508 ctx->sched.priority = prio;
3509
3510 rq = igt_request_alloc(ctx, smoke->engine);
3511 if (IS_ERR(rq)) {
3512 err = PTR_ERR(rq);
3513 goto unpin;
3514 }
3515
3516 if (vma) {
3517 i915_vma_lock(vma);
3518 err = i915_vma_move_to_active(vma, rq, 0);
3519 if (!err)
3520 err = rq->engine->emit_bb_start(rq,
3521 vma->node.start,
3522 PAGE_SIZE, 0);
3523 i915_vma_unlock(vma);
3524 }
3525
3526 i915_request_add(rq);
3527
3528unpin:
3529 if (vma)
3530 i915_vma_unpin(vma);
3531
3532 return err;
3533}
3534
3535static void smoke_crescendo_work(struct kthread_work *work)
3536{
3537 struct preempt_smoke *smoke = container_of(work, typeof(*smoke), work);
3538 IGT_TIMEOUT(end_time);
3539 unsigned long count;
3540
3541 count = 0;
3542 do {
3543 struct i915_gem_context *ctx = smoke_context(smoke);
3544
3545 smoke->result = smoke_submit(smoke, ctx,
3546 count % I915_PRIORITY_MAX,
3547 smoke->batch);
3548
3549 count++;
3550 } while (!smoke->result && count < smoke->ncontext &&
3551 !__igt_timeout(end_time, NULL));
3552
3553 smoke->count = count;
3554}
3555
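/*
 * Run one kthread worker per engine, each submitting requests from random
 * contexts with a priority that keeps climbing (count % I915_PRIORITY_MAX),
 * so later submissions should preempt earlier ones. The per-engine counts
 * and the first error are collected once the workers are flushed.
 */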
3556static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
3557#define BATCH BIT(0)
3558{
3559 struct kthread_worker *worker[I915_NUM_ENGINES] = {};
3560 struct preempt_smoke *arg;
3561 struct intel_engine_cs *engine;
3562 enum intel_engine_id id;
3563 unsigned long count;
3564 int err = 0;
3565
3566 arg = kmalloc_array(I915_NUM_ENGINES, sizeof(*arg), GFP_KERNEL);
3567 if (!arg)
3568 return -ENOMEM;
3569
3570 memset(arg, 0, I915_NUM_ENGINES * sizeof(*arg));
3571
3572 for_each_engine(engine, smoke->gt, id) {
3573 arg[id] = *smoke;
3574 arg[id].engine = engine;
3575 if (!(flags & BATCH))
3576 arg[id].batch = NULL;
3577 arg[id].count = 0;
3578
3579 worker[id] = kthread_create_worker(0, "igt/smoke:%d", id);
3580 if (IS_ERR(worker[id])) {
3581 err = PTR_ERR(worker[id]);
3582 break;
3583 }
3584
3585 kthread_init_work(&arg[id].work, smoke_crescendo_work);
3586 kthread_queue_work(worker[id], &arg[id].work);
3587 }
3588
3589 count = 0;
3590 for_each_engine(engine, smoke->gt, id) {
3591 if (IS_ERR_OR_NULL(worker[id]))
3592 continue;
3593
3594 kthread_flush_work(&arg[id].work);
3595 if (arg[id].result && !err)
3596 err = arg[id].result;
3597
3598 count += arg[id].count;
3599
3600 kthread_destroy_worker(worker[id]);
3601 }
3602
3603 pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
3604 count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3605
3606 kfree(arg);
3607	return err;
3608}
3609
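/*
 * Single-threaded variant: walk every engine in turn, submitting from a
 * random context at a random priority until the context budget or the
 * timeout is exhausted.
 */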
3610static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
3611{
3612 enum intel_engine_id id;
3613 IGT_TIMEOUT(end_time);
3614 unsigned long count;
3615
3616 count = 0;
3617 do {
3618 for_each_engine(smoke->engine, smoke->gt, id) {
3619 struct i915_gem_context *ctx = smoke_context(smoke);
3620 int err;
3621
3622 err = smoke_submit(smoke,
3623 ctx, random_priority(&smoke->prng),
3624 flags & BATCH ? smoke->batch : NULL);
3625 if (err)
3626 return err;
3627
3628 count++;
3629 }
3630 } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
3631
3632 pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
3633 count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3634 return 0;
3635}
3636
3637static int live_preempt_smoke(void *arg)
3638{
3639 struct preempt_smoke smoke = {
3640 .gt = arg,
3641 .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
3642 .ncontext = 256,
3643 };
3644 const unsigned int phase[] = { 0, BATCH };
3645 struct igt_live_test t;
3646 int err = -ENOMEM;
3647 u32 *cs;
3648 int n;
3649
3650 smoke.contexts = kmalloc_array(smoke.ncontext,
3651 sizeof(*smoke.contexts),
3652 GFP_KERNEL);
3653 if (!smoke.contexts)
3654 return -ENOMEM;
3655
3656 smoke.batch =
3657 i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
3658 if (IS_ERR(smoke.batch)) {
3659 err = PTR_ERR(smoke.batch);
3660 goto err_free;
3661 }
3662
3663 cs = i915_gem_object_pin_map_unlocked(smoke.batch, I915_MAP_WB);
3664 if (IS_ERR(cs)) {
3665 err = PTR_ERR(cs);
3666 goto err_batch;
3667 }
3668 for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
3669 cs[n] = MI_ARB_CHECK;
3670 cs[n] = MI_BATCH_BUFFER_END;
3671 i915_gem_object_flush_map(smoke.batch);
3672 i915_gem_object_unpin_map(smoke.batch);
3673
3674 if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
3675 err = -EIO;
3676 goto err_batch;
3677 }
3678
3679 for (n = 0; n < smoke.ncontext; n++) {
3680 smoke.contexts[n] = kernel_context(smoke.gt->i915, NULL);
3681 if (!smoke.contexts[n])
3682 goto err_ctx;
3683 }
3684
3685 for (n = 0; n < ARRAY_SIZE(phase); n++) {
3686 err = smoke_crescendo(&smoke, phase[n]);
3687 if (err)
3688 goto err_ctx;
3689
3690 err = smoke_random(&smoke, phase[n]);
3691 if (err)
3692 goto err_ctx;
3693 }
3694
3695err_ctx:
3696 if (igt_live_test_end(&t))
3697 err = -EIO;
3698
3699 for (n = 0; n < smoke.ncontext; n++) {
3700 if (!smoke.contexts[n])
3701 break;
3702 kernel_context_close(smoke.contexts[n]);
3703 }
3704
3705err_batch:
3706 i915_gem_object_put(smoke.batch);
3707err_free:
3708 kfree(smoke.contexts);
3709
3710 return err;
3711}
3712
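/*
 * Create @nctx virtual engines over the same set of siblings and time how
 * long it takes to submit and retire batches of empty requests, either
 * grouping all requests of one context together (CHAIN) or interleaving
 * round-robin across the contexts. The reported figures are the latency for
 * a single request and the amortised latency for the largest batch reached.
 */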
3713static int nop_virtual_engine(struct intel_gt *gt,
3714 struct intel_engine_cs **siblings,
3715 unsigned int nsibling,
3716 unsigned int nctx,
3717 unsigned int flags)
3718#define CHAIN BIT(0)
3719{
3720 IGT_TIMEOUT(end_time);
3721 struct i915_request *request[16] = {};
3722 struct intel_context *ve[16];
3723 unsigned long n, prime, nc;
3724 struct igt_live_test t;
3725 ktime_t times[2] = {};
3726 int err;
3727
3728 GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
3729
3730 for (n = 0; n < nctx; n++) {
3731 ve[n] = intel_engine_create_virtual(siblings, nsibling, 0);
3732 if (IS_ERR(ve[n])) {
3733 err = PTR_ERR(ve[n]);
3734 nctx = n;
3735 goto out;
3736 }
3737
3738 err = intel_context_pin(ve[n]);
3739 if (err) {
3740 intel_context_put(ve[n]);
3741 nctx = n;
3742 goto out;
3743 }
3744 }
3745
3746 err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
3747 if (err)
3748 goto out;
3749
3750 for_each_prime_number_from(prime, 1, 8192) {
3751 times[1] = ktime_get_raw();
3752
3753 if (flags & CHAIN) {
3754 for (nc = 0; nc < nctx; nc++) {
3755 for (n = 0; n < prime; n++) {
3756 struct i915_request *rq;
3757
3758 rq = i915_request_create(ve[nc]);
3759 if (IS_ERR(rq)) {
3760 err = PTR_ERR(rq);
3761 goto out;
3762 }
3763
3764 if (request[nc])
3765 i915_request_put(request[nc]);
3766 request[nc] = i915_request_get(rq);
3767 i915_request_add(rq);
3768 }
3769 }
3770 } else {
3771 for (n = 0; n < prime; n++) {
3772 for (nc = 0; nc < nctx; nc++) {
3773 struct i915_request *rq;
3774
3775 rq = i915_request_create(ve[nc]);
3776 if (IS_ERR(rq)) {
3777 err = PTR_ERR(rq);
3778 goto out;
3779 }
3780
3781 if (request[nc])
3782 i915_request_put(request[nc]);
3783 request[nc] = i915_request_get(rq);
3784 i915_request_add(rq);
3785 }
3786 }
3787 }
3788
3789 for (nc = 0; nc < nctx; nc++) {
3790 if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
3791 pr_err("%s(%s): wait for %llx:%lld timed out\n",
3792 __func__, ve[0]->engine->name,
3793 request[nc]->fence.context,
3794 request[nc]->fence.seqno);
3795
3796 GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3797 __func__, ve[0]->engine->name,
3798 request[nc]->fence.context,
3799 request[nc]->fence.seqno);
3800 GEM_TRACE_DUMP();
3801 intel_gt_set_wedged(gt);
3802 break;
3803 }
3804 }
3805
3806 times[1] = ktime_sub(ktime_get_raw(), times[1]);
3807 if (prime == 1)
3808 times[0] = times[1];
3809
3810 for (nc = 0; nc < nctx; nc++) {
3811 i915_request_put(request[nc]);
3812 request[nc] = NULL;
3813 }
3814
3815 if (__igt_timeout(end_time, NULL))
3816 break;
3817 }
3818
3819 err = igt_live_test_end(&t);
3820 if (err)
3821 goto out;
3822
3823 pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
3824 nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
3825 prime, div64_u64(ktime_to_ns(times[1]), prime));
3826
3827out:
3828 if (igt_flush_test(gt->i915))
3829 err = -EIO;
3830
3831 for (nc = 0; nc < nctx; nc++) {
3832 i915_request_put(request[nc]);
3833 intel_context_unpin(ve[nc]);
3834 intel_context_put(ve[nc]);
3835 }
3836 return err;
3837}
3838
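/*
 * Collect every physical engine of the given class (optionally filtered,
 * e.g. by intel_engine_has_timeslices) into @siblings and return the count;
 * the callers only build virtual engines when at least two are found.
 */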
3839static unsigned int
3840__select_siblings(struct intel_gt *gt,
3841 unsigned int class,
3842 struct intel_engine_cs **siblings,
3843 bool (*filter)(const struct intel_engine_cs *))
3844{
3845 unsigned int n = 0;
3846 unsigned int inst;
3847
3848 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3849 if (!gt->engine_class[class][inst])
3850 continue;
3851
3852 if (filter && !filter(gt->engine_class[class][inst]))
3853 continue;
3854
3855 siblings[n++] = gt->engine_class[class][inst];
3856 }
3857
3858 return n;
3859}
3860
3861static unsigned int
3862select_siblings(struct intel_gt *gt,
3863 unsigned int class,
3864 struct intel_engine_cs **siblings)
3865{
3866 return __select_siblings(gt, class, siblings, NULL);
3867}
3868
3869static int live_virtual_engine(void *arg)
3870{
3871 struct intel_gt *gt = arg;
3872 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3873 struct intel_engine_cs *engine;
3874 enum intel_engine_id id;
3875 unsigned int class;
3876 int err;
3877
3878	if (intel_uc_uses_guc_submission(&gt->uc))
3879 return 0;
3880
3881 for_each_engine(engine, gt, id) {
3882 err = nop_virtual_engine(gt, &engine, 1, 1, 0);
3883 if (err) {
3884 pr_err("Failed to wrap engine %s: err=%d\n",
3885 engine->name, err);
3886 return err;
3887 }
3888 }
3889
3890 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3891 int nsibling, n;
3892
3893 nsibling = select_siblings(gt, class, siblings);
3894 if (nsibling < 2)
3895 continue;
3896
3897 for (n = 1; n <= nsibling + 1; n++) {
3898 err = nop_virtual_engine(gt, siblings, nsibling,
3899 n, 0);
3900 if (err)
3901 return err;
3902 }
3903
3904 err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
3905 if (err)
3906 return err;
3907 }
3908
3909 return 0;
3910}
3911
3912static int mask_virtual_engine(struct intel_gt *gt,
3913 struct intel_engine_cs **siblings,
3914 unsigned int nsibling)
3915{
3916 struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
3917 struct intel_context *ve;
3918 struct igt_live_test t;
3919 unsigned int n;
3920 int err;
3921
3922 /*
3923 * Check that by setting the execution mask on a request, we can
3924 * restrict it to our desired engine within the virtual engine.
3925 */
3926
3927 ve = intel_engine_create_virtual(siblings, nsibling, 0);
3928 if (IS_ERR(ve)) {
3929 err = PTR_ERR(ve);
3930 goto out_close;
3931 }
3932
3933 err = intel_context_pin(ve);
3934 if (err)
3935 goto out_put;
3936
3937 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3938 if (err)
3939 goto out_unpin;
3940
3941 for (n = 0; n < nsibling; n++) {
3942 request[n] = i915_request_create(ve);
3943 if (IS_ERR(request[n])) {
3944 err = PTR_ERR(request[n]);
3945 nsibling = n;
3946 goto out;
3947 }
3948
3949 /* Reverse order as it's more likely to be unnatural */
3950 request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
3951
3952 i915_request_get(request[n]);
3953 i915_request_add(request[n]);
3954 }
3955
3956 for (n = 0; n < nsibling; n++) {
3957 if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
3958 pr_err("%s(%s): wait for %llx:%lld timed out\n",
3959 __func__, ve->engine->name,
3960 request[n]->fence.context,
3961 request[n]->fence.seqno);
3962
3963 GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3964 __func__, ve->engine->name,
3965 request[n]->fence.context,
3966 request[n]->fence.seqno);
3967 GEM_TRACE_DUMP();
3968 intel_gt_set_wedged(gt);
3969 err = -EIO;
3970 goto out;
3971 }
3972
3973 if (request[n]->engine != siblings[nsibling - n - 1]) {
3974 pr_err("Executed on wrong sibling '%s', expected '%s'\n",
3975 request[n]->engine->name,
3976 siblings[nsibling - n - 1]->name);
3977 err = -EINVAL;
3978 goto out;
3979 }
3980 }
3981
3982 err = igt_live_test_end(&t);
3983out:
3984 if (igt_flush_test(gt->i915))
3985 err = -EIO;
3986
3987 for (n = 0; n < nsibling; n++)
3988 i915_request_put(request[n]);
3989
3990out_unpin:
3991 intel_context_unpin(ve);
3992out_put:
3993 intel_context_put(ve);
3994out_close:
3995 return err;
3996}
3997
3998static int live_virtual_mask(void *arg)
3999{
4000 struct intel_gt *gt = arg;
4001 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4002 unsigned int class;
4003 int err;
4004
4005	if (intel_uc_uses_guc_submission(&gt->uc))
4006 return 0;
4007
4008 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4009 unsigned int nsibling;
4010
4011 nsibling = select_siblings(gt, class, siblings);
4012 if (nsibling < 2)
4013 continue;
4014
4015 err = mask_virtual_engine(gt, siblings, nsibling);
4016 if (err)
4017 return err;
4018 }
4019
4020 return 0;
4021}
4022
4023static int slicein_virtual_engine(struct intel_gt *gt,
4024 struct intel_engine_cs **siblings,
4025 unsigned int nsibling)
4026{
4027 const long timeout = slice_timeout(siblings[0]);
4028 struct intel_context *ce;
4029 struct i915_request *rq;
4030 struct igt_spinner spin;
4031 unsigned int n;
4032 int err = 0;
4033
4034 /*
4035 * Virtual requests must take part in timeslicing on the target engines.
4036 */
4037
4038 if (igt_spinner_init(&spin, gt))
4039 return -ENOMEM;
4040
4041 for (n = 0; n < nsibling; n++) {
4042 ce = intel_context_create(siblings[n]);
4043 if (IS_ERR(ce)) {
4044 err = PTR_ERR(ce);
4045 goto out;
4046 }
4047
4048 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4049 intel_context_put(ce);
4050 if (IS_ERR(rq)) {
4051 err = PTR_ERR(rq);
4052 goto out;
4053 }
4054
4055 i915_request_add(rq);
4056 }
4057
4058 ce = intel_engine_create_virtual(siblings, nsibling, 0);
4059 if (IS_ERR(ce)) {
4060 err = PTR_ERR(ce);
4061 goto out;
4062 }
4063
4064 rq = intel_context_create_request(ce);
4065 intel_context_put(ce);
4066 if (IS_ERR(rq)) {
4067 err = PTR_ERR(rq);
4068 goto out;
4069 }
4070
4071 i915_request_get(rq);
4072 i915_request_add(rq);
4073 if (i915_request_wait(rq, 0, timeout) < 0) {
4074 GEM_TRACE_ERR("%s(%s) failed to slice in virtual request\n",
4075 __func__, rq->engine->name);
4076 GEM_TRACE_DUMP();
4077 intel_gt_set_wedged(gt);
4078 err = -EIO;
4079 }
4080 i915_request_put(rq);
4081
4082out:
4083 igt_spinner_end(&spin);
4084 if (igt_flush_test(gt->i915))
4085 err = -EIO;
4086 igt_spinner_fini(&spin);
4087 return err;
4088}
4089
4090static int sliceout_virtual_engine(struct intel_gt *gt,
4091 struct intel_engine_cs **siblings,
4092 unsigned int nsibling)
4093{
4094 const long timeout = slice_timeout(siblings[0]);
4095 struct intel_context *ce;
4096 struct i915_request *rq;
4097 struct igt_spinner spin;
4098 unsigned int n;
4099 int err = 0;
4100
4101 /*
4102 * Virtual requests must allow others a fair timeslice.
4103 */
4104
4105 if (igt_spinner_init(&spin, gt))
4106 return -ENOMEM;
4107
4108 /* XXX We do not handle oversubscription and fairness with normal rq */
4109 for (n = 0; n < nsibling; n++) {
4110 ce = intel_engine_create_virtual(siblings, nsibling, 0);
4111 if (IS_ERR(ce)) {
4112 err = PTR_ERR(ce);
4113 goto out;
4114 }
4115
4116 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4117 intel_context_put(ce);
4118 if (IS_ERR(rq)) {
4119 err = PTR_ERR(rq);
4120 goto out;
4121 }
4122
4123 i915_request_add(rq);
4124 }
4125
4126 for (n = 0; !err && n < nsibling; n++) {
4127 ce = intel_context_create(siblings[n]);
4128 if (IS_ERR(ce)) {
4129 err = PTR_ERR(ce);
4130 goto out;
4131 }
4132
4133 rq = intel_context_create_request(ce);
4134 intel_context_put(ce);
4135 if (IS_ERR(rq)) {
4136 err = PTR_ERR(rq);
4137 goto out;
4138 }
4139
4140 i915_request_get(rq);
4141 i915_request_add(rq);
4142 if (i915_request_wait(rq, 0, timeout) < 0) {
4143 GEM_TRACE_ERR("%s(%s) failed to slice out virtual request\n",
4144 __func__, siblings[n]->name);
4145 GEM_TRACE_DUMP();
4146 intel_gt_set_wedged(gt);
4147 err = -EIO;
4148 }
4149 i915_request_put(rq);
4150 }
4151
4152out:
4153 igt_spinner_end(&spin);
4154 if (igt_flush_test(gt->i915))
4155 err = -EIO;
4156 igt_spinner_fini(&spin);
4157 return err;
4158}
4159
4160static int live_virtual_slice(void *arg)
4161{
4162 struct intel_gt *gt = arg;
4163 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4164 unsigned int class;
4165 int err;
4166
4167	if (intel_uc_uses_guc_submission(&gt->uc))
4168 return 0;
4169
4170 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4171 unsigned int nsibling;
4172
4173 nsibling = __select_siblings(gt, class, siblings,
4174 intel_engine_has_timeslices);
4175 if (nsibling < 2)
4176 continue;
4177
4178 err = slicein_virtual_engine(gt, siblings, nsibling);
4179 if (err)
4180 return err;
4181
4182 err = sliceout_virtual_engine(gt, siblings, nsibling);
4183 if (err)
4184 return err;
4185 }
4186
4187 return 0;
4188}
4189
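/*
 * Issue NUM_GPR_DW requests on the virtual engine, each pinned to a
 * different sibling via its execution_mask. Request n stores GPR dword n
 * (expected to hold n, written by the previous request) into slot n of the
 * GGTT scratch page and then loads n + 1 into the next GPR dword. The
 * scratch only reads back 0..NUM_GPR_DW-1 if the GPR state written on one
 * sibling is carried in the context image to the next.
 */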
4190static int preserved_virtual_engine(struct intel_gt *gt,
4191 struct intel_engine_cs **siblings,
4192 unsigned int nsibling)
4193{
4194 struct i915_request *last = NULL;
4195 struct intel_context *ve;
4196 struct i915_vma *scratch;
4197 struct igt_live_test t;
4198 unsigned int n;
4199 int err = 0;
4200 u32 *cs;
4201
4202 scratch =
4203 __vm_create_scratch_for_read_pinned(&siblings[0]->gt->ggtt->vm,
4204 PAGE_SIZE);
4205 if (IS_ERR(scratch))
4206 return PTR_ERR(scratch);
4207
4208 err = i915_vma_sync(scratch);
4209 if (err)
4210 goto out_scratch;
4211
4212 ve = intel_engine_create_virtual(siblings, nsibling, 0);
4213 if (IS_ERR(ve)) {
4214 err = PTR_ERR(ve);
4215 goto out_scratch;
4216 }
4217
4218 err = intel_context_pin(ve);
4219 if (err)
4220 goto out_put;
4221
4222 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
4223 if (err)
4224 goto out_unpin;
4225
4226 for (n = 0; n < NUM_GPR_DW; n++) {
4227 struct intel_engine_cs *engine = siblings[n % nsibling];
4228 struct i915_request *rq;
4229
4230 rq = i915_request_create(ve);
4231 if (IS_ERR(rq)) {
4232 err = PTR_ERR(rq);
4233 goto out_end;
4234 }
4235
4236 i915_request_put(last);
4237 last = i915_request_get(rq);
4238
4239 cs = intel_ring_begin(rq, 8);
4240 if (IS_ERR(cs)) {
4241 i915_request_add(rq);
4242 err = PTR_ERR(cs);
4243 goto out_end;
4244 }
4245
4246 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4247 *cs++ = CS_GPR(engine, n);
4248 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
4249 *cs++ = 0;
4250
4251 *cs++ = MI_LOAD_REGISTER_IMM(1);
4252 *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
4253 *cs++ = n + 1;
4254
4255 *cs++ = MI_NOOP;
4256 intel_ring_advance(rq, cs);
4257
4258 /* Restrict this request to run on a particular engine */
4259 rq->execution_mask = engine->mask;
4260 i915_request_add(rq);
4261 }
4262
4263 if (i915_request_wait(last, 0, HZ / 5) < 0) {
4264 err = -ETIME;
4265 goto out_end;
4266 }
4267
4268 cs = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB);
4269 if (IS_ERR(cs)) {
4270 err = PTR_ERR(cs);
4271 goto out_end;
4272 }
4273
4274 for (n = 0; n < NUM_GPR_DW; n++) {
4275 if (cs[n] != n) {
4276 pr_err("Incorrect value[%d] found for GPR[%d]\n",
4277 cs[n], n);
4278 err = -EINVAL;
4279 break;
4280 }
4281 }
4282
4283 i915_gem_object_unpin_map(scratch->obj);
4284
4285out_end:
4286 if (igt_live_test_end(&t))
4287 err = -EIO;
4288 i915_request_put(last);
4289out_unpin:
4290 intel_context_unpin(ve);
4291out_put:
4292 intel_context_put(ve);
4293out_scratch:
4294 i915_vma_unpin_and_release(&scratch, 0);
4295 return err;
4296}
4297
4298static int live_virtual_preserved(void *arg)
4299{
4300 struct intel_gt *gt = arg;
4301 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4302 unsigned int class;
4303
4304 /*
4305 * Check that the context image retains non-privileged (user) registers
4306	 * from one engine to the next. For this we check that the CS_GPRs
4307	 * are preserved.
4308 */
4309
4310	if (intel_uc_uses_guc_submission(&gt->uc))
4311 return 0;
4312
4313	/* As we use CS_GPR, we cannot run on platforms before they existed on all engines. */
4314 if (GRAPHICS_VER(gt->i915) < 9)
4315 return 0;
4316
4317 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4318 int nsibling, err;
4319
4320 nsibling = select_siblings(gt, class, siblings);
4321 if (nsibling < 2)
4322 continue;
4323
4324 err = preserved_virtual_engine(gt, siblings, nsibling);
4325 if (err)
4326 return err;
4327 }
4328
4329 return 0;
4330}
4331
4332static int reset_virtual_engine(struct intel_gt *gt,
4333 struct intel_engine_cs **siblings,
4334 unsigned int nsibling)
4335{
4336 struct intel_engine_cs *engine;
4337 struct intel_context *ve;
4338 struct igt_spinner spin;
4339 struct i915_request *rq;
4340 unsigned int n;
4341 int err = 0;
4342
4343 /*
4344 * In order to support offline error capture for fast preempt reset,
4345 * we need to decouple the guilty request and ensure that it and its
4346	 * descendants are not executed while the capture is in progress.
4347 */
4348
4349 if (igt_spinner_init(&spin, gt))
4350 return -ENOMEM;
4351
4352 ve = intel_engine_create_virtual(siblings, nsibling, 0);
4353 if (IS_ERR(ve)) {
4354 err = PTR_ERR(ve);
4355 goto out_spin;
4356 }
4357
4358 for (n = 0; n < nsibling; n++)
4359 st_engine_heartbeat_disable(siblings[n]);
4360
4361 rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
4362 if (IS_ERR(rq)) {
4363 err = PTR_ERR(rq);
4364 goto out_heartbeat;
4365 }
4366 i915_request_add(rq);
4367
4368 if (!igt_wait_for_spinner(&spin, rq)) {
4369 intel_gt_set_wedged(gt);
4370 err = -ETIME;
4371 goto out_heartbeat;
4372 }
4373
4374 engine = rq->engine;
4375 GEM_BUG_ON(engine == ve->engine);
4376
4377 /* Take ownership of the reset and tasklet */
4378 err = engine_lock_reset_tasklet(engine);
4379 if (err)
4380 goto out_heartbeat;
4381
4382 engine->sched_engine->tasklet.callback(&engine->sched_engine->tasklet);
4383 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
4384
4385 /* Fake a preemption event; failed of course */
4386 spin_lock_irq(&engine->sched_engine->lock);
4387 __unwind_incomplete_requests(engine);
4388 spin_unlock_irq(&engine->sched_engine->lock);
4389 GEM_BUG_ON(rq->engine != engine);
4390
4391 /* Reset the engine while keeping our active request on hold */
4392 execlists_hold(engine, rq);
4393 GEM_BUG_ON(!i915_request_on_hold(rq));
4394
4395 __intel_engine_reset_bh(engine, NULL);
4396 GEM_BUG_ON(rq->fence.error != -EIO);
4397
4398 /* Release our grasp on the engine, letting CS flow again */
4399 engine_unlock_reset_tasklet(engine);
4400
4401 /* Check that we do not resubmit the held request */
4402 i915_request_get(rq);
4403 if (!i915_request_wait(rq, 0, HZ / 5)) {
4404 pr_err("%s: on hold request completed!\n",
4405 engine->name);
4406 intel_gt_set_wedged(gt);
4407 err = -EIO;
4408 goto out_rq;
4409 }
4410 GEM_BUG_ON(!i915_request_on_hold(rq));
4411
4412 /* But is resubmitted on release */
4413 execlists_unhold(engine, rq);
4414 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4415 pr_err("%s: held request did not complete!\n",
4416 engine->name);
4417 intel_gt_set_wedged(gt);
4418 err = -ETIME;
4419 }
4420
4421out_rq:
4422 i915_request_put(rq);
4423out_heartbeat:
4424 for (n = 0; n < nsibling; n++)
4425 st_engine_heartbeat_enable(siblings[n]);
4426
4427 intel_context_put(ve);
4428out_spin:
4429 igt_spinner_fini(&spin);
4430 return err;
4431}
4432
4433static int live_virtual_reset(void *arg)
4434{
4435 struct intel_gt *gt = arg;
4436 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4437 unsigned int class;
4438
4439 /*
4440 * Check that we handle a reset event within a virtual engine.
4441 * Only the physical engine is reset, but we have to check the flow
4442 * of the virtual requests around the reset, and make sure it is not
4443 * forgotten.
4444 */
4445
4446	if (intel_uc_uses_guc_submission(&gt->uc))
4447 return 0;
4448
4449 if (!intel_has_reset_engine(gt))
4450 return 0;
4451
4452 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4453 int nsibling, err;
4454
4455 nsibling = select_siblings(gt, class, siblings);
4456 if (nsibling < 2)
4457 continue;
4458
4459 err = reset_virtual_engine(gt, siblings, nsibling);
4460 if (err)
4461 return err;
4462 }
4463
4464 return 0;
4465}
4466
4467int intel_execlists_live_selftests(struct drm_i915_private *i915)
4468{
4469 static const struct i915_subtest tests[] = {
4470 SUBTEST(live_sanitycheck),
4471 SUBTEST(live_unlite_switch),
4472 SUBTEST(live_unlite_preempt),
4473 SUBTEST(live_unlite_ring),
4474 SUBTEST(live_pin_rewind),
4475 SUBTEST(live_hold_reset),
4476 SUBTEST(live_error_interrupt),
4477 SUBTEST(live_timeslice_preempt),
4478 SUBTEST(live_timeslice_rewind),
4479 SUBTEST(live_timeslice_queue),
4480 SUBTEST(live_timeslice_nopreempt),
4481 SUBTEST(live_busywait_preempt),
4482 SUBTEST(live_preempt),
4483 SUBTEST(live_late_preempt),
4484 SUBTEST(live_nopreempt),
4485 SUBTEST(live_preempt_cancel),
4486 SUBTEST(live_suppress_self_preempt),
4487 SUBTEST(live_chain_preempt),
4488 SUBTEST(live_preempt_ring),
4489 SUBTEST(live_preempt_gang),
4490 SUBTEST(live_preempt_timeout),
4491 SUBTEST(live_preempt_user),
4492 SUBTEST(live_preempt_smoke),
4493 SUBTEST(live_virtual_engine),
4494 SUBTEST(live_virtual_mask),
4495 SUBTEST(live_virtual_preserved),
4496 SUBTEST(live_virtual_slice),
4497 SUBTEST(live_virtual_reset),
4498 };
4499
4500 if (to_gt(i915)->submission_method != INTEL_SUBMISSION_ELSP)
4501 return 0;
4502
4503 if (intel_gt_is_wedged(to_gt(i915)))
4504 return 0;
4505
4506 return intel_gt_live_subtests(tests, to_gt(i915));
4507}