Loading...
Note: File does not exist in v4.6.
1// SPDX-License-Identifier: MIT
2/*
3 * Copyright © 2018 Intel Corporation
4 */
5
6#include <linux/prime_numbers.h>
7
8#include "gem/i915_gem_pm.h"
9#include "gt/intel_engine_heartbeat.h"
10#include "gt/intel_reset.h"
11#include "gt/selftest_engine_heartbeat.h"
12
13#include "i915_selftest.h"
14#include "selftests/i915_random.h"
15#include "selftests/igt_flush_test.h"
16#include "selftests/igt_live_test.h"
17#include "selftests/igt_spinner.h"
18#include "selftests/lib_sw_fence.h"
19
20#include "gem/selftests/igt_gem_utils.h"
21#include "gem/selftests/mock_context.h"
22
/* MMIO offset of dword @n in the GPR block at (engine)->mmio_base + 0x600 */
#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + 4 * (n))
/* Number of general purpose registers */
#define NUM_GPR 16
/* Each GPR is two dwords wide */
#define NUM_GPR_DW (2 * NUM_GPR)
26
27static bool is_active(struct i915_request *rq)
28{
29 if (i915_request_is_active(rq))
30 return true;
31
32 if (i915_request_on_hold(rq))
33 return true;
34
35 if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
36 return true;
37
38 return false;
39}
40
41static int wait_for_submit(struct intel_engine_cs *engine,
42 struct i915_request *rq,
43 unsigned long timeout)
44{
45 /* Ignore our own attempts to suppress excess tasklets */
46 tasklet_hi_schedule(&engine->execlists.tasklet);
47
48 timeout += jiffies;
49 do {
50 bool done = time_after(jiffies, timeout);
51
52 if (i915_request_completed(rq)) /* that was quick! */
53 return 0;
54
55 /* Wait until the HW has acknowleged the submission (or err) */
56 intel_engine_flush_submission(engine);
57 if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
58 return 0;
59
60 if (done)
61 return -ETIME;
62
63 cond_resched();
64 } while (1);
65}
66
67static int wait_for_reset(struct intel_engine_cs *engine,
68 struct i915_request *rq,
69 unsigned long timeout)
70{
71 timeout += jiffies;
72
73 do {
74 cond_resched();
75 intel_engine_flush_submission(engine);
76
77 if (READ_ONCE(engine->execlists.pending[0]))
78 continue;
79
80 if (i915_request_completed(rq))
81 break;
82
83 if (READ_ONCE(rq->fence.error))
84 break;
85 } while (time_before(jiffies, timeout));
86
87 flush_scheduled_work();
88
89 if (rq->fence.error != -EIO) {
90 pr_err("%s: hanging request %llx:%lld not reset\n",
91 engine->name,
92 rq->fence.context,
93 rq->fence.seqno);
94 return -EINVAL;
95 }
96
97 /* Give the request a jiffie to complete after flushing the worker */
98 if (i915_request_wait(rq, 0,
99 max(0l, (long)(timeout - jiffies)) + 1) < 0) {
100 pr_err("%s: hanging request %llx:%lld did not complete\n",
101 engine->name,
102 rq->fence.context,
103 rq->fence.seqno);
104 return -ETIME;
105 }
106
107 return 0;
108}
109
110static int live_sanitycheck(void *arg)
111{
112 struct intel_gt *gt = arg;
113 struct intel_engine_cs *engine;
114 enum intel_engine_id id;
115 struct igt_spinner spin;
116 int err = 0;
117
118 if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
119 return 0;
120
121 if (igt_spinner_init(&spin, gt))
122 return -ENOMEM;
123
124 for_each_engine(engine, gt, id) {
125 struct intel_context *ce;
126 struct i915_request *rq;
127
128 ce = intel_context_create(engine);
129 if (IS_ERR(ce)) {
130 err = PTR_ERR(ce);
131 break;
132 }
133
134 rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
135 if (IS_ERR(rq)) {
136 err = PTR_ERR(rq);
137 goto out_ctx;
138 }
139
140 i915_request_add(rq);
141 if (!igt_wait_for_spinner(&spin, rq)) {
142 GEM_TRACE("spinner failed to start\n");
143 GEM_TRACE_DUMP();
144 intel_gt_set_wedged(gt);
145 err = -EIO;
146 goto out_ctx;
147 }
148
149 igt_spinner_end(&spin);
150 if (igt_flush_test(gt->i915)) {
151 err = -EIO;
152 goto out_ctx;
153 }
154
155out_ctx:
156 intel_context_put(ce);
157 if (err)
158 break;
159 }
160
161 igt_spinner_fini(&spin);
162 return err;
163}
164
165static int live_unlite_restore(struct intel_gt *gt, int prio)
166{
167 struct intel_engine_cs *engine;
168 enum intel_engine_id id;
169 struct igt_spinner spin;
170 int err = -ENOMEM;
171
172 /*
173 * Check that we can correctly context switch between 2 instances
174 * on the same engine from the same parent context.
175 */
176
177 if (igt_spinner_init(&spin, gt))
178 return err;
179
180 err = 0;
181 for_each_engine(engine, gt, id) {
182 struct intel_context *ce[2] = {};
183 struct i915_request *rq[2];
184 struct igt_live_test t;
185 int n;
186
187 if (prio && !intel_engine_has_preemption(engine))
188 continue;
189
190 if (!intel_engine_can_store_dword(engine))
191 continue;
192
193 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
194 err = -EIO;
195 break;
196 }
197 st_engine_heartbeat_disable(engine);
198
199 for (n = 0; n < ARRAY_SIZE(ce); n++) {
200 struct intel_context *tmp;
201
202 tmp = intel_context_create(engine);
203 if (IS_ERR(tmp)) {
204 err = PTR_ERR(tmp);
205 goto err_ce;
206 }
207
208 err = intel_context_pin(tmp);
209 if (err) {
210 intel_context_put(tmp);
211 goto err_ce;
212 }
213
214 /*
215 * Setup the pair of contexts such that if we
216 * lite-restore using the RING_TAIL from ce[1] it
217 * will execute garbage from ce[0]->ring.
218 */
219 memset(tmp->ring->vaddr,
220 POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
221 tmp->ring->vma->size);
222
223 ce[n] = tmp;
224 }
225 GEM_BUG_ON(!ce[1]->ring->size);
226 intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
227 lrc_update_regs(ce[1], engine, ce[1]->ring->head);
228
229 rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
230 if (IS_ERR(rq[0])) {
231 err = PTR_ERR(rq[0]);
232 goto err_ce;
233 }
234
235 i915_request_get(rq[0]);
236 i915_request_add(rq[0]);
237 GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
238
239 if (!igt_wait_for_spinner(&spin, rq[0])) {
240 i915_request_put(rq[0]);
241 goto err_ce;
242 }
243
244 rq[1] = i915_request_create(ce[1]);
245 if (IS_ERR(rq[1])) {
246 err = PTR_ERR(rq[1]);
247 i915_request_put(rq[0]);
248 goto err_ce;
249 }
250
251 if (!prio) {
252 /*
253 * Ensure we do the switch to ce[1] on completion.
254 *
255 * rq[0] is already submitted, so this should reduce
256 * to a no-op (a wait on a request on the same engine
257 * uses the submit fence, not the completion fence),
258 * but it will install a dependency on rq[1] for rq[0]
259 * that will prevent the pair being reordered by
260 * timeslicing.
261 */
262 i915_request_await_dma_fence(rq[1], &rq[0]->fence);
263 }
264
265 i915_request_get(rq[1]);
266 i915_request_add(rq[1]);
267 GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
268 i915_request_put(rq[0]);
269
270 if (prio) {
271 struct i915_sched_attr attr = {
272 .priority = prio,
273 };
274
275 /* Alternatively preempt the spinner with ce[1] */
276 engine->schedule(rq[1], &attr);
277 }
278
279 /* And switch back to ce[0] for good measure */
280 rq[0] = i915_request_create(ce[0]);
281 if (IS_ERR(rq[0])) {
282 err = PTR_ERR(rq[0]);
283 i915_request_put(rq[1]);
284 goto err_ce;
285 }
286
287 i915_request_await_dma_fence(rq[0], &rq[1]->fence);
288 i915_request_get(rq[0]);
289 i915_request_add(rq[0]);
290 GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
291 i915_request_put(rq[1]);
292 i915_request_put(rq[0]);
293
294err_ce:
295 intel_engine_flush_submission(engine);
296 igt_spinner_end(&spin);
297 for (n = 0; n < ARRAY_SIZE(ce); n++) {
298 if (IS_ERR_OR_NULL(ce[n]))
299 break;
300
301 intel_context_unpin(ce[n]);
302 intel_context_put(ce[n]);
303 }
304
305 st_engine_heartbeat_enable(engine);
306 if (igt_live_test_end(&t))
307 err = -EIO;
308 if (err)
309 break;
310 }
311
312 igt_spinner_fini(&spin);
313 return err;
314}
315
/* Run live_unlite_restore() on the GT in @arg with no priority boost. */
static int live_unlite_switch(void *arg)
{
	struct intel_gt *gt = arg;

	return live_unlite_restore(gt, 0);
}
320
321static int live_unlite_preempt(void *arg)
322{
323 return live_unlite_restore(arg, I915_PRIORITY_MAX);
324}
325
326static int live_unlite_ring(void *arg)
327{
328 struct intel_gt *gt = arg;
329 struct intel_engine_cs *engine;
330 struct igt_spinner spin;
331 enum intel_engine_id id;
332 int err = 0;
333
334 /*
335 * Setup a preemption event that will cause almost the entire ring
336 * to be unwound, potentially fooling our intel_ring_direction()
337 * into emitting a forward lite-restore instead of the rollback.
338 */
339
340 if (igt_spinner_init(&spin, gt))
341 return -ENOMEM;
342
343 for_each_engine(engine, gt, id) {
344 struct intel_context *ce[2] = {};
345 struct i915_request *rq;
346 struct igt_live_test t;
347 int n;
348
349 if (!intel_engine_has_preemption(engine))
350 continue;
351
352 if (!intel_engine_can_store_dword(engine))
353 continue;
354
355 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
356 err = -EIO;
357 break;
358 }
359 st_engine_heartbeat_disable(engine);
360
361 for (n = 0; n < ARRAY_SIZE(ce); n++) {
362 struct intel_context *tmp;
363
364 tmp = intel_context_create(engine);
365 if (IS_ERR(tmp)) {
366 err = PTR_ERR(tmp);
367 goto err_ce;
368 }
369
370 err = intel_context_pin(tmp);
371 if (err) {
372 intel_context_put(tmp);
373 goto err_ce;
374 }
375
376 memset32(tmp->ring->vaddr,
377 0xdeadbeef, /* trigger a hang if executed */
378 tmp->ring->vma->size / sizeof(u32));
379
380 ce[n] = tmp;
381 }
382
383 /* Create max prio spinner, followed by N low prio nops */
384 rq = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
385 if (IS_ERR(rq)) {
386 err = PTR_ERR(rq);
387 goto err_ce;
388 }
389
390 i915_request_get(rq);
391 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
392 i915_request_add(rq);
393
394 if (!igt_wait_for_spinner(&spin, rq)) {
395 intel_gt_set_wedged(gt);
396 i915_request_put(rq);
397 err = -ETIME;
398 goto err_ce;
399 }
400
401 /* Fill the ring, until we will cause a wrap */
402 n = 0;
403 while (intel_ring_direction(ce[0]->ring,
404 rq->wa_tail,
405 ce[0]->ring->tail) <= 0) {
406 struct i915_request *tmp;
407
408 tmp = intel_context_create_request(ce[0]);
409 if (IS_ERR(tmp)) {
410 err = PTR_ERR(tmp);
411 i915_request_put(rq);
412 goto err_ce;
413 }
414
415 i915_request_add(tmp);
416 intel_engine_flush_submission(engine);
417 n++;
418 }
419 intel_engine_flush_submission(engine);
420 pr_debug("%s: Filled ring with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
421 engine->name, n,
422 ce[0]->ring->size,
423 ce[0]->ring->tail,
424 ce[0]->ring->emit,
425 rq->tail);
426 GEM_BUG_ON(intel_ring_direction(ce[0]->ring,
427 rq->tail,
428 ce[0]->ring->tail) <= 0);
429 i915_request_put(rq);
430
431 /* Create a second ring to preempt the first ring after rq[0] */
432 rq = intel_context_create_request(ce[1]);
433 if (IS_ERR(rq)) {
434 err = PTR_ERR(rq);
435 goto err_ce;
436 }
437
438 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
439 i915_request_get(rq);
440 i915_request_add(rq);
441
442 err = wait_for_submit(engine, rq, HZ / 2);
443 i915_request_put(rq);
444 if (err) {
445 pr_err("%s: preemption request was not submitted\n",
446 engine->name);
447 err = -ETIME;
448 }
449
450 pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
451 engine->name,
452 ce[0]->ring->tail, ce[0]->ring->emit,
453 ce[1]->ring->tail, ce[1]->ring->emit);
454
455err_ce:
456 intel_engine_flush_submission(engine);
457 igt_spinner_end(&spin);
458 for (n = 0; n < ARRAY_SIZE(ce); n++) {
459 if (IS_ERR_OR_NULL(ce[n]))
460 break;
461
462 intel_context_unpin(ce[n]);
463 intel_context_put(ce[n]);
464 }
465 st_engine_heartbeat_enable(engine);
466 if (igt_live_test_end(&t))
467 err = -EIO;
468 if (err)
469 break;
470 }
471
472 igt_spinner_fini(&spin);
473 return err;
474}
475
476static int live_pin_rewind(void *arg)
477{
478 struct intel_gt *gt = arg;
479 struct intel_engine_cs *engine;
480 enum intel_engine_id id;
481 int err = 0;
482
483 /*
484 * We have to be careful not to trust intel_ring too much, for example
485 * ring->head is updated upon retire which is out of sync with pinning
486 * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
487 * or else we risk writing an older, stale value.
488 *
489 * To simulate this, let's apply a bit of deliberate sabotague.
490 */
491
492 for_each_engine(engine, gt, id) {
493 struct intel_context *ce;
494 struct i915_request *rq;
495 struct intel_ring *ring;
496 struct igt_live_test t;
497
498 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
499 err = -EIO;
500 break;
501 }
502
503 ce = intel_context_create(engine);
504 if (IS_ERR(ce)) {
505 err = PTR_ERR(ce);
506 break;
507 }
508
509 err = intel_context_pin(ce);
510 if (err) {
511 intel_context_put(ce);
512 break;
513 }
514
515 /* Keep the context awake while we play games */
516 err = i915_active_acquire(&ce->active);
517 if (err) {
518 intel_context_unpin(ce);
519 intel_context_put(ce);
520 break;
521 }
522 ring = ce->ring;
523
524 /* Poison the ring, and offset the next request from HEAD */
525 memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
526 ring->emit = ring->size / 2;
527 ring->tail = ring->emit;
528 GEM_BUG_ON(ring->head);
529
530 intel_context_unpin(ce);
531
532 /* Submit a simple nop request */
533 GEM_BUG_ON(intel_context_is_pinned(ce));
534 rq = intel_context_create_request(ce);
535 i915_active_release(&ce->active); /* e.g. async retire */
536 intel_context_put(ce);
537 if (IS_ERR(rq)) {
538 err = PTR_ERR(rq);
539 break;
540 }
541 GEM_BUG_ON(!rq->head);
542 i915_request_add(rq);
543
544 /* Expect not to hang! */
545 if (igt_live_test_end(&t)) {
546 err = -EIO;
547 break;
548 }
549 }
550
551 return err;
552}
553
554static int engine_lock_reset_tasklet(struct intel_engine_cs *engine)
555{
556 tasklet_disable(&engine->execlists.tasklet);
557 local_bh_disable();
558
559 if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
560 &engine->gt->reset.flags)) {
561 local_bh_enable();
562 tasklet_enable(&engine->execlists.tasklet);
563
564 intel_gt_set_wedged(engine->gt);
565 return -EBUSY;
566 }
567
568 return 0;
569}
570
571static void engine_unlock_reset_tasklet(struct intel_engine_cs *engine)
572{
573 clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id,
574 &engine->gt->reset.flags);
575
576 local_bh_enable();
577 tasklet_enable(&engine->execlists.tasklet);
578}
579
580static int live_hold_reset(void *arg)
581{
582 struct intel_gt *gt = arg;
583 struct intel_engine_cs *engine;
584 enum intel_engine_id id;
585 struct igt_spinner spin;
586 int err = 0;
587
588 /*
589 * In order to support offline error capture for fast preempt reset,
590 * we need to decouple the guilty request and ensure that it and its
591 * descendents are not executed while the capture is in progress.
592 */
593
594 if (!intel_has_reset_engine(gt))
595 return 0;
596
597 if (igt_spinner_init(&spin, gt))
598 return -ENOMEM;
599
600 for_each_engine(engine, gt, id) {
601 struct intel_context *ce;
602 struct i915_request *rq;
603
604 ce = intel_context_create(engine);
605 if (IS_ERR(ce)) {
606 err = PTR_ERR(ce);
607 break;
608 }
609
610 st_engine_heartbeat_disable(engine);
611
612 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
613 if (IS_ERR(rq)) {
614 err = PTR_ERR(rq);
615 goto out;
616 }
617 i915_request_add(rq);
618
619 if (!igt_wait_for_spinner(&spin, rq)) {
620 intel_gt_set_wedged(gt);
621 err = -ETIME;
622 goto out;
623 }
624
625 /* We have our request executing, now remove it and reset */
626
627 err = engine_lock_reset_tasklet(engine);
628 if (err)
629 goto out;
630
631 engine->execlists.tasklet.callback(&engine->execlists.tasklet);
632 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
633
634 i915_request_get(rq);
635 execlists_hold(engine, rq);
636 GEM_BUG_ON(!i915_request_on_hold(rq));
637
638 __intel_engine_reset_bh(engine, NULL);
639 GEM_BUG_ON(rq->fence.error != -EIO);
640
641 engine_unlock_reset_tasklet(engine);
642
643 /* Check that we do not resubmit the held request */
644 if (!i915_request_wait(rq, 0, HZ / 5)) {
645 pr_err("%s: on hold request completed!\n",
646 engine->name);
647 i915_request_put(rq);
648 err = -EIO;
649 goto out;
650 }
651 GEM_BUG_ON(!i915_request_on_hold(rq));
652
653 /* But is resubmitted on release */
654 execlists_unhold(engine, rq);
655 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
656 pr_err("%s: held request did not complete!\n",
657 engine->name);
658 intel_gt_set_wedged(gt);
659 err = -ETIME;
660 }
661 i915_request_put(rq);
662
663out:
664 st_engine_heartbeat_enable(engine);
665 intel_context_put(ce);
666 if (err)
667 break;
668 }
669
670 igt_spinner_fini(&spin);
671 return err;
672}
673
/* Describe an error code for log messages: "good" for 0, "bad" otherwise. */
static const char *error_repr(int err)
{
	if (err)
		return "bad";

	return "good";
}
678
679static int live_error_interrupt(void *arg)
680{
681 static const struct error_phase {
682 enum { GOOD = 0, BAD = -EIO } error[2];
683 } phases[] = {
684 { { BAD, GOOD } },
685 { { BAD, BAD } },
686 { { BAD, GOOD } },
687 { { GOOD, GOOD } }, /* sentinel */
688 };
689 struct intel_gt *gt = arg;
690 struct intel_engine_cs *engine;
691 enum intel_engine_id id;
692
693 /*
694 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
695 * of invalid commands in user batches that will cause a GPU hang.
696 * This is a faster mechanism than using hangcheck/heartbeats, but
697 * only detects problems the HW knows about -- it will not warn when
698 * we kill the HW!
699 *
700 * To verify our detection and reset, we throw some invalid commands
701 * at the HW and wait for the interrupt.
702 */
703
704 if (!intel_has_reset_engine(gt))
705 return 0;
706
707 for_each_engine(engine, gt, id) {
708 const struct error_phase *p;
709 int err = 0;
710
711 st_engine_heartbeat_disable(engine);
712
713 for (p = phases; p->error[0] != GOOD; p++) {
714 struct i915_request *client[ARRAY_SIZE(phases->error)];
715 u32 *cs;
716 int i;
717
718 memset(client, 0, sizeof(*client));
719 for (i = 0; i < ARRAY_SIZE(client); i++) {
720 struct intel_context *ce;
721 struct i915_request *rq;
722
723 ce = intel_context_create(engine);
724 if (IS_ERR(ce)) {
725 err = PTR_ERR(ce);
726 goto out;
727 }
728
729 rq = intel_context_create_request(ce);
730 intel_context_put(ce);
731 if (IS_ERR(rq)) {
732 err = PTR_ERR(rq);
733 goto out;
734 }
735
736 if (rq->engine->emit_init_breadcrumb) {
737 err = rq->engine->emit_init_breadcrumb(rq);
738 if (err) {
739 i915_request_add(rq);
740 goto out;
741 }
742 }
743
744 cs = intel_ring_begin(rq, 2);
745 if (IS_ERR(cs)) {
746 i915_request_add(rq);
747 err = PTR_ERR(cs);
748 goto out;
749 }
750
751 if (p->error[i]) {
752 *cs++ = 0xdeadbeef;
753 *cs++ = 0xdeadbeef;
754 } else {
755 *cs++ = MI_NOOP;
756 *cs++ = MI_NOOP;
757 }
758
759 client[i] = i915_request_get(rq);
760 i915_request_add(rq);
761 }
762
763 err = wait_for_submit(engine, client[0], HZ / 2);
764 if (err) {
765 pr_err("%s: first request did not start within time!\n",
766 engine->name);
767 err = -ETIME;
768 goto out;
769 }
770
771 for (i = 0; i < ARRAY_SIZE(client); i++) {
772 if (i915_request_wait(client[i], 0, HZ / 5) < 0)
773 pr_debug("%s: %s request incomplete!\n",
774 engine->name,
775 error_repr(p->error[i]));
776
777 if (!i915_request_started(client[i])) {
778 pr_err("%s: %s request not started!\n",
779 engine->name,
780 error_repr(p->error[i]));
781 err = -ETIME;
782 goto out;
783 }
784
785 /* Kick the tasklet to process the error */
786 intel_engine_flush_submission(engine);
787 if (client[i]->fence.error != p->error[i]) {
788 pr_err("%s: %s request (%s) with wrong error code: %d\n",
789 engine->name,
790 error_repr(p->error[i]),
791 i915_request_completed(client[i]) ? "completed" : "running",
792 client[i]->fence.error);
793 err = -EINVAL;
794 goto out;
795 }
796 }
797
798out:
799 for (i = 0; i < ARRAY_SIZE(client); i++)
800 if (client[i])
801 i915_request_put(client[i]);
802 if (err) {
803 pr_err("%s: failed at phase[%zd] { %d, %d }\n",
804 engine->name, p - phases,
805 p->error[0], p->error[1]);
806 break;
807 }
808 }
809
810 st_engine_heartbeat_enable(engine);
811 if (err) {
812 intel_gt_set_wedged(gt);
813 return err;
814 }
815 }
816
817 return 0;
818}
819
820static int
821emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
822{
823 u32 *cs;
824
825 cs = intel_ring_begin(rq, 10);
826 if (IS_ERR(cs))
827 return PTR_ERR(cs);
828
829 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
830
831 *cs++ = MI_SEMAPHORE_WAIT |
832 MI_SEMAPHORE_GLOBAL_GTT |
833 MI_SEMAPHORE_POLL |
834 MI_SEMAPHORE_SAD_NEQ_SDD;
835 *cs++ = 0;
836 *cs++ = i915_ggtt_offset(vma) + 4 * idx;
837 *cs++ = 0;
838
839 if (idx > 0) {
840 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
841 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
842 *cs++ = 0;
843 *cs++ = 1;
844 } else {
845 *cs++ = MI_NOOP;
846 *cs++ = MI_NOOP;
847 *cs++ = MI_NOOP;
848 *cs++ = MI_NOOP;
849 }
850
851 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
852
853 intel_ring_advance(rq, cs);
854 return 0;
855}
856
857static struct i915_request *
858semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
859{
860 struct intel_context *ce;
861 struct i915_request *rq;
862 int err;
863
864 ce = intel_context_create(engine);
865 if (IS_ERR(ce))
866 return ERR_CAST(ce);
867
868 rq = intel_context_create_request(ce);
869 if (IS_ERR(rq))
870 goto out_ce;
871
872 err = 0;
873 if (rq->engine->emit_init_breadcrumb)
874 err = rq->engine->emit_init_breadcrumb(rq);
875 if (err == 0)
876 err = emit_semaphore_chain(rq, vma, idx);
877 if (err == 0)
878 i915_request_get(rq);
879 i915_request_add(rq);
880 if (err)
881 rq = ERR_PTR(err);
882
883out_ce:
884 intel_context_put(ce);
885 return rq;
886}
887
888static int
889release_queue(struct intel_engine_cs *engine,
890 struct i915_vma *vma,
891 int idx, int prio)
892{
893 struct i915_sched_attr attr = {
894 .priority = prio,
895 };
896 struct i915_request *rq;
897 u32 *cs;
898
899 rq = intel_engine_create_kernel_request(engine);
900 if (IS_ERR(rq))
901 return PTR_ERR(rq);
902
903 cs = intel_ring_begin(rq, 4);
904 if (IS_ERR(cs)) {
905 i915_request_add(rq);
906 return PTR_ERR(cs);
907 }
908
909 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
910 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
911 *cs++ = 0;
912 *cs++ = 1;
913
914 intel_ring_advance(rq, cs);
915
916 i915_request_get(rq);
917 i915_request_add(rq);
918
919 local_bh_disable();
920 engine->schedule(rq, &attr);
921 local_bh_enable(); /* kick tasklet */
922
923 i915_request_put(rq);
924
925 return 0;
926}
927
928static int
929slice_semaphore_queue(struct intel_engine_cs *outer,
930 struct i915_vma *vma,
931 int count)
932{
933 struct intel_engine_cs *engine;
934 struct i915_request *head;
935 enum intel_engine_id id;
936 int err, i, n = 0;
937
938 head = semaphore_queue(outer, vma, n++);
939 if (IS_ERR(head))
940 return PTR_ERR(head);
941
942 for_each_engine(engine, outer->gt, id) {
943 if (!intel_engine_has_preemption(engine))
944 continue;
945
946 for (i = 0; i < count; i++) {
947 struct i915_request *rq;
948
949 rq = semaphore_queue(engine, vma, n++);
950 if (IS_ERR(rq)) {
951 err = PTR_ERR(rq);
952 goto out;
953 }
954
955 i915_request_put(rq);
956 }
957 }
958
959 err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER);
960 if (err)
961 goto out;
962
963 if (i915_request_wait(head, 0,
964 2 * outer->gt->info.num_engines * (count + 2) * (count + 3)) < 0) {
965 pr_err("%s: Failed to slice along semaphore chain of length (%d, %d)!\n",
966 outer->name, count, n);
967 GEM_TRACE_DUMP();
968 intel_gt_set_wedged(outer->gt);
969 err = -EIO;
970 }
971
972out:
973 i915_request_put(head);
974 return err;
975}
976
977static int live_timeslice_preempt(void *arg)
978{
979 struct intel_gt *gt = arg;
980 struct drm_i915_gem_object *obj;
981 struct intel_engine_cs *engine;
982 enum intel_engine_id id;
983 struct i915_vma *vma;
984 void *vaddr;
985 int err = 0;
986
987 /*
988 * If a request takes too long, we would like to give other users
989 * a fair go on the GPU. In particular, users may create batches
990 * that wait upon external input, where that input may even be
991 * supplied by another GPU job. To avoid blocking forever, we
992 * need to preempt the current task and replace it with another
993 * ready task.
994 */
995 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
996 return 0;
997
998 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
999 if (IS_ERR(obj))
1000 return PTR_ERR(obj);
1001
1002 vma = i915_vma_instance(obj, >->ggtt->vm, NULL);
1003 if (IS_ERR(vma)) {
1004 err = PTR_ERR(vma);
1005 goto err_obj;
1006 }
1007
1008 vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
1009 if (IS_ERR(vaddr)) {
1010 err = PTR_ERR(vaddr);
1011 goto err_obj;
1012 }
1013
1014 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1015 if (err)
1016 goto err_map;
1017
1018 err = i915_vma_sync(vma);
1019 if (err)
1020 goto err_pin;
1021
1022 for_each_engine(engine, gt, id) {
1023 if (!intel_engine_has_preemption(engine))
1024 continue;
1025
1026 memset(vaddr, 0, PAGE_SIZE);
1027
1028 st_engine_heartbeat_disable(engine);
1029 err = slice_semaphore_queue(engine, vma, 5);
1030 st_engine_heartbeat_enable(engine);
1031 if (err)
1032 goto err_pin;
1033
1034 if (igt_flush_test(gt->i915)) {
1035 err = -EIO;
1036 goto err_pin;
1037 }
1038 }
1039
1040err_pin:
1041 i915_vma_unpin(vma);
1042err_map:
1043 i915_gem_object_unpin_map(obj);
1044err_obj:
1045 i915_gem_object_put(obj);
1046 return err;
1047}
1048
1049static struct i915_request *
1050create_rewinder(struct intel_context *ce,
1051 struct i915_request *wait,
1052 void *slot, int idx)
1053{
1054 const u32 offset =
1055 i915_ggtt_offset(ce->engine->status_page.vma) +
1056 offset_in_page(slot);
1057 struct i915_request *rq;
1058 u32 *cs;
1059 int err;
1060
1061 rq = intel_context_create_request(ce);
1062 if (IS_ERR(rq))
1063 return rq;
1064
1065 if (wait) {
1066 err = i915_request_await_dma_fence(rq, &wait->fence);
1067 if (err)
1068 goto err;
1069 }
1070
1071 cs = intel_ring_begin(rq, 14);
1072 if (IS_ERR(cs)) {
1073 err = PTR_ERR(cs);
1074 goto err;
1075 }
1076
1077 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1078 *cs++ = MI_NOOP;
1079
1080 *cs++ = MI_SEMAPHORE_WAIT |
1081 MI_SEMAPHORE_GLOBAL_GTT |
1082 MI_SEMAPHORE_POLL |
1083 MI_SEMAPHORE_SAD_GTE_SDD;
1084 *cs++ = idx;
1085 *cs++ = offset;
1086 *cs++ = 0;
1087
1088 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
1089 *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
1090 *cs++ = offset + idx * sizeof(u32);
1091 *cs++ = 0;
1092
1093 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1094 *cs++ = offset;
1095 *cs++ = 0;
1096 *cs++ = idx + 1;
1097
1098 intel_ring_advance(rq, cs);
1099
1100 err = 0;
1101err:
1102 i915_request_get(rq);
1103 i915_request_add(rq);
1104 if (err) {
1105 i915_request_put(rq);
1106 return ERR_PTR(err);
1107 }
1108
1109 return rq;
1110}
1111
1112static int live_timeslice_rewind(void *arg)
1113{
1114 struct intel_gt *gt = arg;
1115 struct intel_engine_cs *engine;
1116 enum intel_engine_id id;
1117
1118 /*
1119 * The usual presumption on timeslice expiration is that we replace
1120 * the active context with another. However, given a chain of
1121 * dependencies we may end up with replacing the context with itself,
1122 * but only a few of those requests, forcing us to rewind the
1123 * RING_TAIL of the original request.
1124 */
1125 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1126 return 0;
1127
1128 for_each_engine(engine, gt, id) {
1129 enum { A1, A2, B1 };
1130 enum { X = 1, Z, Y };
1131 struct i915_request *rq[3] = {};
1132 struct intel_context *ce;
1133 unsigned long timeslice;
1134 int i, err = 0;
1135 u32 *slot;
1136
1137 if (!intel_engine_has_timeslices(engine))
1138 continue;
1139
1140 /*
1141 * A:rq1 -- semaphore wait, timestamp X
1142 * A:rq2 -- write timestamp Y
1143 *
1144 * B:rq1 [await A:rq1] -- write timestamp Z
1145 *
1146 * Force timeslice, release semaphore.
1147 *
1148 * Expect execution/evaluation order XZY
1149 */
1150
1151 st_engine_heartbeat_disable(engine);
1152 timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1153
1154 slot = memset32(engine->status_page.addr + 1000, 0, 4);
1155
1156 ce = intel_context_create(engine);
1157 if (IS_ERR(ce)) {
1158 err = PTR_ERR(ce);
1159 goto err;
1160 }
1161
1162 rq[A1] = create_rewinder(ce, NULL, slot, X);
1163 if (IS_ERR(rq[A1])) {
1164 intel_context_put(ce);
1165 goto err;
1166 }
1167
1168 rq[A2] = create_rewinder(ce, NULL, slot, Y);
1169 intel_context_put(ce);
1170 if (IS_ERR(rq[A2]))
1171 goto err;
1172
1173 err = wait_for_submit(engine, rq[A2], HZ / 2);
1174 if (err) {
1175 pr_err("%s: failed to submit first context\n",
1176 engine->name);
1177 goto err;
1178 }
1179
1180 ce = intel_context_create(engine);
1181 if (IS_ERR(ce)) {
1182 err = PTR_ERR(ce);
1183 goto err;
1184 }
1185
1186 rq[B1] = create_rewinder(ce, rq[A1], slot, Z);
1187 intel_context_put(ce);
1188 if (IS_ERR(rq[2]))
1189 goto err;
1190
1191 err = wait_for_submit(engine, rq[B1], HZ / 2);
1192 if (err) {
1193 pr_err("%s: failed to submit second context\n",
1194 engine->name);
1195 goto err;
1196 }
1197
1198 /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
1199 ENGINE_TRACE(engine, "forcing tasklet for rewind\n");
1200 while (i915_request_is_active(rq[A2])) { /* semaphore yield! */
1201 /* Wait for the timeslice to kick in */
1202 del_timer(&engine->execlists.timer);
1203 tasklet_hi_schedule(&engine->execlists.tasklet);
1204 intel_engine_flush_submission(engine);
1205 }
1206 /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
1207 GEM_BUG_ON(!i915_request_is_active(rq[A1]));
1208 GEM_BUG_ON(!i915_request_is_active(rq[B1]));
1209 GEM_BUG_ON(i915_request_is_active(rq[A2]));
1210
1211 /* Release the hounds! */
1212 slot[0] = 1;
1213 wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */
1214
1215 for (i = 1; i <= 3; i++) {
1216 unsigned long timeout = jiffies + HZ / 2;
1217
1218 while (!READ_ONCE(slot[i]) &&
1219 time_before(jiffies, timeout))
1220 ;
1221
1222 if (!time_before(jiffies, timeout)) {
1223 pr_err("%s: rq[%d] timed out\n",
1224 engine->name, i - 1);
1225 err = -ETIME;
1226 goto err;
1227 }
1228
1229 pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
1230 }
1231
1232 /* XZY: XZ < XY */
1233 if (slot[Z] - slot[X] >= slot[Y] - slot[X]) {
1234 pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
1235 engine->name,
1236 slot[Z] - slot[X],
1237 slot[Y] - slot[X]);
1238 err = -EINVAL;
1239 }
1240
1241err:
1242 memset32(&slot[0], -1, 4);
1243 wmb();
1244
1245 engine->props.timeslice_duration_ms = timeslice;
1246 st_engine_heartbeat_enable(engine);
1247 for (i = 0; i < 3; i++)
1248 i915_request_put(rq[i]);
1249 if (igt_flush_test(gt->i915))
1250 err = -EIO;
1251 if (err)
1252 return err;
1253 }
1254
1255 return 0;
1256}
1257
/*
 * Submit an empty kernel request on @engine.
 *
 * Returns the request with an extra reference held for the caller, or the
 * ERR_PTR from intel_engine_create_kernel_request().
 */
static struct i915_request *nop_request(struct intel_engine_cs *engine)
{
	struct i915_request *rq = intel_engine_create_kernel_request(engine);

	if (!IS_ERR(rq)) {
		i915_request_get(rq);
		i915_request_add(rq);
	}

	return rq;
}
1271
1272static long slice_timeout(struct intel_engine_cs *engine)
1273{
1274 long timeout;
1275
1276 /* Enough time for a timeslice to kick in, and kick out */
1277 timeout = 2 * msecs_to_jiffies_timeout(timeslice(engine));
1278
1279 /* Enough time for the nop request to complete */
1280 timeout += HZ / 5;
1281
1282 return timeout + 1;
1283}
1284
1285static int live_timeslice_queue(void *arg)
1286{
1287 struct intel_gt *gt = arg;
1288 struct drm_i915_gem_object *obj;
1289 struct intel_engine_cs *engine;
1290 enum intel_engine_id id;
1291 struct i915_vma *vma;
1292 void *vaddr;
1293 int err = 0;
1294
1295 /*
1296 * Make sure that even if ELSP[0] and ELSP[1] are filled with
1297 * timeslicing between them disabled, we *do* enable timeslicing
1298 * if the queue demands it. (Normally, we do not submit if
1299 * ELSP[1] is already occupied, so must rely on timeslicing to
1300 * eject ELSP[0] in favour of the queue.)
1301 */
1302 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1303 return 0;
1304
1305 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1306 if (IS_ERR(obj))
1307 return PTR_ERR(obj);
1308
1309 vma = i915_vma_instance(obj, >->ggtt->vm, NULL);
1310 if (IS_ERR(vma)) {
1311 err = PTR_ERR(vma);
1312 goto err_obj;
1313 }
1314
1315 vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
1316 if (IS_ERR(vaddr)) {
1317 err = PTR_ERR(vaddr);
1318 goto err_obj;
1319 }
1320
1321 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1322 if (err)
1323 goto err_map;
1324
1325 err = i915_vma_sync(vma);
1326 if (err)
1327 goto err_pin;
1328
1329 for_each_engine(engine, gt, id) {
1330 struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
1331 struct i915_request *rq, *nop;
1332
1333 if (!intel_engine_has_preemption(engine))
1334 continue;
1335
1336 st_engine_heartbeat_disable(engine);
1337 memset(vaddr, 0, PAGE_SIZE);
1338
1339 /* ELSP[0]: semaphore wait */
1340 rq = semaphore_queue(engine, vma, 0);
1341 if (IS_ERR(rq)) {
1342 err = PTR_ERR(rq);
1343 goto err_heartbeat;
1344 }
1345 engine->schedule(rq, &attr);
1346 err = wait_for_submit(engine, rq, HZ / 2);
1347 if (err) {
1348 pr_err("%s: Timed out trying to submit semaphores\n",
1349 engine->name);
1350 goto err_rq;
1351 }
1352
1353 /* ELSP[1]: nop request */
1354 nop = nop_request(engine);
1355 if (IS_ERR(nop)) {
1356 err = PTR_ERR(nop);
1357 goto err_rq;
1358 }
1359 err = wait_for_submit(engine, nop, HZ / 2);
1360 i915_request_put(nop);
1361 if (err) {
1362 pr_err("%s: Timed out trying to submit nop\n",
1363 engine->name);
1364 goto err_rq;
1365 }
1366
1367 GEM_BUG_ON(i915_request_completed(rq));
1368 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
1369
1370 /* Queue: semaphore signal, matching priority as semaphore */
1371 err = release_queue(engine, vma, 1, effective_prio(rq));
1372 if (err)
1373 goto err_rq;
1374
1375 /* Wait until we ack the release_queue and start timeslicing */
1376 do {
1377 cond_resched();
1378 intel_engine_flush_submission(engine);
1379 } while (READ_ONCE(engine->execlists.pending[0]));
1380
1381 /* Timeslice every jiffy, so within 2 we should signal */
1382 if (i915_request_wait(rq, 0, slice_timeout(engine)) < 0) {
1383 struct drm_printer p =
1384 drm_info_printer(gt->i915->drm.dev);
1385
1386 pr_err("%s: Failed to timeslice into queue\n",
1387 engine->name);
1388 intel_engine_dump(engine, &p,
1389 "%s\n", engine->name);
1390
1391 memset(vaddr, 0xff, PAGE_SIZE);
1392 err = -EIO;
1393 }
1394err_rq:
1395 i915_request_put(rq);
1396err_heartbeat:
1397 st_engine_heartbeat_enable(engine);
1398 if (err)
1399 break;
1400 }
1401
1402err_pin:
1403 i915_vma_unpin(vma);
1404err_map:
1405 i915_gem_object_unpin_map(obj);
1406err_obj:
1407 i915_gem_object_put(obj);
1408 return err;
1409}
1410
1411static int live_timeslice_nopreempt(void *arg)
1412{
1413 struct intel_gt *gt = arg;
1414 struct intel_engine_cs *engine;
1415 enum intel_engine_id id;
1416 struct igt_spinner spin;
1417 int err = 0;
1418
1419 /*
1420 * We should not timeslice into a request that is marked with
1421 * I915_REQUEST_NOPREEMPT.
1422 */
1423 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1424 return 0;
1425
1426 if (igt_spinner_init(&spin, gt))
1427 return -ENOMEM;
1428
1429 for_each_engine(engine, gt, id) {
1430 struct intel_context *ce;
1431 struct i915_request *rq;
1432 unsigned long timeslice;
1433
1434 if (!intel_engine_has_preemption(engine))
1435 continue;
1436
1437 ce = intel_context_create(engine);
1438 if (IS_ERR(ce)) {
1439 err = PTR_ERR(ce);
1440 break;
1441 }
1442
1443 st_engine_heartbeat_disable(engine);
1444 timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1445
1446 /* Create an unpreemptible spinner */
1447
1448 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
1449 intel_context_put(ce);
1450 if (IS_ERR(rq)) {
1451 err = PTR_ERR(rq);
1452 goto out_heartbeat;
1453 }
1454
1455 i915_request_get(rq);
1456 i915_request_add(rq);
1457
1458 if (!igt_wait_for_spinner(&spin, rq)) {
1459 i915_request_put(rq);
1460 err = -ETIME;
1461 goto out_spin;
1462 }
1463
1464 set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags);
1465 i915_request_put(rq);
1466
1467 /* Followed by a maximum priority barrier (heartbeat) */
1468
1469 ce = intel_context_create(engine);
1470 if (IS_ERR(ce)) {
1471 err = PTR_ERR(ce);
1472 goto out_spin;
1473 }
1474
1475 rq = intel_context_create_request(ce);
1476 intel_context_put(ce);
1477 if (IS_ERR(rq)) {
1478 err = PTR_ERR(rq);
1479 goto out_spin;
1480 }
1481
1482 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
1483 i915_request_get(rq);
1484 i915_request_add(rq);
1485
1486 /*
1487 * Wait until the barrier is in ELSP, and we know timeslicing
1488 * will have been activated.
1489 */
1490 if (wait_for_submit(engine, rq, HZ / 2)) {
1491 i915_request_put(rq);
1492 err = -ETIME;
1493 goto out_spin;
1494 }
1495
1496 /*
1497 * Since the ELSP[0] request is unpreemptible, it should not
1498 * allow the maximum priority barrier through. Wait long
1499 * enough to see if it is timesliced in by mistake.
1500 */
1501 if (i915_request_wait(rq, 0, slice_timeout(engine)) >= 0) {
1502 pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n",
1503 engine->name);
1504 err = -EINVAL;
1505 }
1506 i915_request_put(rq);
1507
1508out_spin:
1509 igt_spinner_end(&spin);
1510out_heartbeat:
1511 xchg(&engine->props.timeslice_duration_ms, timeslice);
1512 st_engine_heartbeat_enable(engine);
1513 if (err)
1514 break;
1515
1516 if (igt_flush_test(gt->i915)) {
1517 err = -EIO;
1518 break;
1519 }
1520 }
1521
1522 igt_spinner_fini(&spin);
1523 return err;
1524}
1525
1526static int live_busywait_preempt(void *arg)
1527{
1528 struct intel_gt *gt = arg;
1529 struct i915_gem_context *ctx_hi, *ctx_lo;
1530 struct intel_engine_cs *engine;
1531 struct drm_i915_gem_object *obj;
1532 struct i915_vma *vma;
1533 enum intel_engine_id id;
1534 int err = -ENOMEM;
1535 u32 *map;
1536
1537 /*
1538 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
1539 * preempt the busywaits used to synchronise between rings.
1540 */
1541
1542 ctx_hi = kernel_context(gt->i915);
1543 if (!ctx_hi)
1544 return -ENOMEM;
1545 ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
1546
1547 ctx_lo = kernel_context(gt->i915);
1548 if (!ctx_lo)
1549 goto err_ctx_hi;
1550 ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
1551
1552 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1553 if (IS_ERR(obj)) {
1554 err = PTR_ERR(obj);
1555 goto err_ctx_lo;
1556 }
1557
1558 map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
1559 if (IS_ERR(map)) {
1560 err = PTR_ERR(map);
1561 goto err_obj;
1562 }
1563
1564 vma = i915_vma_instance(obj, >->ggtt->vm, NULL);
1565 if (IS_ERR(vma)) {
1566 err = PTR_ERR(vma);
1567 goto err_map;
1568 }
1569
1570 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1571 if (err)
1572 goto err_map;
1573
1574 err = i915_vma_sync(vma);
1575 if (err)
1576 goto err_vma;
1577
1578 for_each_engine(engine, gt, id) {
1579 struct i915_request *lo, *hi;
1580 struct igt_live_test t;
1581 u32 *cs;
1582
1583 if (!intel_engine_has_preemption(engine))
1584 continue;
1585
1586 if (!intel_engine_can_store_dword(engine))
1587 continue;
1588
1589 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1590 err = -EIO;
1591 goto err_vma;
1592 }
1593
1594 /*
1595 * We create two requests. The low priority request
1596 * busywaits on a semaphore (inside the ringbuffer where
1597 * is should be preemptible) and the high priority requests
1598 * uses a MI_STORE_DWORD_IMM to update the semaphore value
1599 * allowing the first request to complete. If preemption
1600 * fails, we hang instead.
1601 */
1602
1603 lo = igt_request_alloc(ctx_lo, engine);
1604 if (IS_ERR(lo)) {
1605 err = PTR_ERR(lo);
1606 goto err_vma;
1607 }
1608
1609 cs = intel_ring_begin(lo, 8);
1610 if (IS_ERR(cs)) {
1611 err = PTR_ERR(cs);
1612 i915_request_add(lo);
1613 goto err_vma;
1614 }
1615
1616 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1617 *cs++ = i915_ggtt_offset(vma);
1618 *cs++ = 0;
1619 *cs++ = 1;
1620
1621 /* XXX Do we need a flush + invalidate here? */
1622
1623 *cs++ = MI_SEMAPHORE_WAIT |
1624 MI_SEMAPHORE_GLOBAL_GTT |
1625 MI_SEMAPHORE_POLL |
1626 MI_SEMAPHORE_SAD_EQ_SDD;
1627 *cs++ = 0;
1628 *cs++ = i915_ggtt_offset(vma);
1629 *cs++ = 0;
1630
1631 intel_ring_advance(lo, cs);
1632
1633 i915_request_get(lo);
1634 i915_request_add(lo);
1635
1636 if (wait_for(READ_ONCE(*map), 10)) {
1637 i915_request_put(lo);
1638 err = -ETIMEDOUT;
1639 goto err_vma;
1640 }
1641
1642 /* Low priority request should be busywaiting now */
1643 if (i915_request_wait(lo, 0, 1) != -ETIME) {
1644 i915_request_put(lo);
1645 pr_err("%s: Busywaiting request did not!\n",
1646 engine->name);
1647 err = -EIO;
1648 goto err_vma;
1649 }
1650
1651 hi = igt_request_alloc(ctx_hi, engine);
1652 if (IS_ERR(hi)) {
1653 err = PTR_ERR(hi);
1654 i915_request_put(lo);
1655 goto err_vma;
1656 }
1657
1658 cs = intel_ring_begin(hi, 4);
1659 if (IS_ERR(cs)) {
1660 err = PTR_ERR(cs);
1661 i915_request_add(hi);
1662 i915_request_put(lo);
1663 goto err_vma;
1664 }
1665
1666 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1667 *cs++ = i915_ggtt_offset(vma);
1668 *cs++ = 0;
1669 *cs++ = 0;
1670
1671 intel_ring_advance(hi, cs);
1672 i915_request_add(hi);
1673
1674 if (i915_request_wait(lo, 0, HZ / 5) < 0) {
1675 struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
1676
1677 pr_err("%s: Failed to preempt semaphore busywait!\n",
1678 engine->name);
1679
1680 intel_engine_dump(engine, &p, "%s\n", engine->name);
1681 GEM_TRACE_DUMP();
1682
1683 i915_request_put(lo);
1684 intel_gt_set_wedged(gt);
1685 err = -EIO;
1686 goto err_vma;
1687 }
1688 GEM_BUG_ON(READ_ONCE(*map));
1689 i915_request_put(lo);
1690
1691 if (igt_live_test_end(&t)) {
1692 err = -EIO;
1693 goto err_vma;
1694 }
1695 }
1696
1697 err = 0;
1698err_vma:
1699 i915_vma_unpin(vma);
1700err_map:
1701 i915_gem_object_unpin_map(obj);
1702err_obj:
1703 i915_gem_object_put(obj);
1704err_ctx_lo:
1705 kernel_context_close(ctx_lo);
1706err_ctx_hi:
1707 kernel_context_close(ctx_hi);
1708 return err;
1709}
1710
1711static struct i915_request *
1712spinner_create_request(struct igt_spinner *spin,
1713 struct i915_gem_context *ctx,
1714 struct intel_engine_cs *engine,
1715 u32 arb)
1716{
1717 struct intel_context *ce;
1718 struct i915_request *rq;
1719
1720 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
1721 if (IS_ERR(ce))
1722 return ERR_CAST(ce);
1723
1724 rq = igt_spinner_create_request(spin, ce, arb);
1725 intel_context_put(ce);
1726 return rq;
1727}
1728
1729static int live_preempt(void *arg)
1730{
1731 struct intel_gt *gt = arg;
1732 struct i915_gem_context *ctx_hi, *ctx_lo;
1733 struct igt_spinner spin_hi, spin_lo;
1734 struct intel_engine_cs *engine;
1735 enum intel_engine_id id;
1736 int err = -ENOMEM;
1737
1738 if (igt_spinner_init(&spin_hi, gt))
1739 return -ENOMEM;
1740
1741 if (igt_spinner_init(&spin_lo, gt))
1742 goto err_spin_hi;
1743
1744 ctx_hi = kernel_context(gt->i915);
1745 if (!ctx_hi)
1746 goto err_spin_lo;
1747 ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
1748
1749 ctx_lo = kernel_context(gt->i915);
1750 if (!ctx_lo)
1751 goto err_ctx_hi;
1752 ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
1753
1754 for_each_engine(engine, gt, id) {
1755 struct igt_live_test t;
1756 struct i915_request *rq;
1757
1758 if (!intel_engine_has_preemption(engine))
1759 continue;
1760
1761 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1762 err = -EIO;
1763 goto err_ctx_lo;
1764 }
1765
1766 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1767 MI_ARB_CHECK);
1768 if (IS_ERR(rq)) {
1769 err = PTR_ERR(rq);
1770 goto err_ctx_lo;
1771 }
1772
1773 i915_request_add(rq);
1774 if (!igt_wait_for_spinner(&spin_lo, rq)) {
1775 GEM_TRACE("lo spinner failed to start\n");
1776 GEM_TRACE_DUMP();
1777 intel_gt_set_wedged(gt);
1778 err = -EIO;
1779 goto err_ctx_lo;
1780 }
1781
1782 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1783 MI_ARB_CHECK);
1784 if (IS_ERR(rq)) {
1785 igt_spinner_end(&spin_lo);
1786 err = PTR_ERR(rq);
1787 goto err_ctx_lo;
1788 }
1789
1790 i915_request_add(rq);
1791 if (!igt_wait_for_spinner(&spin_hi, rq)) {
1792 GEM_TRACE("hi spinner failed to start\n");
1793 GEM_TRACE_DUMP();
1794 intel_gt_set_wedged(gt);
1795 err = -EIO;
1796 goto err_ctx_lo;
1797 }
1798
1799 igt_spinner_end(&spin_hi);
1800 igt_spinner_end(&spin_lo);
1801
1802 if (igt_live_test_end(&t)) {
1803 err = -EIO;
1804 goto err_ctx_lo;
1805 }
1806 }
1807
1808 err = 0;
1809err_ctx_lo:
1810 kernel_context_close(ctx_lo);
1811err_ctx_hi:
1812 kernel_context_close(ctx_hi);
1813err_spin_lo:
1814 igt_spinner_fini(&spin_lo);
1815err_spin_hi:
1816 igt_spinner_fini(&spin_hi);
1817 return err;
1818}
1819
1820static int live_late_preempt(void *arg)
1821{
1822 struct intel_gt *gt = arg;
1823 struct i915_gem_context *ctx_hi, *ctx_lo;
1824 struct igt_spinner spin_hi, spin_lo;
1825 struct intel_engine_cs *engine;
1826 struct i915_sched_attr attr = {};
1827 enum intel_engine_id id;
1828 int err = -ENOMEM;
1829
1830 if (igt_spinner_init(&spin_hi, gt))
1831 return -ENOMEM;
1832
1833 if (igt_spinner_init(&spin_lo, gt))
1834 goto err_spin_hi;
1835
1836 ctx_hi = kernel_context(gt->i915);
1837 if (!ctx_hi)
1838 goto err_spin_lo;
1839
1840 ctx_lo = kernel_context(gt->i915);
1841 if (!ctx_lo)
1842 goto err_ctx_hi;
1843
1844 /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
1845 ctx_lo->sched.priority = 1;
1846
1847 for_each_engine(engine, gt, id) {
1848 struct igt_live_test t;
1849 struct i915_request *rq;
1850
1851 if (!intel_engine_has_preemption(engine))
1852 continue;
1853
1854 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1855 err = -EIO;
1856 goto err_ctx_lo;
1857 }
1858
1859 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1860 MI_ARB_CHECK);
1861 if (IS_ERR(rq)) {
1862 err = PTR_ERR(rq);
1863 goto err_ctx_lo;
1864 }
1865
1866 i915_request_add(rq);
1867 if (!igt_wait_for_spinner(&spin_lo, rq)) {
1868 pr_err("First context failed to start\n");
1869 goto err_wedged;
1870 }
1871
1872 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1873 MI_NOOP);
1874 if (IS_ERR(rq)) {
1875 igt_spinner_end(&spin_lo);
1876 err = PTR_ERR(rq);
1877 goto err_ctx_lo;
1878 }
1879
1880 i915_request_add(rq);
1881 if (igt_wait_for_spinner(&spin_hi, rq)) {
1882 pr_err("Second context overtook first?\n");
1883 goto err_wedged;
1884 }
1885
1886 attr.priority = I915_PRIORITY_MAX;
1887 engine->schedule(rq, &attr);
1888
1889 if (!igt_wait_for_spinner(&spin_hi, rq)) {
1890 pr_err("High priority context failed to preempt the low priority context\n");
1891 GEM_TRACE_DUMP();
1892 goto err_wedged;
1893 }
1894
1895 igt_spinner_end(&spin_hi);
1896 igt_spinner_end(&spin_lo);
1897
1898 if (igt_live_test_end(&t)) {
1899 err = -EIO;
1900 goto err_ctx_lo;
1901 }
1902 }
1903
1904 err = 0;
1905err_ctx_lo:
1906 kernel_context_close(ctx_lo);
1907err_ctx_hi:
1908 kernel_context_close(ctx_hi);
1909err_spin_lo:
1910 igt_spinner_fini(&spin_lo);
1911err_spin_hi:
1912 igt_spinner_fini(&spin_hi);
1913 return err;
1914
1915err_wedged:
1916 igt_spinner_end(&spin_hi);
1917 igt_spinner_end(&spin_lo);
1918 intel_gt_set_wedged(gt);
1919 err = -EIO;
1920 goto err_ctx_lo;
1921}
1922
1923struct preempt_client {
1924 struct igt_spinner spin;
1925 struct i915_gem_context *ctx;
1926};
1927
1928static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
1929{
1930 c->ctx = kernel_context(gt->i915);
1931 if (!c->ctx)
1932 return -ENOMEM;
1933
1934 if (igt_spinner_init(&c->spin, gt))
1935 goto err_ctx;
1936
1937 return 0;
1938
1939err_ctx:
1940 kernel_context_close(c->ctx);
1941 return -ENOMEM;
1942}
1943
1944static void preempt_client_fini(struct preempt_client *c)
1945{
1946 igt_spinner_fini(&c->spin);
1947 kernel_context_close(c->ctx);
1948}
1949
1950static int live_nopreempt(void *arg)
1951{
1952 struct intel_gt *gt = arg;
1953 struct intel_engine_cs *engine;
1954 struct preempt_client a, b;
1955 enum intel_engine_id id;
1956 int err = -ENOMEM;
1957
1958 /*
1959 * Verify that we can disable preemption for an individual request
1960 * that may be being observed and not want to be interrupted.
1961 */
1962
1963 if (preempt_client_init(gt, &a))
1964 return -ENOMEM;
1965 if (preempt_client_init(gt, &b))
1966 goto err_client_a;
1967 b.ctx->sched.priority = I915_PRIORITY_MAX;
1968
1969 for_each_engine(engine, gt, id) {
1970 struct i915_request *rq_a, *rq_b;
1971
1972 if (!intel_engine_has_preemption(engine))
1973 continue;
1974
1975 engine->execlists.preempt_hang.count = 0;
1976
1977 rq_a = spinner_create_request(&a.spin,
1978 a.ctx, engine,
1979 MI_ARB_CHECK);
1980 if (IS_ERR(rq_a)) {
1981 err = PTR_ERR(rq_a);
1982 goto err_client_b;
1983 }
1984
1985 /* Low priority client, but unpreemptable! */
1986 __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
1987
1988 i915_request_add(rq_a);
1989 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
1990 pr_err("First client failed to start\n");
1991 goto err_wedged;
1992 }
1993
1994 rq_b = spinner_create_request(&b.spin,
1995 b.ctx, engine,
1996 MI_ARB_CHECK);
1997 if (IS_ERR(rq_b)) {
1998 err = PTR_ERR(rq_b);
1999 goto err_client_b;
2000 }
2001
2002 i915_request_add(rq_b);
2003
2004 /* B is much more important than A! (But A is unpreemptable.) */
2005 GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
2006
2007 /* Wait long enough for preemption and timeslicing */
2008 if (igt_wait_for_spinner(&b.spin, rq_b)) {
2009 pr_err("Second client started too early!\n");
2010 goto err_wedged;
2011 }
2012
2013 igt_spinner_end(&a.spin);
2014
2015 if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2016 pr_err("Second client failed to start\n");
2017 goto err_wedged;
2018 }
2019
2020 igt_spinner_end(&b.spin);
2021
2022 if (engine->execlists.preempt_hang.count) {
2023 pr_err("Preemption recorded x%d; should have been suppressed!\n",
2024 engine->execlists.preempt_hang.count);
2025 err = -EINVAL;
2026 goto err_wedged;
2027 }
2028
2029 if (igt_flush_test(gt->i915))
2030 goto err_wedged;
2031 }
2032
2033 err = 0;
2034err_client_b:
2035 preempt_client_fini(&b);
2036err_client_a:
2037 preempt_client_fini(&a);
2038 return err;
2039
2040err_wedged:
2041 igt_spinner_end(&b.spin);
2042 igt_spinner_end(&a.spin);
2043 intel_gt_set_wedged(gt);
2044 err = -EIO;
2045 goto err_client_b;
2046}
2047
2048struct live_preempt_cancel {
2049 struct intel_engine_cs *engine;
2050 struct preempt_client a, b;
2051};
2052
2053static int __cancel_active0(struct live_preempt_cancel *arg)
2054{
2055 struct i915_request *rq;
2056 struct igt_live_test t;
2057 int err;
2058
2059 /* Preempt cancel of ELSP0 */
2060 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2061 if (igt_live_test_begin(&t, arg->engine->i915,
2062 __func__, arg->engine->name))
2063 return -EIO;
2064
2065 rq = spinner_create_request(&arg->a.spin,
2066 arg->a.ctx, arg->engine,
2067 MI_ARB_CHECK);
2068 if (IS_ERR(rq))
2069 return PTR_ERR(rq);
2070
2071 clear_bit(CONTEXT_BANNED, &rq->context->flags);
2072 i915_request_get(rq);
2073 i915_request_add(rq);
2074 if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2075 err = -EIO;
2076 goto out;
2077 }
2078
2079 intel_context_set_banned(rq->context);
2080 err = intel_engine_pulse(arg->engine);
2081 if (err)
2082 goto out;
2083
2084 err = wait_for_reset(arg->engine, rq, HZ / 2);
2085 if (err) {
2086 pr_err("Cancelled inflight0 request did not reset\n");
2087 goto out;
2088 }
2089
2090out:
2091 i915_request_put(rq);
2092 if (igt_live_test_end(&t))
2093 err = -EIO;
2094 return err;
2095}
2096
2097static int __cancel_active1(struct live_preempt_cancel *arg)
2098{
2099 struct i915_request *rq[2] = {};
2100 struct igt_live_test t;
2101 int err;
2102
2103 /* Preempt cancel of ELSP1 */
2104 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2105 if (igt_live_test_begin(&t, arg->engine->i915,
2106 __func__, arg->engine->name))
2107 return -EIO;
2108
2109 rq[0] = spinner_create_request(&arg->a.spin,
2110 arg->a.ctx, arg->engine,
2111 MI_NOOP); /* no preemption */
2112 if (IS_ERR(rq[0]))
2113 return PTR_ERR(rq[0]);
2114
2115 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2116 i915_request_get(rq[0]);
2117 i915_request_add(rq[0]);
2118 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2119 err = -EIO;
2120 goto out;
2121 }
2122
2123 rq[1] = spinner_create_request(&arg->b.spin,
2124 arg->b.ctx, arg->engine,
2125 MI_ARB_CHECK);
2126 if (IS_ERR(rq[1])) {
2127 err = PTR_ERR(rq[1]);
2128 goto out;
2129 }
2130
2131 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2132 i915_request_get(rq[1]);
2133 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2134 i915_request_add(rq[1]);
2135 if (err)
2136 goto out;
2137
2138 intel_context_set_banned(rq[1]->context);
2139 err = intel_engine_pulse(arg->engine);
2140 if (err)
2141 goto out;
2142
2143 igt_spinner_end(&arg->a.spin);
2144 err = wait_for_reset(arg->engine, rq[1], HZ / 2);
2145 if (err)
2146 goto out;
2147
2148 if (rq[0]->fence.error != 0) {
2149 pr_err("Normal inflight0 request did not complete\n");
2150 err = -EINVAL;
2151 goto out;
2152 }
2153
2154 if (rq[1]->fence.error != -EIO) {
2155 pr_err("Cancelled inflight1 request did not report -EIO\n");
2156 err = -EINVAL;
2157 goto out;
2158 }
2159
2160out:
2161 i915_request_put(rq[1]);
2162 i915_request_put(rq[0]);
2163 if (igt_live_test_end(&t))
2164 err = -EIO;
2165 return err;
2166}
2167
2168static int __cancel_queued(struct live_preempt_cancel *arg)
2169{
2170 struct i915_request *rq[3] = {};
2171 struct igt_live_test t;
2172 int err;
2173
2174 /* Full ELSP and one in the wings */
2175 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2176 if (igt_live_test_begin(&t, arg->engine->i915,
2177 __func__, arg->engine->name))
2178 return -EIO;
2179
2180 rq[0] = spinner_create_request(&arg->a.spin,
2181 arg->a.ctx, arg->engine,
2182 MI_ARB_CHECK);
2183 if (IS_ERR(rq[0]))
2184 return PTR_ERR(rq[0]);
2185
2186 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2187 i915_request_get(rq[0]);
2188 i915_request_add(rq[0]);
2189 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2190 err = -EIO;
2191 goto out;
2192 }
2193
2194 rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
2195 if (IS_ERR(rq[1])) {
2196 err = PTR_ERR(rq[1]);
2197 goto out;
2198 }
2199
2200 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2201 i915_request_get(rq[1]);
2202 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2203 i915_request_add(rq[1]);
2204 if (err)
2205 goto out;
2206
2207 rq[2] = spinner_create_request(&arg->b.spin,
2208 arg->a.ctx, arg->engine,
2209 MI_ARB_CHECK);
2210 if (IS_ERR(rq[2])) {
2211 err = PTR_ERR(rq[2]);
2212 goto out;
2213 }
2214
2215 i915_request_get(rq[2]);
2216 err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
2217 i915_request_add(rq[2]);
2218 if (err)
2219 goto out;
2220
2221 intel_context_set_banned(rq[2]->context);
2222 err = intel_engine_pulse(arg->engine);
2223 if (err)
2224 goto out;
2225
2226 err = wait_for_reset(arg->engine, rq[2], HZ / 2);
2227 if (err)
2228 goto out;
2229
2230 if (rq[0]->fence.error != -EIO) {
2231 pr_err("Cancelled inflight0 request did not report -EIO\n");
2232 err = -EINVAL;
2233 goto out;
2234 }
2235
2236 if (rq[1]->fence.error != 0) {
2237 pr_err("Normal inflight1 request did not complete\n");
2238 err = -EINVAL;
2239 goto out;
2240 }
2241
2242 if (rq[2]->fence.error != -EIO) {
2243 pr_err("Cancelled queued request did not report -EIO\n");
2244 err = -EINVAL;
2245 goto out;
2246 }
2247
2248out:
2249 i915_request_put(rq[2]);
2250 i915_request_put(rq[1]);
2251 i915_request_put(rq[0]);
2252 if (igt_live_test_end(&t))
2253 err = -EIO;
2254 return err;
2255}
2256
2257static int __cancel_hostile(struct live_preempt_cancel *arg)
2258{
2259 struct i915_request *rq;
2260 int err;
2261
2262 /* Preempt cancel non-preemptible spinner in ELSP0 */
2263 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2264 return 0;
2265
2266 if (!intel_has_reset_engine(arg->engine->gt))
2267 return 0;
2268
2269 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2270 rq = spinner_create_request(&arg->a.spin,
2271 arg->a.ctx, arg->engine,
2272 MI_NOOP); /* preemption disabled */
2273 if (IS_ERR(rq))
2274 return PTR_ERR(rq);
2275
2276 clear_bit(CONTEXT_BANNED, &rq->context->flags);
2277 i915_request_get(rq);
2278 i915_request_add(rq);
2279 if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2280 err = -EIO;
2281 goto out;
2282 }
2283
2284 intel_context_set_banned(rq->context);
2285 err = intel_engine_pulse(arg->engine); /* force reset */
2286 if (err)
2287 goto out;
2288
2289 err = wait_for_reset(arg->engine, rq, HZ / 2);
2290 if (err) {
2291 pr_err("Cancelled inflight0 request did not reset\n");
2292 goto out;
2293 }
2294
2295out:
2296 i915_request_put(rq);
2297 if (igt_flush_test(arg->engine->i915))
2298 err = -EIO;
2299 return err;
2300}
2301
2302static void force_reset_timeout(struct intel_engine_cs *engine)
2303{
2304 engine->reset_timeout.probability = 999;
2305 atomic_set(&engine->reset_timeout.times, -1);
2306}
2307
2308static void cancel_reset_timeout(struct intel_engine_cs *engine)
2309{
2310 memset(&engine->reset_timeout, 0, sizeof(engine->reset_timeout));
2311}
2312
2313static int __cancel_fail(struct live_preempt_cancel *arg)
2314{
2315 struct intel_engine_cs *engine = arg->engine;
2316 struct i915_request *rq;
2317 int err;
2318
2319 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2320 return 0;
2321
2322 if (!intel_has_reset_engine(engine->gt))
2323 return 0;
2324
2325 GEM_TRACE("%s(%s)\n", __func__, engine->name);
2326 rq = spinner_create_request(&arg->a.spin,
2327 arg->a.ctx, engine,
2328 MI_NOOP); /* preemption disabled */
2329 if (IS_ERR(rq))
2330 return PTR_ERR(rq);
2331
2332 clear_bit(CONTEXT_BANNED, &rq->context->flags);
2333 i915_request_get(rq);
2334 i915_request_add(rq);
2335 if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2336 err = -EIO;
2337 goto out;
2338 }
2339
2340 intel_context_set_banned(rq->context);
2341
2342 err = intel_engine_pulse(engine);
2343 if (err)
2344 goto out;
2345
2346 force_reset_timeout(engine);
2347
2348 /* force preempt reset [failure] */
2349 while (!engine->execlists.pending[0])
2350 intel_engine_flush_submission(engine);
2351 del_timer_sync(&engine->execlists.preempt);
2352 intel_engine_flush_submission(engine);
2353
2354 cancel_reset_timeout(engine);
2355
2356 /* after failure, require heartbeats to reset device */
2357 intel_engine_set_heartbeat(engine, 1);
2358 err = wait_for_reset(engine, rq, HZ / 2);
2359 intel_engine_set_heartbeat(engine,
2360 engine->defaults.heartbeat_interval_ms);
2361 if (err) {
2362 pr_err("Cancelled inflight0 request did not reset\n");
2363 goto out;
2364 }
2365
2366out:
2367 i915_request_put(rq);
2368 if (igt_flush_test(engine->i915))
2369 err = -EIO;
2370 return err;
2371}
2372
2373static int live_preempt_cancel(void *arg)
2374{
2375 struct intel_gt *gt = arg;
2376 struct live_preempt_cancel data;
2377 enum intel_engine_id id;
2378 int err = -ENOMEM;
2379
2380 /*
2381 * To cancel an inflight context, we need to first remove it from the
2382 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
2383 */
2384
2385 if (preempt_client_init(gt, &data.a))
2386 return -ENOMEM;
2387 if (preempt_client_init(gt, &data.b))
2388 goto err_client_a;
2389
2390 for_each_engine(data.engine, gt, id) {
2391 if (!intel_engine_has_preemption(data.engine))
2392 continue;
2393
2394 err = __cancel_active0(&data);
2395 if (err)
2396 goto err_wedged;
2397
2398 err = __cancel_active1(&data);
2399 if (err)
2400 goto err_wedged;
2401
2402 err = __cancel_queued(&data);
2403 if (err)
2404 goto err_wedged;
2405
2406 err = __cancel_hostile(&data);
2407 if (err)
2408 goto err_wedged;
2409
2410 err = __cancel_fail(&data);
2411 if (err)
2412 goto err_wedged;
2413 }
2414
2415 err = 0;
2416err_client_b:
2417 preempt_client_fini(&data.b);
2418err_client_a:
2419 preempt_client_fini(&data.a);
2420 return err;
2421
2422err_wedged:
2423 GEM_TRACE_DUMP();
2424 igt_spinner_end(&data.b.spin);
2425 igt_spinner_end(&data.a.spin);
2426 intel_gt_set_wedged(gt);
2427 goto err_client_b;
2428}
2429
2430static int live_suppress_self_preempt(void *arg)
2431{
2432 struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
2433 struct intel_gt *gt = arg;
2434 struct intel_engine_cs *engine;
2435 struct preempt_client a, b;
2436 enum intel_engine_id id;
2437 int err = -ENOMEM;
2438
2439 /*
2440 * Verify that if a preemption request does not cause a change in
2441 * the current execution order, the preempt-to-idle injection is
2442 * skipped and that we do not accidentally apply it after the CS
2443 * completion event.
2444 */
2445
2446 if (intel_uc_uses_guc_submission(>->uc))
2447 return 0; /* presume black blox */
2448
2449 if (intel_vgpu_active(gt->i915))
2450 return 0; /* GVT forces single port & request submission */
2451
2452 if (preempt_client_init(gt, &a))
2453 return -ENOMEM;
2454 if (preempt_client_init(gt, &b))
2455 goto err_client_a;
2456
2457 for_each_engine(engine, gt, id) {
2458 struct i915_request *rq_a, *rq_b;
2459 int depth;
2460
2461 if (!intel_engine_has_preemption(engine))
2462 continue;
2463
2464 if (igt_flush_test(gt->i915))
2465 goto err_wedged;
2466
2467 st_engine_heartbeat_disable(engine);
2468 engine->execlists.preempt_hang.count = 0;
2469
2470 rq_a = spinner_create_request(&a.spin,
2471 a.ctx, engine,
2472 MI_NOOP);
2473 if (IS_ERR(rq_a)) {
2474 err = PTR_ERR(rq_a);
2475 st_engine_heartbeat_enable(engine);
2476 goto err_client_b;
2477 }
2478
2479 i915_request_add(rq_a);
2480 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
2481 pr_err("First client failed to start\n");
2482 st_engine_heartbeat_enable(engine);
2483 goto err_wedged;
2484 }
2485
2486 /* Keep postponing the timer to avoid premature slicing */
2487 mod_timer(&engine->execlists.timer, jiffies + HZ);
2488 for (depth = 0; depth < 8; depth++) {
2489 rq_b = spinner_create_request(&b.spin,
2490 b.ctx, engine,
2491 MI_NOOP);
2492 if (IS_ERR(rq_b)) {
2493 err = PTR_ERR(rq_b);
2494 st_engine_heartbeat_enable(engine);
2495 goto err_client_b;
2496 }
2497 i915_request_add(rq_b);
2498
2499 GEM_BUG_ON(i915_request_completed(rq_a));
2500 engine->schedule(rq_a, &attr);
2501 igt_spinner_end(&a.spin);
2502
2503 if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2504 pr_err("Second client failed to start\n");
2505 st_engine_heartbeat_enable(engine);
2506 goto err_wedged;
2507 }
2508
2509 swap(a, b);
2510 rq_a = rq_b;
2511 }
2512 igt_spinner_end(&a.spin);
2513
2514 if (engine->execlists.preempt_hang.count) {
2515 pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
2516 engine->name,
2517 engine->execlists.preempt_hang.count,
2518 depth);
2519 st_engine_heartbeat_enable(engine);
2520 err = -EINVAL;
2521 goto err_client_b;
2522 }
2523
2524 st_engine_heartbeat_enable(engine);
2525 if (igt_flush_test(gt->i915))
2526 goto err_wedged;
2527 }
2528
2529 err = 0;
2530err_client_b:
2531 preempt_client_fini(&b);
2532err_client_a:
2533 preempt_client_fini(&a);
2534 return err;
2535
2536err_wedged:
2537 igt_spinner_end(&b.spin);
2538 igt_spinner_end(&a.spin);
2539 intel_gt_set_wedged(gt);
2540 err = -EIO;
2541 goto err_client_b;
2542}
2543
2544static int live_chain_preempt(void *arg)
2545{
2546 struct intel_gt *gt = arg;
2547 struct intel_engine_cs *engine;
2548 struct preempt_client hi, lo;
2549 enum intel_engine_id id;
2550 int err = -ENOMEM;
2551
2552 /*
2553 * Build a chain AB...BA between two contexts (A, B) and request
2554 * preemption of the last request. It should then complete before
2555 * the previously submitted spinner in B.
2556 */
2557
2558 if (preempt_client_init(gt, &hi))
2559 return -ENOMEM;
2560
2561 if (preempt_client_init(gt, &lo))
2562 goto err_client_hi;
2563
2564 for_each_engine(engine, gt, id) {
2565 struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
2566 struct igt_live_test t;
2567 struct i915_request *rq;
2568 int ring_size, count, i;
2569
2570 if (!intel_engine_has_preemption(engine))
2571 continue;
2572
2573 rq = spinner_create_request(&lo.spin,
2574 lo.ctx, engine,
2575 MI_ARB_CHECK);
2576 if (IS_ERR(rq))
2577 goto err_wedged;
2578
2579 i915_request_get(rq);
2580 i915_request_add(rq);
2581
2582 ring_size = rq->wa_tail - rq->head;
2583 if (ring_size < 0)
2584 ring_size += rq->ring->size;
2585 ring_size = rq->ring->size / ring_size;
2586 pr_debug("%s(%s): Using maximum of %d requests\n",
2587 __func__, engine->name, ring_size);
2588
2589 igt_spinner_end(&lo.spin);
2590 if (i915_request_wait(rq, 0, HZ / 2) < 0) {
2591 pr_err("Timed out waiting to flush %s\n", engine->name);
2592 i915_request_put(rq);
2593 goto err_wedged;
2594 }
2595 i915_request_put(rq);
2596
2597 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
2598 err = -EIO;
2599 goto err_wedged;
2600 }
2601
2602 for_each_prime_number_from(count, 1, ring_size) {
2603 rq = spinner_create_request(&hi.spin,
2604 hi.ctx, engine,
2605 MI_ARB_CHECK);
2606 if (IS_ERR(rq))
2607 goto err_wedged;
2608 i915_request_add(rq);
2609 if (!igt_wait_for_spinner(&hi.spin, rq))
2610 goto err_wedged;
2611
2612 rq = spinner_create_request(&lo.spin,
2613 lo.ctx, engine,
2614 MI_ARB_CHECK);
2615 if (IS_ERR(rq))
2616 goto err_wedged;
2617 i915_request_add(rq);
2618
2619 for (i = 0; i < count; i++) {
2620 rq = igt_request_alloc(lo.ctx, engine);
2621 if (IS_ERR(rq))
2622 goto err_wedged;
2623 i915_request_add(rq);
2624 }
2625
2626 rq = igt_request_alloc(hi.ctx, engine);
2627 if (IS_ERR(rq))
2628 goto err_wedged;
2629
2630 i915_request_get(rq);
2631 i915_request_add(rq);
2632 engine->schedule(rq, &attr);
2633
2634 igt_spinner_end(&hi.spin);
2635 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2636 struct drm_printer p =
2637 drm_info_printer(gt->i915->drm.dev);
2638
2639 pr_err("Failed to preempt over chain of %d\n",
2640 count);
2641 intel_engine_dump(engine, &p,
2642 "%s\n", engine->name);
2643 i915_request_put(rq);
2644 goto err_wedged;
2645 }
2646 igt_spinner_end(&lo.spin);
2647 i915_request_put(rq);
2648
2649 rq = igt_request_alloc(lo.ctx, engine);
2650 if (IS_ERR(rq))
2651 goto err_wedged;
2652
2653 i915_request_get(rq);
2654 i915_request_add(rq);
2655
2656 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2657 struct drm_printer p =
2658 drm_info_printer(gt->i915->drm.dev);
2659
2660 pr_err("Failed to flush low priority chain of %d requests\n",
2661 count);
2662 intel_engine_dump(engine, &p,
2663 "%s\n", engine->name);
2664
2665 i915_request_put(rq);
2666 goto err_wedged;
2667 }
2668 i915_request_put(rq);
2669 }
2670
2671 if (igt_live_test_end(&t)) {
2672 err = -EIO;
2673 goto err_wedged;
2674 }
2675 }
2676
2677 err = 0;
2678err_client_lo:
2679 preempt_client_fini(&lo);
2680err_client_hi:
2681 preempt_client_fini(&hi);
2682 return err;
2683
2684err_wedged:
2685 igt_spinner_end(&hi.spin);
2686 igt_spinner_end(&lo.spin);
2687 intel_gt_set_wedged(gt);
2688 err = -EIO;
2689 goto err_client_lo;
2690}
2691
2692static int create_gang(struct intel_engine_cs *engine,
2693 struct i915_request **prev)
2694{
2695 struct drm_i915_gem_object *obj;
2696 struct intel_context *ce;
2697 struct i915_request *rq;
2698 struct i915_vma *vma;
2699 u32 *cs;
2700 int err;
2701
2702 ce = intel_context_create(engine);
2703 if (IS_ERR(ce))
2704 return PTR_ERR(ce);
2705
2706 obj = i915_gem_object_create_internal(engine->i915, 4096);
2707 if (IS_ERR(obj)) {
2708 err = PTR_ERR(obj);
2709 goto err_ce;
2710 }
2711
2712 vma = i915_vma_instance(obj, ce->vm, NULL);
2713 if (IS_ERR(vma)) {
2714 err = PTR_ERR(vma);
2715 goto err_obj;
2716 }
2717
2718 err = i915_vma_pin(vma, 0, 0, PIN_USER);
2719 if (err)
2720 goto err_obj;
2721
2722 cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
2723 if (IS_ERR(cs)) {
2724 err = PTR_ERR(cs);
2725 goto err_obj;
2726 }
2727
2728 /* Semaphore target: spin until zero */
2729 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2730
2731 *cs++ = MI_SEMAPHORE_WAIT |
2732 MI_SEMAPHORE_POLL |
2733 MI_SEMAPHORE_SAD_EQ_SDD;
2734 *cs++ = 0;
2735 *cs++ = lower_32_bits(vma->node.start);
2736 *cs++ = upper_32_bits(vma->node.start);
2737
2738 if (*prev) {
2739 u64 offset = (*prev)->batch->node.start;
2740
2741 /* Terminate the spinner in the next lower priority batch. */
2742 *cs++ = MI_STORE_DWORD_IMM_GEN4;
2743 *cs++ = lower_32_bits(offset);
2744 *cs++ = upper_32_bits(offset);
2745 *cs++ = 0;
2746 }
2747
2748 *cs++ = MI_BATCH_BUFFER_END;
2749 i915_gem_object_flush_map(obj);
2750 i915_gem_object_unpin_map(obj);
2751
2752 rq = intel_context_create_request(ce);
2753 if (IS_ERR(rq)) {
2754 err = PTR_ERR(rq);
2755 goto err_obj;
2756 }
2757
2758 rq->batch = i915_vma_get(vma);
2759 i915_request_get(rq);
2760
2761 i915_vma_lock(vma);
2762 err = i915_request_await_object(rq, vma->obj, false);
2763 if (!err)
2764 err = i915_vma_move_to_active(vma, rq, 0);
2765 if (!err)
2766 err = rq->engine->emit_bb_start(rq,
2767 vma->node.start,
2768 PAGE_SIZE, 0);
2769 i915_vma_unlock(vma);
2770 i915_request_add(rq);
2771 if (err)
2772 goto err_rq;
2773
2774 i915_gem_object_put(obj);
2775 intel_context_put(ce);
2776
2777 rq->mock.link.next = &(*prev)->mock.link;
2778 *prev = rq;
2779 return 0;
2780
2781err_rq:
2782 i915_vma_put(rq->batch);
2783 i915_request_put(rq);
2784err_obj:
2785 i915_gem_object_put(obj);
2786err_ce:
2787 intel_context_put(ce);
2788 return err;
2789}
2790
2791static int __live_preempt_ring(struct intel_engine_cs *engine,
2792 struct igt_spinner *spin,
2793 int queue_sz, int ring_sz)
2794{
2795 struct intel_context *ce[2] = {};
2796 struct i915_request *rq;
2797 struct igt_live_test t;
2798 int err = 0;
2799 int n;
2800
2801 if (igt_live_test_begin(&t, engine->i915, __func__, engine->name))
2802 return -EIO;
2803
2804 for (n = 0; n < ARRAY_SIZE(ce); n++) {
2805 struct intel_context *tmp;
2806
2807 tmp = intel_context_create(engine);
2808 if (IS_ERR(tmp)) {
2809 err = PTR_ERR(tmp);
2810 goto err_ce;
2811 }
2812
2813 tmp->ring = __intel_context_ring_size(ring_sz);
2814
2815 err = intel_context_pin(tmp);
2816 if (err) {
2817 intel_context_put(tmp);
2818 goto err_ce;
2819 }
2820
2821 memset32(tmp->ring->vaddr,
2822 0xdeadbeef, /* trigger a hang if executed */
2823 tmp->ring->vma->size / sizeof(u32));
2824
2825 ce[n] = tmp;
2826 }
2827
2828 rq = igt_spinner_create_request(spin, ce[0], MI_ARB_CHECK);
2829 if (IS_ERR(rq)) {
2830 err = PTR_ERR(rq);
2831 goto err_ce;
2832 }
2833
2834 i915_request_get(rq);
2835 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
2836 i915_request_add(rq);
2837
2838 if (!igt_wait_for_spinner(spin, rq)) {
2839 intel_gt_set_wedged(engine->gt);
2840 i915_request_put(rq);
2841 err = -ETIME;
2842 goto err_ce;
2843 }
2844
2845 /* Fill the ring, until we will cause a wrap */
2846 n = 0;
2847 while (ce[0]->ring->tail - rq->wa_tail <= queue_sz) {
2848 struct i915_request *tmp;
2849
2850 tmp = intel_context_create_request(ce[0]);
2851 if (IS_ERR(tmp)) {
2852 err = PTR_ERR(tmp);
2853 i915_request_put(rq);
2854 goto err_ce;
2855 }
2856
2857 i915_request_add(tmp);
2858 intel_engine_flush_submission(engine);
2859 n++;
2860 }
2861 intel_engine_flush_submission(engine);
2862 pr_debug("%s: Filled %d with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
2863 engine->name, queue_sz, n,
2864 ce[0]->ring->size,
2865 ce[0]->ring->tail,
2866 ce[0]->ring->emit,
2867 rq->tail);
2868 i915_request_put(rq);
2869
2870 /* Create a second request to preempt the first ring */
2871 rq = intel_context_create_request(ce[1]);
2872 if (IS_ERR(rq)) {
2873 err = PTR_ERR(rq);
2874 goto err_ce;
2875 }
2876
2877 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
2878 i915_request_get(rq);
2879 i915_request_add(rq);
2880
2881 err = wait_for_submit(engine, rq, HZ / 2);
2882 i915_request_put(rq);
2883 if (err) {
2884 pr_err("%s: preemption request was not submitted\n",
2885 engine->name);
2886 err = -ETIME;
2887 }
2888
2889 pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
2890 engine->name,
2891 ce[0]->ring->tail, ce[0]->ring->emit,
2892 ce[1]->ring->tail, ce[1]->ring->emit);
2893
2894err_ce:
2895 intel_engine_flush_submission(engine);
2896 igt_spinner_end(spin);
2897 for (n = 0; n < ARRAY_SIZE(ce); n++) {
2898 if (IS_ERR_OR_NULL(ce[n]))
2899 break;
2900
2901 intel_context_unpin(ce[n]);
2902 intel_context_put(ce[n]);
2903 }
2904 if (igt_live_test_end(&t))
2905 err = -EIO;
2906 return err;
2907}
2908
2909static int live_preempt_ring(void *arg)
2910{
2911 struct intel_gt *gt = arg;
2912 struct intel_engine_cs *engine;
2913 struct igt_spinner spin;
2914 enum intel_engine_id id;
2915 int err = 0;
2916
2917 /*
2918 * Check that we rollback large chunks of a ring in order to do a
2919 * preemption event. Similar to live_unlite_ring, but looking at
2920 * ring size rather than the impact of intel_ring_direction().
2921 */
2922
2923 if (igt_spinner_init(&spin, gt))
2924 return -ENOMEM;
2925
2926 for_each_engine(engine, gt, id) {
2927 int n;
2928
2929 if (!intel_engine_has_preemption(engine))
2930 continue;
2931
2932 if (!intel_engine_can_store_dword(engine))
2933 continue;
2934
2935 st_engine_heartbeat_disable(engine);
2936
2937 for (n = 0; n <= 3; n++) {
2938 err = __live_preempt_ring(engine, &spin,
2939 n * SZ_4K / 4, SZ_4K);
2940 if (err)
2941 break;
2942 }
2943
2944 st_engine_heartbeat_enable(engine);
2945 if (err)
2946 break;
2947 }
2948
2949 igt_spinner_fini(&spin);
2950 return err;
2951}
2952
2953static int live_preempt_gang(void *arg)
2954{
2955 struct intel_gt *gt = arg;
2956 struct intel_engine_cs *engine;
2957 enum intel_engine_id id;
2958
2959 /*
2960 * Build as long a chain of preempters as we can, with each
2961 * request higher priority than the last. Once we are ready, we release
2962 * the last batch which then precolates down the chain, each releasing
2963 * the next oldest in turn. The intent is to simply push as hard as we
2964 * can with the number of preemptions, trying to exceed narrow HW
2965 * limits. At a minimum, we insist that we can sort all the user
2966 * high priority levels into execution order.
2967 */
2968
2969 for_each_engine(engine, gt, id) {
2970 struct i915_request *rq = NULL;
2971 struct igt_live_test t;
2972 IGT_TIMEOUT(end_time);
2973 int prio = 0;
2974 int err = 0;
2975 u32 *cs;
2976
2977 if (!intel_engine_has_preemption(engine))
2978 continue;
2979
2980 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
2981 return -EIO;
2982
2983 do {
2984 struct i915_sched_attr attr = { .priority = prio++ };
2985
2986 err = create_gang(engine, &rq);
2987 if (err)
2988 break;
2989
2990 /* Submit each spinner at increasing priority */
2991 engine->schedule(rq, &attr);
2992 } while (prio <= I915_PRIORITY_MAX &&
2993 !__igt_timeout(end_time, NULL));
2994 pr_debug("%s: Preempt chain of %d requests\n",
2995 engine->name, prio);
2996
2997 /*
2998 * Such that the last spinner is the highest priority and
2999 * should execute first. When that spinner completes,
3000 * it will terminate the next lowest spinner until there
3001 * are no more spinners and the gang is complete.
3002 */
3003 cs = i915_gem_object_pin_map_unlocked(rq->batch->obj, I915_MAP_WC);
3004 if (!IS_ERR(cs)) {
3005 *cs = 0;
3006 i915_gem_object_unpin_map(rq->batch->obj);
3007 } else {
3008 err = PTR_ERR(cs);
3009 intel_gt_set_wedged(gt);
3010 }
3011
3012 while (rq) { /* wait for each rq from highest to lowest prio */
3013 struct i915_request *n = list_next_entry(rq, mock.link);
3014
3015 if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
3016 struct drm_printer p =
3017 drm_info_printer(engine->i915->drm.dev);
3018
3019 pr_err("Failed to flush chain of %d requests, at %d\n",
3020 prio, rq_prio(rq));
3021 intel_engine_dump(engine, &p,
3022 "%s\n", engine->name);
3023
3024 err = -ETIME;
3025 }
3026
3027 i915_vma_put(rq->batch);
3028 i915_request_put(rq);
3029 rq = n;
3030 }
3031
3032 if (igt_live_test_end(&t))
3033 err = -EIO;
3034 if (err)
3035 return err;
3036 }
3037
3038 return 0;
3039}
3040
3041static struct i915_vma *
3042create_gpr_user(struct intel_engine_cs *engine,
3043 struct i915_vma *result,
3044 unsigned int offset)
3045{
3046 struct drm_i915_gem_object *obj;
3047 struct i915_vma *vma;
3048 u32 *cs;
3049 int err;
3050 int i;
3051
3052 obj = i915_gem_object_create_internal(engine->i915, 4096);
3053 if (IS_ERR(obj))
3054 return ERR_CAST(obj);
3055
3056 vma = i915_vma_instance(obj, result->vm, NULL);
3057 if (IS_ERR(vma)) {
3058 i915_gem_object_put(obj);
3059 return vma;
3060 }
3061
3062 err = i915_vma_pin(vma, 0, 0, PIN_USER);
3063 if (err) {
3064 i915_vma_put(vma);
3065 return ERR_PTR(err);
3066 }
3067
3068 cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
3069 if (IS_ERR(cs)) {
3070 i915_vma_put(vma);
3071 return ERR_CAST(cs);
3072 }
3073
3074 /* All GPR are clear for new contexts. We use GPR(0) as a constant */
3075 *cs++ = MI_LOAD_REGISTER_IMM(1);
3076 *cs++ = CS_GPR(engine, 0);
3077 *cs++ = 1;
3078
3079 for (i = 1; i < NUM_GPR; i++) {
3080 u64 addr;
3081
3082 /*
3083 * Perform: GPR[i]++
3084 *
3085 * As we read and write into the context saved GPR[i], if
3086 * we restart this batch buffer from an earlier point, we
3087 * will repeat the increment and store a value > 1.
3088 */
3089 *cs++ = MI_MATH(4);
3090 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i));
3091 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0));
3092 *cs++ = MI_MATH_ADD;
3093 *cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU);
3094
3095 addr = result->node.start + offset + i * sizeof(*cs);
3096 *cs++ = MI_STORE_REGISTER_MEM_GEN8;
3097 *cs++ = CS_GPR(engine, 2 * i);
3098 *cs++ = lower_32_bits(addr);
3099 *cs++ = upper_32_bits(addr);
3100
3101 *cs++ = MI_SEMAPHORE_WAIT |
3102 MI_SEMAPHORE_POLL |
3103 MI_SEMAPHORE_SAD_GTE_SDD;
3104 *cs++ = i;
3105 *cs++ = lower_32_bits(result->node.start);
3106 *cs++ = upper_32_bits(result->node.start);
3107 }
3108
3109 *cs++ = MI_BATCH_BUFFER_END;
3110 i915_gem_object_flush_map(obj);
3111 i915_gem_object_unpin_map(obj);
3112
3113 return vma;
3114}
3115
3116static struct i915_vma *create_global(struct intel_gt *gt, size_t sz)
3117{
3118 struct drm_i915_gem_object *obj;
3119 struct i915_vma *vma;
3120 int err;
3121
3122 obj = i915_gem_object_create_internal(gt->i915, sz);
3123 if (IS_ERR(obj))
3124 return ERR_CAST(obj);
3125
3126 vma = i915_vma_instance(obj, >->ggtt->vm, NULL);
3127 if (IS_ERR(vma)) {
3128 i915_gem_object_put(obj);
3129 return vma;
3130 }
3131
3132 err = i915_ggtt_pin(vma, NULL, 0, 0);
3133 if (err) {
3134 i915_vma_put(vma);
3135 return ERR_PTR(err);
3136 }
3137
3138 return vma;
3139}
3140
3141static struct i915_request *
3142create_gpr_client(struct intel_engine_cs *engine,
3143 struct i915_vma *global,
3144 unsigned int offset)
3145{
3146 struct i915_vma *batch, *vma;
3147 struct intel_context *ce;
3148 struct i915_request *rq;
3149 int err;
3150
3151 ce = intel_context_create(engine);
3152 if (IS_ERR(ce))
3153 return ERR_CAST(ce);
3154
3155 vma = i915_vma_instance(global->obj, ce->vm, NULL);
3156 if (IS_ERR(vma)) {
3157 err = PTR_ERR(vma);
3158 goto out_ce;
3159 }
3160
3161 err = i915_vma_pin(vma, 0, 0, PIN_USER);
3162 if (err)
3163 goto out_ce;
3164
3165 batch = create_gpr_user(engine, vma, offset);
3166 if (IS_ERR(batch)) {
3167 err = PTR_ERR(batch);
3168 goto out_vma;
3169 }
3170
3171 rq = intel_context_create_request(ce);
3172 if (IS_ERR(rq)) {
3173 err = PTR_ERR(rq);
3174 goto out_batch;
3175 }
3176
3177 i915_vma_lock(vma);
3178 err = i915_request_await_object(rq, vma->obj, false);
3179 if (!err)
3180 err = i915_vma_move_to_active(vma, rq, 0);
3181 i915_vma_unlock(vma);
3182
3183 i915_vma_lock(batch);
3184 if (!err)
3185 err = i915_request_await_object(rq, batch->obj, false);
3186 if (!err)
3187 err = i915_vma_move_to_active(batch, rq, 0);
3188 if (!err)
3189 err = rq->engine->emit_bb_start(rq,
3190 batch->node.start,
3191 PAGE_SIZE, 0);
3192 i915_vma_unlock(batch);
3193 i915_vma_unpin(batch);
3194
3195 if (!err)
3196 i915_request_get(rq);
3197 i915_request_add(rq);
3198
3199out_batch:
3200 i915_vma_put(batch);
3201out_vma:
3202 i915_vma_unpin(vma);
3203out_ce:
3204 intel_context_put(ce);
3205 return err ? ERR_PTR(err) : rq;
3206}
3207
3208static int preempt_user(struct intel_engine_cs *engine,
3209 struct i915_vma *global,
3210 int id)
3211{
3212 struct i915_sched_attr attr = {
3213 .priority = I915_PRIORITY_MAX
3214 };
3215 struct i915_request *rq;
3216 int err = 0;
3217 u32 *cs;
3218
3219 rq = intel_engine_create_kernel_request(engine);
3220 if (IS_ERR(rq))
3221 return PTR_ERR(rq);
3222
3223 cs = intel_ring_begin(rq, 4);
3224 if (IS_ERR(cs)) {
3225 i915_request_add(rq);
3226 return PTR_ERR(cs);
3227 }
3228
3229 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
3230 *cs++ = i915_ggtt_offset(global);
3231 *cs++ = 0;
3232 *cs++ = id;
3233
3234 intel_ring_advance(rq, cs);
3235
3236 i915_request_get(rq);
3237 i915_request_add(rq);
3238
3239 engine->schedule(rq, &attr);
3240
3241 if (i915_request_wait(rq, 0, HZ / 2) < 0)
3242 err = -ETIME;
3243 i915_request_put(rq);
3244
3245 return err;
3246}
3247
3248static int live_preempt_user(void *arg)
3249{
3250 struct intel_gt *gt = arg;
3251 struct intel_engine_cs *engine;
3252 struct i915_vma *global;
3253 enum intel_engine_id id;
3254 u32 *result;
3255 int err = 0;
3256
3257 /*
3258 * In our other tests, we look at preemption in carefully
3259 * controlled conditions in the ringbuffer. Since most of the
3260 * time is spent in user batches, most of our preemptions naturally
3261 * occur there. We want to verify that when we preempt inside a batch
3262 * we continue on from the current instruction and do not roll back
3263 * to the start, or another earlier arbitration point.
3264 *
3265 * To verify this, we create a batch which is a mixture of
3266 * MI_MATH (gpr++) MI_SRM (gpr) and preemption points. Then with
3267 * a few preempting contexts thrown into the mix, we look for any
3268 * repeated instructions (which show up as incorrect values).
3269 */
3270
3271 global = create_global(gt, 4096);
3272 if (IS_ERR(global))
3273 return PTR_ERR(global);
3274
3275 result = i915_gem_object_pin_map_unlocked(global->obj, I915_MAP_WC);
3276 if (IS_ERR(result)) {
3277 i915_vma_unpin_and_release(&global, 0);
3278 return PTR_ERR(result);
3279 }
3280
3281 for_each_engine(engine, gt, id) {
3282 struct i915_request *client[3] = {};
3283 struct igt_live_test t;
3284 int i;
3285
3286 if (!intel_engine_has_preemption(engine))
3287 continue;
3288
3289 if (GRAPHICS_VER(gt->i915) == 8 && engine->class != RENDER_CLASS)
3290 continue; /* we need per-context GPR */
3291
3292 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
3293 err = -EIO;
3294 break;
3295 }
3296
3297 memset(result, 0, 4096);
3298
3299 for (i = 0; i < ARRAY_SIZE(client); i++) {
3300 struct i915_request *rq;
3301
3302 rq = create_gpr_client(engine, global,
3303 NUM_GPR * i * sizeof(u32));
3304 if (IS_ERR(rq)) {
3305 err = PTR_ERR(rq);
3306 goto end_test;
3307 }
3308
3309 client[i] = rq;
3310 }
3311
3312 /* Continuously preempt the set of 3 running contexts */
3313 for (i = 1; i <= NUM_GPR; i++) {
3314 err = preempt_user(engine, global, i);
3315 if (err)
3316 goto end_test;
3317 }
3318
3319 if (READ_ONCE(result[0]) != NUM_GPR) {
3320 pr_err("%s: Failed to release semaphore\n",
3321 engine->name);
3322 err = -EIO;
3323 goto end_test;
3324 }
3325
3326 for (i = 0; i < ARRAY_SIZE(client); i++) {
3327 int gpr;
3328
3329 if (i915_request_wait(client[i], 0, HZ / 2) < 0) {
3330 err = -ETIME;
3331 goto end_test;
3332 }
3333
3334 for (gpr = 1; gpr < NUM_GPR; gpr++) {
3335 if (result[NUM_GPR * i + gpr] != 1) {
3336 pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n",
3337 engine->name,
3338 i, gpr, result[NUM_GPR * i + gpr]);
3339 err = -EINVAL;
3340 goto end_test;
3341 }
3342 }
3343 }
3344
3345end_test:
3346 for (i = 0; i < ARRAY_SIZE(client); i++) {
3347 if (!client[i])
3348 break;
3349
3350 i915_request_put(client[i]);
3351 }
3352
3353 /* Flush the semaphores on error */
3354 smp_store_mb(result[0], -1);
3355 if (igt_live_test_end(&t))
3356 err = -EIO;
3357 if (err)
3358 break;
3359 }
3360
3361 i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP);
3362 return err;
3363}
3364
3365static int live_preempt_timeout(void *arg)
3366{
3367 struct intel_gt *gt = arg;
3368 struct i915_gem_context *ctx_hi, *ctx_lo;
3369 struct igt_spinner spin_lo;
3370 struct intel_engine_cs *engine;
3371 enum intel_engine_id id;
3372 int err = -ENOMEM;
3373
3374 /*
3375 * Check that we force preemption to occur by cancelling the previous
3376 * context if it refuses to yield the GPU.
3377 */
3378 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
3379 return 0;
3380
3381 if (!intel_has_reset_engine(gt))
3382 return 0;
3383
3384 if (igt_spinner_init(&spin_lo, gt))
3385 return -ENOMEM;
3386
3387 ctx_hi = kernel_context(gt->i915);
3388 if (!ctx_hi)
3389 goto err_spin_lo;
3390 ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
3391
3392 ctx_lo = kernel_context(gt->i915);
3393 if (!ctx_lo)
3394 goto err_ctx_hi;
3395 ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
3396
3397 for_each_engine(engine, gt, id) {
3398 unsigned long saved_timeout;
3399 struct i915_request *rq;
3400
3401 if (!intel_engine_has_preemption(engine))
3402 continue;
3403
3404 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
3405 MI_NOOP); /* preemption disabled */
3406 if (IS_ERR(rq)) {
3407 err = PTR_ERR(rq);
3408 goto err_ctx_lo;
3409 }
3410
3411 i915_request_add(rq);
3412 if (!igt_wait_for_spinner(&spin_lo, rq)) {
3413 intel_gt_set_wedged(gt);
3414 err = -EIO;
3415 goto err_ctx_lo;
3416 }
3417
3418 rq = igt_request_alloc(ctx_hi, engine);
3419 if (IS_ERR(rq)) {
3420 igt_spinner_end(&spin_lo);
3421 err = PTR_ERR(rq);
3422 goto err_ctx_lo;
3423 }
3424
3425 /* Flush the previous CS ack before changing timeouts */
3426 while (READ_ONCE(engine->execlists.pending[0]))
3427 cpu_relax();
3428
3429 saved_timeout = engine->props.preempt_timeout_ms;
3430 engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */
3431
3432 i915_request_get(rq);
3433 i915_request_add(rq);
3434
3435 intel_engine_flush_submission(engine);
3436 engine->props.preempt_timeout_ms = saved_timeout;
3437
3438 if (i915_request_wait(rq, 0, HZ / 10) < 0) {
3439 intel_gt_set_wedged(gt);
3440 i915_request_put(rq);
3441 err = -ETIME;
3442 goto err_ctx_lo;
3443 }
3444
3445 igt_spinner_end(&spin_lo);
3446 i915_request_put(rq);
3447 }
3448
3449 err = 0;
3450err_ctx_lo:
3451 kernel_context_close(ctx_lo);
3452err_ctx_hi:
3453 kernel_context_close(ctx_hi);
3454err_spin_lo:
3455 igt_spinner_fini(&spin_lo);
3456 return err;
3457}
3458
3459static int random_range(struct rnd_state *rnd, int min, int max)
3460{
3461 return i915_prandom_u32_max_state(max - min, rnd) + min;
3462}
3463
3464static int random_priority(struct rnd_state *rnd)
3465{
3466 return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
3467}
3468
/* Shared state for the preemption smoke tests. */
struct preempt_smoke {
	struct intel_gt *gt; /* GT whose engines are iterated */
	struct i915_gem_context **contexts; /* pool of ncontext contexts */
	struct intel_engine_cs *engine; /* engine used by smoke_submit() */
	struct drm_i915_gem_object *batch; /* batch object handed to smoke_submit() by the crescendo threads; NULL when unused */
	unsigned int ncontext; /* number of entries in contexts[] */
	struct rnd_state prng; /* random state for context/priority picks */
	unsigned long count; /* requests submitted, set by the crescendo thread */
};
3478
3479static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
3480{
3481 return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
3482 &smoke->prng)];
3483}
3484
3485static int smoke_submit(struct preempt_smoke *smoke,
3486 struct i915_gem_context *ctx, int prio,
3487 struct drm_i915_gem_object *batch)
3488{
3489 struct i915_request *rq;
3490 struct i915_vma *vma = NULL;
3491 int err = 0;
3492
3493 if (batch) {
3494 struct i915_address_space *vm;
3495
3496 vm = i915_gem_context_get_vm_rcu(ctx);
3497 vma = i915_vma_instance(batch, vm, NULL);
3498 i915_vm_put(vm);
3499 if (IS_ERR(vma))
3500 return PTR_ERR(vma);
3501
3502 err = i915_vma_pin(vma, 0, 0, PIN_USER);
3503 if (err)
3504 return err;
3505 }
3506
3507 ctx->sched.priority = prio;
3508
3509 rq = igt_request_alloc(ctx, smoke->engine);
3510 if (IS_ERR(rq)) {
3511 err = PTR_ERR(rq);
3512 goto unpin;
3513 }
3514
3515 if (vma) {
3516 i915_vma_lock(vma);
3517 err = i915_request_await_object(rq, vma->obj, false);
3518 if (!err)
3519 err = i915_vma_move_to_active(vma, rq, 0);
3520 if (!err)
3521 err = rq->engine->emit_bb_start(rq,
3522 vma->node.start,
3523 PAGE_SIZE, 0);
3524 i915_vma_unlock(vma);
3525 }
3526
3527 i915_request_add(rq);
3528
3529unpin:
3530 if (vma)
3531 i915_vma_unpin(vma);
3532
3533 return err;
3534}
3535
3536static int smoke_crescendo_thread(void *arg)
3537{
3538 struct preempt_smoke *smoke = arg;
3539 IGT_TIMEOUT(end_time);
3540 unsigned long count;
3541
3542 count = 0;
3543 do {
3544 struct i915_gem_context *ctx = smoke_context(smoke);
3545 int err;
3546
3547 err = smoke_submit(smoke,
3548 ctx, count % I915_PRIORITY_MAX,
3549 smoke->batch);
3550 if (err)
3551 return err;
3552
3553 count++;
3554 } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
3555
3556 smoke->count = count;
3557 return 0;
3558}
3559
3560static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
3561#define BATCH BIT(0)
3562{
3563 struct task_struct *tsk[I915_NUM_ENGINES] = {};
3564 struct preempt_smoke arg[I915_NUM_ENGINES];
3565 struct intel_engine_cs *engine;
3566 enum intel_engine_id id;
3567 unsigned long count;
3568 int err = 0;
3569
3570 for_each_engine(engine, smoke->gt, id) {
3571 arg[id] = *smoke;
3572 arg[id].engine = engine;
3573 if (!(flags & BATCH))
3574 arg[id].batch = NULL;
3575 arg[id].count = 0;
3576
3577 tsk[id] = kthread_run(smoke_crescendo_thread, &arg,
3578 "igt/smoke:%d", id);
3579 if (IS_ERR(tsk[id])) {
3580 err = PTR_ERR(tsk[id]);
3581 break;
3582 }
3583 get_task_struct(tsk[id]);
3584 }
3585
3586 yield(); /* start all threads before we kthread_stop() */
3587
3588 count = 0;
3589 for_each_engine(engine, smoke->gt, id) {
3590 int status;
3591
3592 if (IS_ERR_OR_NULL(tsk[id]))
3593 continue;
3594
3595 status = kthread_stop(tsk[id]);
3596 if (status && !err)
3597 err = status;
3598
3599 count += arg[id].count;
3600
3601 put_task_struct(tsk[id]);
3602 }
3603
3604 pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
3605 count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3606 return 0;
3607}
3608
3609static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
3610{
3611 enum intel_engine_id id;
3612 IGT_TIMEOUT(end_time);
3613 unsigned long count;
3614
3615 count = 0;
3616 do {
3617 for_each_engine(smoke->engine, smoke->gt, id) {
3618 struct i915_gem_context *ctx = smoke_context(smoke);
3619 int err;
3620
3621 err = smoke_submit(smoke,
3622 ctx, random_priority(&smoke->prng),
3623 flags & BATCH ? smoke->batch : NULL);
3624 if (err)
3625 return err;
3626
3627 count++;
3628 }
3629 } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
3630
3631 pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
3632 count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3633 return 0;
3634}
3635
3636static int live_preempt_smoke(void *arg)
3637{
3638 struct preempt_smoke smoke = {
3639 .gt = arg,
3640 .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
3641 .ncontext = 256,
3642 };
3643 const unsigned int phase[] = { 0, BATCH };
3644 struct igt_live_test t;
3645 int err = -ENOMEM;
3646 u32 *cs;
3647 int n;
3648
3649 smoke.contexts = kmalloc_array(smoke.ncontext,
3650 sizeof(*smoke.contexts),
3651 GFP_KERNEL);
3652 if (!smoke.contexts)
3653 return -ENOMEM;
3654
3655 smoke.batch =
3656 i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
3657 if (IS_ERR(smoke.batch)) {
3658 err = PTR_ERR(smoke.batch);
3659 goto err_free;
3660 }
3661
3662 cs = i915_gem_object_pin_map_unlocked(smoke.batch, I915_MAP_WB);
3663 if (IS_ERR(cs)) {
3664 err = PTR_ERR(cs);
3665 goto err_batch;
3666 }
3667 for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
3668 cs[n] = MI_ARB_CHECK;
3669 cs[n] = MI_BATCH_BUFFER_END;
3670 i915_gem_object_flush_map(smoke.batch);
3671 i915_gem_object_unpin_map(smoke.batch);
3672
3673 if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
3674 err = -EIO;
3675 goto err_batch;
3676 }
3677
3678 for (n = 0; n < smoke.ncontext; n++) {
3679 smoke.contexts[n] = kernel_context(smoke.gt->i915);
3680 if (!smoke.contexts[n])
3681 goto err_ctx;
3682 }
3683
3684 for (n = 0; n < ARRAY_SIZE(phase); n++) {
3685 err = smoke_crescendo(&smoke, phase[n]);
3686 if (err)
3687 goto err_ctx;
3688
3689 err = smoke_random(&smoke, phase[n]);
3690 if (err)
3691 goto err_ctx;
3692 }
3693
3694err_ctx:
3695 if (igt_live_test_end(&t))
3696 err = -EIO;
3697
3698 for (n = 0; n < smoke.ncontext; n++) {
3699 if (!smoke.contexts[n])
3700 break;
3701 kernel_context_close(smoke.contexts[n]);
3702 }
3703
3704err_batch:
3705 i915_gem_object_put(smoke.batch);
3706err_free:
3707 kfree(smoke.contexts);
3708
3709 return err;
3710}
3711
3712static int nop_virtual_engine(struct intel_gt *gt,
3713 struct intel_engine_cs **siblings,
3714 unsigned int nsibling,
3715 unsigned int nctx,
3716 unsigned int flags)
3717#define CHAIN BIT(0)
3718{
3719 IGT_TIMEOUT(end_time);
3720 struct i915_request *request[16] = {};
3721 struct intel_context *ve[16];
3722 unsigned long n, prime, nc;
3723 struct igt_live_test t;
3724 ktime_t times[2] = {};
3725 int err;
3726
3727 GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
3728
3729 for (n = 0; n < nctx; n++) {
3730 ve[n] = intel_execlists_create_virtual(siblings, nsibling);
3731 if (IS_ERR(ve[n])) {
3732 err = PTR_ERR(ve[n]);
3733 nctx = n;
3734 goto out;
3735 }
3736
3737 err = intel_context_pin(ve[n]);
3738 if (err) {
3739 intel_context_put(ve[n]);
3740 nctx = n;
3741 goto out;
3742 }
3743 }
3744
3745 err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
3746 if (err)
3747 goto out;
3748
3749 for_each_prime_number_from(prime, 1, 8192) {
3750 times[1] = ktime_get_raw();
3751
3752 if (flags & CHAIN) {
3753 for (nc = 0; nc < nctx; nc++) {
3754 for (n = 0; n < prime; n++) {
3755 struct i915_request *rq;
3756
3757 rq = i915_request_create(ve[nc]);
3758 if (IS_ERR(rq)) {
3759 err = PTR_ERR(rq);
3760 goto out;
3761 }
3762
3763 if (request[nc])
3764 i915_request_put(request[nc]);
3765 request[nc] = i915_request_get(rq);
3766 i915_request_add(rq);
3767 }
3768 }
3769 } else {
3770 for (n = 0; n < prime; n++) {
3771 for (nc = 0; nc < nctx; nc++) {
3772 struct i915_request *rq;
3773
3774 rq = i915_request_create(ve[nc]);
3775 if (IS_ERR(rq)) {
3776 err = PTR_ERR(rq);
3777 goto out;
3778 }
3779
3780 if (request[nc])
3781 i915_request_put(request[nc]);
3782 request[nc] = i915_request_get(rq);
3783 i915_request_add(rq);
3784 }
3785 }
3786 }
3787
3788 for (nc = 0; nc < nctx; nc++) {
3789 if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
3790 pr_err("%s(%s): wait for %llx:%lld timed out\n",
3791 __func__, ve[0]->engine->name,
3792 request[nc]->fence.context,
3793 request[nc]->fence.seqno);
3794
3795 GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3796 __func__, ve[0]->engine->name,
3797 request[nc]->fence.context,
3798 request[nc]->fence.seqno);
3799 GEM_TRACE_DUMP();
3800 intel_gt_set_wedged(gt);
3801 break;
3802 }
3803 }
3804
3805 times[1] = ktime_sub(ktime_get_raw(), times[1]);
3806 if (prime == 1)
3807 times[0] = times[1];
3808
3809 for (nc = 0; nc < nctx; nc++) {
3810 i915_request_put(request[nc]);
3811 request[nc] = NULL;
3812 }
3813
3814 if (__igt_timeout(end_time, NULL))
3815 break;
3816 }
3817
3818 err = igt_live_test_end(&t);
3819 if (err)
3820 goto out;
3821
3822 pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
3823 nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
3824 prime, div64_u64(ktime_to_ns(times[1]), prime));
3825
3826out:
3827 if (igt_flush_test(gt->i915))
3828 err = -EIO;
3829
3830 for (nc = 0; nc < nctx; nc++) {
3831 i915_request_put(request[nc]);
3832 intel_context_unpin(ve[nc]);
3833 intel_context_put(ve[nc]);
3834 }
3835 return err;
3836}
3837
3838static unsigned int
3839__select_siblings(struct intel_gt *gt,
3840 unsigned int class,
3841 struct intel_engine_cs **siblings,
3842 bool (*filter)(const struct intel_engine_cs *))
3843{
3844 unsigned int n = 0;
3845 unsigned int inst;
3846
3847 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3848 if (!gt->engine_class[class][inst])
3849 continue;
3850
3851 if (filter && !filter(gt->engine_class[class][inst]))
3852 continue;
3853
3854 siblings[n++] = gt->engine_class[class][inst];
3855 }
3856
3857 return n;
3858}
3859
3860static unsigned int
3861select_siblings(struct intel_gt *gt,
3862 unsigned int class,
3863 struct intel_engine_cs **siblings)
3864{
3865 return __select_siblings(gt, class, siblings, NULL);
3866}
3867
3868static int live_virtual_engine(void *arg)
3869{
3870 struct intel_gt *gt = arg;
3871 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3872 struct intel_engine_cs *engine;
3873 enum intel_engine_id id;
3874 unsigned int class;
3875 int err;
3876
3877 if (intel_uc_uses_guc_submission(>->uc))
3878 return 0;
3879
3880 for_each_engine(engine, gt, id) {
3881 err = nop_virtual_engine(gt, &engine, 1, 1, 0);
3882 if (err) {
3883 pr_err("Failed to wrap engine %s: err=%d\n",
3884 engine->name, err);
3885 return err;
3886 }
3887 }
3888
3889 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3890 int nsibling, n;
3891
3892 nsibling = select_siblings(gt, class, siblings);
3893 if (nsibling < 2)
3894 continue;
3895
3896 for (n = 1; n <= nsibling + 1; n++) {
3897 err = nop_virtual_engine(gt, siblings, nsibling,
3898 n, 0);
3899 if (err)
3900 return err;
3901 }
3902
3903 err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
3904 if (err)
3905 return err;
3906 }
3907
3908 return 0;
3909}
3910
3911static int mask_virtual_engine(struct intel_gt *gt,
3912 struct intel_engine_cs **siblings,
3913 unsigned int nsibling)
3914{
3915 struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
3916 struct intel_context *ve;
3917 struct igt_live_test t;
3918 unsigned int n;
3919 int err;
3920
3921 /*
3922 * Check that by setting the execution mask on a request, we can
3923 * restrict it to our desired engine within the virtual engine.
3924 */
3925
3926 ve = intel_execlists_create_virtual(siblings, nsibling);
3927 if (IS_ERR(ve)) {
3928 err = PTR_ERR(ve);
3929 goto out_close;
3930 }
3931
3932 err = intel_context_pin(ve);
3933 if (err)
3934 goto out_put;
3935
3936 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3937 if (err)
3938 goto out_unpin;
3939
3940 for (n = 0; n < nsibling; n++) {
3941 request[n] = i915_request_create(ve);
3942 if (IS_ERR(request[n])) {
3943 err = PTR_ERR(request[n]);
3944 nsibling = n;
3945 goto out;
3946 }
3947
3948 /* Reverse order as it's more likely to be unnatural */
3949 request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
3950
3951 i915_request_get(request[n]);
3952 i915_request_add(request[n]);
3953 }
3954
3955 for (n = 0; n < nsibling; n++) {
3956 if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
3957 pr_err("%s(%s): wait for %llx:%lld timed out\n",
3958 __func__, ve->engine->name,
3959 request[n]->fence.context,
3960 request[n]->fence.seqno);
3961
3962 GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3963 __func__, ve->engine->name,
3964 request[n]->fence.context,
3965 request[n]->fence.seqno);
3966 GEM_TRACE_DUMP();
3967 intel_gt_set_wedged(gt);
3968 err = -EIO;
3969 goto out;
3970 }
3971
3972 if (request[n]->engine != siblings[nsibling - n - 1]) {
3973 pr_err("Executed on wrong sibling '%s', expected '%s'\n",
3974 request[n]->engine->name,
3975 siblings[nsibling - n - 1]->name);
3976 err = -EINVAL;
3977 goto out;
3978 }
3979 }
3980
3981 err = igt_live_test_end(&t);
3982out:
3983 if (igt_flush_test(gt->i915))
3984 err = -EIO;
3985
3986 for (n = 0; n < nsibling; n++)
3987 i915_request_put(request[n]);
3988
3989out_unpin:
3990 intel_context_unpin(ve);
3991out_put:
3992 intel_context_put(ve);
3993out_close:
3994 return err;
3995}
3996
3997static int live_virtual_mask(void *arg)
3998{
3999 struct intel_gt *gt = arg;
4000 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4001 unsigned int class;
4002 int err;
4003
4004 if (intel_uc_uses_guc_submission(>->uc))
4005 return 0;
4006
4007 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4008 unsigned int nsibling;
4009
4010 nsibling = select_siblings(gt, class, siblings);
4011 if (nsibling < 2)
4012 continue;
4013
4014 err = mask_virtual_engine(gt, siblings, nsibling);
4015 if (err)
4016 return err;
4017 }
4018
4019 return 0;
4020}
4021
4022static int slicein_virtual_engine(struct intel_gt *gt,
4023 struct intel_engine_cs **siblings,
4024 unsigned int nsibling)
4025{
4026 const long timeout = slice_timeout(siblings[0]);
4027 struct intel_context *ce;
4028 struct i915_request *rq;
4029 struct igt_spinner spin;
4030 unsigned int n;
4031 int err = 0;
4032
4033 /*
4034 * Virtual requests must take part in timeslicing on the target engines.
4035 */
4036
4037 if (igt_spinner_init(&spin, gt))
4038 return -ENOMEM;
4039
4040 for (n = 0; n < nsibling; n++) {
4041 ce = intel_context_create(siblings[n]);
4042 if (IS_ERR(ce)) {
4043 err = PTR_ERR(ce);
4044 goto out;
4045 }
4046
4047 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4048 intel_context_put(ce);
4049 if (IS_ERR(rq)) {
4050 err = PTR_ERR(rq);
4051 goto out;
4052 }
4053
4054 i915_request_add(rq);
4055 }
4056
4057 ce = intel_execlists_create_virtual(siblings, nsibling);
4058 if (IS_ERR(ce)) {
4059 err = PTR_ERR(ce);
4060 goto out;
4061 }
4062
4063 rq = intel_context_create_request(ce);
4064 intel_context_put(ce);
4065 if (IS_ERR(rq)) {
4066 err = PTR_ERR(rq);
4067 goto out;
4068 }
4069
4070 i915_request_get(rq);
4071 i915_request_add(rq);
4072 if (i915_request_wait(rq, 0, timeout) < 0) {
4073 GEM_TRACE_ERR("%s(%s) failed to slice in virtual request\n",
4074 __func__, rq->engine->name);
4075 GEM_TRACE_DUMP();
4076 intel_gt_set_wedged(gt);
4077 err = -EIO;
4078 }
4079 i915_request_put(rq);
4080
4081out:
4082 igt_spinner_end(&spin);
4083 if (igt_flush_test(gt->i915))
4084 err = -EIO;
4085 igt_spinner_fini(&spin);
4086 return err;
4087}
4088
4089static int sliceout_virtual_engine(struct intel_gt *gt,
4090 struct intel_engine_cs **siblings,
4091 unsigned int nsibling)
4092{
4093 const long timeout = slice_timeout(siblings[0]);
4094 struct intel_context *ce;
4095 struct i915_request *rq;
4096 struct igt_spinner spin;
4097 unsigned int n;
4098 int err = 0;
4099
4100 /*
4101 * Virtual requests must allow others a fair timeslice.
4102 */
4103
4104 if (igt_spinner_init(&spin, gt))
4105 return -ENOMEM;
4106
4107 /* XXX We do not handle oversubscription and fairness with normal rq */
4108 for (n = 0; n < nsibling; n++) {
4109 ce = intel_execlists_create_virtual(siblings, nsibling);
4110 if (IS_ERR(ce)) {
4111 err = PTR_ERR(ce);
4112 goto out;
4113 }
4114
4115 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4116 intel_context_put(ce);
4117 if (IS_ERR(rq)) {
4118 err = PTR_ERR(rq);
4119 goto out;
4120 }
4121
4122 i915_request_add(rq);
4123 }
4124
4125 for (n = 0; !err && n < nsibling; n++) {
4126 ce = intel_context_create(siblings[n]);
4127 if (IS_ERR(ce)) {
4128 err = PTR_ERR(ce);
4129 goto out;
4130 }
4131
4132 rq = intel_context_create_request(ce);
4133 intel_context_put(ce);
4134 if (IS_ERR(rq)) {
4135 err = PTR_ERR(rq);
4136 goto out;
4137 }
4138
4139 i915_request_get(rq);
4140 i915_request_add(rq);
4141 if (i915_request_wait(rq, 0, timeout) < 0) {
4142 GEM_TRACE_ERR("%s(%s) failed to slice out virtual request\n",
4143 __func__, siblings[n]->name);
4144 GEM_TRACE_DUMP();
4145 intel_gt_set_wedged(gt);
4146 err = -EIO;
4147 }
4148 i915_request_put(rq);
4149 }
4150
4151out:
4152 igt_spinner_end(&spin);
4153 if (igt_flush_test(gt->i915))
4154 err = -EIO;
4155 igt_spinner_fini(&spin);
4156 return err;
4157}
4158
4159static int live_virtual_slice(void *arg)
4160{
4161 struct intel_gt *gt = arg;
4162 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4163 unsigned int class;
4164 int err;
4165
4166 if (intel_uc_uses_guc_submission(>->uc))
4167 return 0;
4168
4169 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4170 unsigned int nsibling;
4171
4172 nsibling = __select_siblings(gt, class, siblings,
4173 intel_engine_has_timeslices);
4174 if (nsibling < 2)
4175 continue;
4176
4177 err = slicein_virtual_engine(gt, siblings, nsibling);
4178 if (err)
4179 return err;
4180
4181 err = sliceout_virtual_engine(gt, siblings, nsibling);
4182 if (err)
4183 return err;
4184 }
4185
4186 return 0;
4187}
4188
4189static int preserved_virtual_engine(struct intel_gt *gt,
4190 struct intel_engine_cs **siblings,
4191 unsigned int nsibling)
4192{
4193 struct i915_request *last = NULL;
4194 struct intel_context *ve;
4195 struct i915_vma *scratch;
4196 struct igt_live_test t;
4197 unsigned int n;
4198 int err = 0;
4199 u32 *cs;
4200
4201 scratch =
4202 __vm_create_scratch_for_read_pinned(&siblings[0]->gt->ggtt->vm,
4203 PAGE_SIZE);
4204 if (IS_ERR(scratch))
4205 return PTR_ERR(scratch);
4206
4207 err = i915_vma_sync(scratch);
4208 if (err)
4209 goto out_scratch;
4210
4211 ve = intel_execlists_create_virtual(siblings, nsibling);
4212 if (IS_ERR(ve)) {
4213 err = PTR_ERR(ve);
4214 goto out_scratch;
4215 }
4216
4217 err = intel_context_pin(ve);
4218 if (err)
4219 goto out_put;
4220
4221 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
4222 if (err)
4223 goto out_unpin;
4224
4225 for (n = 0; n < NUM_GPR_DW; n++) {
4226 struct intel_engine_cs *engine = siblings[n % nsibling];
4227 struct i915_request *rq;
4228
4229 rq = i915_request_create(ve);
4230 if (IS_ERR(rq)) {
4231 err = PTR_ERR(rq);
4232 goto out_end;
4233 }
4234
4235 i915_request_put(last);
4236 last = i915_request_get(rq);
4237
4238 cs = intel_ring_begin(rq, 8);
4239 if (IS_ERR(cs)) {
4240 i915_request_add(rq);
4241 err = PTR_ERR(cs);
4242 goto out_end;
4243 }
4244
4245 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4246 *cs++ = CS_GPR(engine, n);
4247 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
4248 *cs++ = 0;
4249
4250 *cs++ = MI_LOAD_REGISTER_IMM(1);
4251 *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
4252 *cs++ = n + 1;
4253
4254 *cs++ = MI_NOOP;
4255 intel_ring_advance(rq, cs);
4256
4257 /* Restrict this request to run on a particular engine */
4258 rq->execution_mask = engine->mask;
4259 i915_request_add(rq);
4260 }
4261
4262 if (i915_request_wait(last, 0, HZ / 5) < 0) {
4263 err = -ETIME;
4264 goto out_end;
4265 }
4266
4267 cs = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB);
4268 if (IS_ERR(cs)) {
4269 err = PTR_ERR(cs);
4270 goto out_end;
4271 }
4272
4273 for (n = 0; n < NUM_GPR_DW; n++) {
4274 if (cs[n] != n) {
4275 pr_err("Incorrect value[%d] found for GPR[%d]\n",
4276 cs[n], n);
4277 err = -EINVAL;
4278 break;
4279 }
4280 }
4281
4282 i915_gem_object_unpin_map(scratch->obj);
4283
4284out_end:
4285 if (igt_live_test_end(&t))
4286 err = -EIO;
4287 i915_request_put(last);
4288out_unpin:
4289 intel_context_unpin(ve);
4290out_put:
4291 intel_context_put(ve);
4292out_scratch:
4293 i915_vma_unpin_and_release(&scratch, 0);
4294 return err;
4295}
4296
4297static int live_virtual_preserved(void *arg)
4298{
4299 struct intel_gt *gt = arg;
4300 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4301 unsigned int class;
4302
4303 /*
4304 * Check that the context image retains non-privileged (user) registers
4305 * from one engine to the next. For this we check that the CS_GPR
4306 * are preserved.
4307 */
4308
4309 if (intel_uc_uses_guc_submission(>->uc))
4310 return 0;
4311
4312 /* As we use CS_GPR we cannot run before they existed on all engines. */
4313 if (GRAPHICS_VER(gt->i915) < 9)
4314 return 0;
4315
4316 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4317 int nsibling, err;
4318
4319 nsibling = select_siblings(gt, class, siblings);
4320 if (nsibling < 2)
4321 continue;
4322
4323 err = preserved_virtual_engine(gt, siblings, nsibling);
4324 if (err)
4325 return err;
4326 }
4327
4328 return 0;
4329}
4330
4331static int bond_virtual_engine(struct intel_gt *gt,
4332 unsigned int class,
4333 struct intel_engine_cs **siblings,
4334 unsigned int nsibling,
4335 unsigned int flags)
4336#define BOND_SCHEDULE BIT(0)
4337{
4338 struct intel_engine_cs *master;
4339 struct i915_request *rq[16];
4340 enum intel_engine_id id;
4341 struct igt_spinner spin;
4342 unsigned long n;
4343 int err;
4344
4345 /*
4346 * A set of bonded requests is intended to be run concurrently
4347 * across a number of engines. We use one request per-engine
4348 * and a magic fence to schedule each of the bonded requests
4349 * at the same time. A consequence of our current scheduler is that
4350 * we only move requests to the HW ready queue when the request
4351 * becomes ready, that is when all of its prerequisite fences have
4352 * been signaled. As one of those fences is the master submit fence,
4353 * there is a delay on all secondary fences as the HW may be
4354 * currently busy. Equally, as all the requests are independent,
4355 * they may have other fences that delay individual request
4356 * submission to HW. Ergo, we do not guarantee that all requests are
4357 * immediately submitted to HW at the same time, just that if the
4358 * rules are abided by, they are ready at the same time as the
4359 * first is submitted. Userspace can embed semaphores in its batch
4360 * to ensure parallel execution of its phases as it requires.
4361 * Though naturally it gets requested that perhaps the scheduler should
4362 * take care of parallel execution, even across preemption events on
4363 * different HW. (The proper answer is of course "lalalala".)
4364 *
4365 * With the submit-fence, we have identified three possible phases
4366 * of synchronisation depending on the master fence: queued (not
4367 * ready), executing, and signaled. The first two are quite simple
4368 * and checked below. However, the signaled master fence handling is
4369 * contentious. Currently we do not distinguish between a signaled
4370 * fence and an expired fence, as once signaled it does not convey
4371 * any information about the previous execution. It may even be freed
4372 * and hence checking later it may not exist at all. Ergo we currently
4373 * do not apply the bonding constraint for an already signaled fence,
4374 * as our expectation is that it should not constrain the secondaries
4375 * and is outside of the scope of the bonded request API (i.e. all
4376 * userspace requests are meant to be running in parallel). As
4377 * it imposes no constraint, and is effectively a no-op, we do not
4378 * check below as normal execution flows are checked extensively above.
4379 *
4380 * XXX Is the degenerate handling of signaled submit fences the
4381 * expected behaviour for userpace?
4382 */
4383
4384 GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
4385
4386 if (igt_spinner_init(&spin, gt))
4387 return -ENOMEM;
4388
4389 err = 0;
4390 rq[0] = ERR_PTR(-ENOMEM);
4391 for_each_engine(master, gt, id) {
4392 struct i915_sw_fence fence = {};
4393 struct intel_context *ce;
4394
4395 if (master->class == class)
4396 continue;
4397
4398 ce = intel_context_create(master);
4399 if (IS_ERR(ce)) {
4400 err = PTR_ERR(ce);
4401 goto out;
4402 }
4403
4404 memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
4405
4406 rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP);
4407 intel_context_put(ce);
4408 if (IS_ERR(rq[0])) {
4409 err = PTR_ERR(rq[0]);
4410 goto out;
4411 }
4412 i915_request_get(rq[0]);
4413
4414 if (flags & BOND_SCHEDULE) {
4415 onstack_fence_init(&fence);
4416 err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
4417 &fence,
4418 GFP_KERNEL);
4419 }
4420
4421 i915_request_add(rq[0]);
4422 if (err < 0)
4423 goto out;
4424
4425 if (!(flags & BOND_SCHEDULE) &&
4426 !igt_wait_for_spinner(&spin, rq[0])) {
4427 err = -EIO;
4428 goto out;
4429 }
4430
4431 for (n = 0; n < nsibling; n++) {
4432 struct intel_context *ve;
4433
4434 ve = intel_execlists_create_virtual(siblings, nsibling);
4435 if (IS_ERR(ve)) {
4436 err = PTR_ERR(ve);
4437 onstack_fence_fini(&fence);
4438 goto out;
4439 }
4440
4441 err = intel_virtual_engine_attach_bond(ve->engine,
4442 master,
4443 siblings[n]);
4444 if (err) {
4445 intel_context_put(ve);
4446 onstack_fence_fini(&fence);
4447 goto out;
4448 }
4449
4450 err = intel_context_pin(ve);
4451 intel_context_put(ve);
4452 if (err) {
4453 onstack_fence_fini(&fence);
4454 goto out;
4455 }
4456
4457 rq[n + 1] = i915_request_create(ve);
4458 intel_context_unpin(ve);
4459 if (IS_ERR(rq[n + 1])) {
4460 err = PTR_ERR(rq[n + 1]);
4461 onstack_fence_fini(&fence);
4462 goto out;
4463 }
4464 i915_request_get(rq[n + 1]);
4465
4466 err = i915_request_await_execution(rq[n + 1],
4467 &rq[0]->fence,
4468 ve->engine->bond_execute);
4469 i915_request_add(rq[n + 1]);
4470 if (err < 0) {
4471 onstack_fence_fini(&fence);
4472 goto out;
4473 }
4474 }
4475 onstack_fence_fini(&fence);
4476 intel_engine_flush_submission(master);
4477 igt_spinner_end(&spin);
4478
4479 if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
4480 pr_err("Master request did not execute (on %s)!\n",
4481 rq[0]->engine->name);
4482 err = -EIO;
4483 goto out;
4484 }
4485
4486 for (n = 0; n < nsibling; n++) {
4487 if (i915_request_wait(rq[n + 1], 0,
4488 MAX_SCHEDULE_TIMEOUT) < 0) {
4489 err = -EIO;
4490 goto out;
4491 }
4492
4493 if (rq[n + 1]->engine != siblings[n]) {
4494 pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
4495 siblings[n]->name,
4496 rq[n + 1]->engine->name,
4497 rq[0]->engine->name);
4498 err = -EINVAL;
4499 goto out;
4500 }
4501 }
4502
4503 for (n = 0; !IS_ERR(rq[n]); n++)
4504 i915_request_put(rq[n]);
4505 rq[0] = ERR_PTR(-ENOMEM);
4506 }
4507
4508out:
4509 for (n = 0; !IS_ERR(rq[n]); n++)
4510 i915_request_put(rq[n]);
4511 if (igt_flush_test(gt->i915))
4512 err = -EIO;
4513
4514 igt_spinner_fini(&spin);
4515 return err;
4516}
4517
4518static int live_virtual_bond(void *arg)
4519{
4520 static const struct phase {
4521 const char *name;
4522 unsigned int flags;
4523 } phases[] = {
4524 { "", 0 },
4525 { "schedule", BOND_SCHEDULE },
4526 { },
4527 };
4528 struct intel_gt *gt = arg;
4529 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4530 unsigned int class;
4531 int err;
4532
4533 if (intel_uc_uses_guc_submission(>->uc))
4534 return 0;
4535
4536 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4537 const struct phase *p;
4538 int nsibling;
4539
4540 nsibling = select_siblings(gt, class, siblings);
4541 if (nsibling < 2)
4542 continue;
4543
4544 for (p = phases; p->name; p++) {
4545 err = bond_virtual_engine(gt,
4546 class, siblings, nsibling,
4547 p->flags);
4548 if (err) {
4549 pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
4550 __func__, p->name, class, nsibling, err);
4551 return err;
4552 }
4553 }
4554 }
4555
4556 return 0;
4557}
4558
4559static int reset_virtual_engine(struct intel_gt *gt,
4560 struct intel_engine_cs **siblings,
4561 unsigned int nsibling)
4562{
4563 struct intel_engine_cs *engine;
4564 struct intel_context *ve;
4565 struct igt_spinner spin;
4566 struct i915_request *rq;
4567 unsigned int n;
4568 int err = 0;
4569
4570 /*
4571 * In order to support offline error capture for fast preempt reset,
4572 * we need to decouple the guilty request and ensure that it and its
4573 * descendents are not executed while the capture is in progress.
4574 */
4575
4576 if (igt_spinner_init(&spin, gt))
4577 return -ENOMEM;
4578
4579 ve = intel_execlists_create_virtual(siblings, nsibling);
4580 if (IS_ERR(ve)) {
4581 err = PTR_ERR(ve);
4582 goto out_spin;
4583 }
4584
4585 for (n = 0; n < nsibling; n++)
4586 st_engine_heartbeat_disable(siblings[n]);
4587
4588 rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
4589 if (IS_ERR(rq)) {
4590 err = PTR_ERR(rq);
4591 goto out_heartbeat;
4592 }
4593 i915_request_add(rq);
4594
4595 if (!igt_wait_for_spinner(&spin, rq)) {
4596 intel_gt_set_wedged(gt);
4597 err = -ETIME;
4598 goto out_heartbeat;
4599 }
4600
4601 engine = rq->engine;
4602 GEM_BUG_ON(engine == ve->engine);
4603
4604 /* Take ownership of the reset and tasklet */
4605 err = engine_lock_reset_tasklet(engine);
4606 if (err)
4607 goto out_heartbeat;
4608
4609 engine->execlists.tasklet.callback(&engine->execlists.tasklet);
4610 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
4611
4612 /* Fake a preemption event; failed of course */
4613 spin_lock_irq(&engine->active.lock);
4614 __unwind_incomplete_requests(engine);
4615 spin_unlock_irq(&engine->active.lock);
4616 GEM_BUG_ON(rq->engine != engine);
4617
4618 /* Reset the engine while keeping our active request on hold */
4619 execlists_hold(engine, rq);
4620 GEM_BUG_ON(!i915_request_on_hold(rq));
4621
4622 __intel_engine_reset_bh(engine, NULL);
4623 GEM_BUG_ON(rq->fence.error != -EIO);
4624
4625 /* Release our grasp on the engine, letting CS flow again */
4626 engine_unlock_reset_tasklet(engine);
4627
4628 /* Check that we do not resubmit the held request */
4629 i915_request_get(rq);
4630 if (!i915_request_wait(rq, 0, HZ / 5)) {
4631 pr_err("%s: on hold request completed!\n",
4632 engine->name);
4633 intel_gt_set_wedged(gt);
4634 err = -EIO;
4635 goto out_rq;
4636 }
4637 GEM_BUG_ON(!i915_request_on_hold(rq));
4638
4639 /* But is resubmitted on release */
4640 execlists_unhold(engine, rq);
4641 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4642 pr_err("%s: held request did not complete!\n",
4643 engine->name);
4644 intel_gt_set_wedged(gt);
4645 err = -ETIME;
4646 }
4647
4648out_rq:
4649 i915_request_put(rq);
4650out_heartbeat:
4651 for (n = 0; n < nsibling; n++)
4652 st_engine_heartbeat_enable(siblings[n]);
4653
4654 intel_context_put(ve);
4655out_spin:
4656 igt_spinner_fini(&spin);
4657 return err;
4658}
4659
4660static int live_virtual_reset(void *arg)
4661{
4662 struct intel_gt *gt = arg;
4663 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4664 unsigned int class;
4665
4666 /*
4667 * Check that we handle a reset event within a virtual engine.
4668 * Only the physical engine is reset, but we have to check the flow
4669 * of the virtual requests around the reset, and make sure it is not
4670 * forgotten.
4671 */
4672
4673 if (intel_uc_uses_guc_submission(>->uc))
4674 return 0;
4675
4676 if (!intel_has_reset_engine(gt))
4677 return 0;
4678
4679 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4680 int nsibling, err;
4681
4682 nsibling = select_siblings(gt, class, siblings);
4683 if (nsibling < 2)
4684 continue;
4685
4686 err = reset_virtual_engine(gt, siblings, nsibling);
4687 if (err)
4688 return err;
4689 }
4690
4691 return 0;
4692}
4693
4694int intel_execlists_live_selftests(struct drm_i915_private *i915)
4695{
4696 static const struct i915_subtest tests[] = {
4697 SUBTEST(live_sanitycheck),
4698 SUBTEST(live_unlite_switch),
4699 SUBTEST(live_unlite_preempt),
4700 SUBTEST(live_unlite_ring),
4701 SUBTEST(live_pin_rewind),
4702 SUBTEST(live_hold_reset),
4703 SUBTEST(live_error_interrupt),
4704 SUBTEST(live_timeslice_preempt),
4705 SUBTEST(live_timeslice_rewind),
4706 SUBTEST(live_timeslice_queue),
4707 SUBTEST(live_timeslice_nopreempt),
4708 SUBTEST(live_busywait_preempt),
4709 SUBTEST(live_preempt),
4710 SUBTEST(live_late_preempt),
4711 SUBTEST(live_nopreempt),
4712 SUBTEST(live_preempt_cancel),
4713 SUBTEST(live_suppress_self_preempt),
4714 SUBTEST(live_chain_preempt),
4715 SUBTEST(live_preempt_ring),
4716 SUBTEST(live_preempt_gang),
4717 SUBTEST(live_preempt_timeout),
4718 SUBTEST(live_preempt_user),
4719 SUBTEST(live_preempt_smoke),
4720 SUBTEST(live_virtual_engine),
4721 SUBTEST(live_virtual_mask),
4722 SUBTEST(live_virtual_preserved),
4723 SUBTEST(live_virtual_slice),
4724 SUBTEST(live_virtual_bond),
4725 SUBTEST(live_virtual_reset),
4726 };
4727
4728 if (i915->gt.submission_method != INTEL_SUBMISSION_ELSP)
4729 return 0;
4730
4731 if (intel_gt_is_wedged(&i915->gt))
4732 return 0;
4733
4734 return intel_gt_live_subtests(tests, &i915->gt);
4735}