   1// SPDX-License-Identifier: MIT
   2/*
   3 * Copyright © 2018 Intel Corporation
   4 */
   5
   6#include <linux/prime_numbers.h>
   7
   8#include "gem/i915_gem_pm.h"
   9#include "gt/intel_engine_heartbeat.h"
  10#include "gt/intel_reset.h"
  11#include "gt/selftest_engine_heartbeat.h"
  12
  13#include "i915_selftest.h"
  14#include "selftests/i915_random.h"
  15#include "selftests/igt_flush_test.h"
  16#include "selftests/igt_live_test.h"
  17#include "selftests/igt_spinner.h"
  18#include "selftests/lib_sw_fence.h"
  19
  20#include "gem/selftests/igt_gem_utils.h"
  21#include "gem/selftests/mock_context.h"
  22
  23#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
  24#define NUM_GPR 16
  25#define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */
  26
  27static bool is_active(struct i915_request *rq)
  28{
  29	if (i915_request_is_active(rq))
  30		return true;
  31
  32	if (i915_request_on_hold(rq))
  33		return true;
  34
  35	if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
  36		return true;
  37
  38	return false;
  39}
  40
  41static int wait_for_submit(struct intel_engine_cs *engine,
  42			   struct i915_request *rq,
  43			   unsigned long timeout)
  44{
  45	/* Ignore our own attempts to suppress excess tasklets */
  46	tasklet_hi_schedule(&engine->execlists.tasklet);
  47
  48	timeout += jiffies;
  49	do {
  50		bool done = time_after(jiffies, timeout);
  51
  52		if (i915_request_completed(rq)) /* that was quick! */
  53			return 0;
  54
   55		/* Wait until the HW has acknowledged the submission (or err) */
  56		intel_engine_flush_submission(engine);
  57		if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
  58			return 0;
  59
  60		if (done)
  61			return -ETIME;
  62
  63		cond_resched();
  64	} while (1);
  65}
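
     /*
      * Illustrative usage sketch for wait_for_submit(); it mirrors the call
      * sites later in this file and is not additional test code:
      *
      *	rq = intel_context_create_request(ce);
      *	if (IS_ERR(rq))
      *		return PTR_ERR(rq);
      *	i915_request_get(rq);
      *	i915_request_add(rq);
      *	err = wait_for_submit(engine, rq, HZ / 2);
      *	i915_request_put(rq);
      */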
  66
  67static int wait_for_reset(struct intel_engine_cs *engine,
  68			  struct i915_request *rq,
  69			  unsigned long timeout)
  70{
  71	timeout += jiffies;
  72
  73	do {
  74		cond_resched();
  75		intel_engine_flush_submission(engine);
  76
  77		if (READ_ONCE(engine->execlists.pending[0]))
  78			continue;
  79
  80		if (i915_request_completed(rq))
  81			break;
  82
  83		if (READ_ONCE(rq->fence.error))
  84			break;
  85	} while (time_before(jiffies, timeout));
  86
  87	flush_scheduled_work();
  88
  89	if (rq->fence.error != -EIO) {
  90		pr_err("%s: hanging request %llx:%lld not reset\n",
  91		       engine->name,
  92		       rq->fence.context,
  93		       rq->fence.seqno);
  94		return -EINVAL;
  95	}
  96
   97	/* Give the request a jiffy to complete after flushing the worker */
  98	if (i915_request_wait(rq, 0,
  99			      max(0l, (long)(timeout - jiffies)) + 1) < 0) {
 100		pr_err("%s: hanging request %llx:%lld did not complete\n",
 101		       engine->name,
 102		       rq->fence.context,
 103		       rq->fence.seqno);
 104		return -ETIME;
 105	}
 106
 107	return 0;
 108}
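
     /*
      * wait_for_reset() polls until the hanging request has been marked
      * -EIO by the engine reset, then gives it the remainder of the timeout
      * (plus one jiffy) to actually complete; anything else is reported as
      * a failure.
      */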
 109
 110static int live_sanitycheck(void *arg)
 111{
 112	struct intel_gt *gt = arg;
 113	struct intel_engine_cs *engine;
 114	enum intel_engine_id id;
 115	struct igt_spinner spin;
 116	int err = 0;
 117
 118	if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
 119		return 0;
 120
 121	if (igt_spinner_init(&spin, gt))
 122		return -ENOMEM;
 123
 124	for_each_engine(engine, gt, id) {
 125		struct intel_context *ce;
 126		struct i915_request *rq;
 127
 128		ce = intel_context_create(engine);
 129		if (IS_ERR(ce)) {
 130			err = PTR_ERR(ce);
 131			break;
 132		}
 133
 134		rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
 135		if (IS_ERR(rq)) {
 136			err = PTR_ERR(rq);
 137			goto out_ctx;
 138		}
 139
 140		i915_request_add(rq);
 141		if (!igt_wait_for_spinner(&spin, rq)) {
 142			GEM_TRACE("spinner failed to start\n");
 143			GEM_TRACE_DUMP();
 144			intel_gt_set_wedged(gt);
 145			err = -EIO;
 146			goto out_ctx;
 147		}
 148
 149		igt_spinner_end(&spin);
 150		if (igt_flush_test(gt->i915)) {
 151			err = -EIO;
 152			goto out_ctx;
 153		}
 154
 155out_ctx:
 156		intel_context_put(ce);
 157		if (err)
 158			break;
 159	}
 160
 161	igt_spinner_fini(&spin);
 162	return err;
 163}
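
     /*
      * Most of the tests below reuse the spinner pattern shown above; as a
      * minimal sketch (using only helpers already used in this file):
      *
      *	struct igt_spinner spin;
      *
      *	if (igt_spinner_init(&spin, gt))
      *		return -ENOMEM;
      *	rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
      *	i915_request_add(rq);
      *	if (!igt_wait_for_spinner(&spin, rq))
      *		intel_gt_set_wedged(gt);
      *	igt_spinner_end(&spin);
      *	igt_spinner_fini(&spin);
      */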
 164
 165static int live_unlite_restore(struct intel_gt *gt, int prio)
 166{
 167	struct intel_engine_cs *engine;
 168	enum intel_engine_id id;
 169	struct igt_spinner spin;
 170	int err = -ENOMEM;
 171
 172	/*
 173	 * Check that we can correctly context switch between 2 instances
 174	 * on the same engine from the same parent context.
 175	 */
 176
 177	if (igt_spinner_init(&spin, gt))
 178		return err;
 179
 180	err = 0;
 181	for_each_engine(engine, gt, id) {
 182		struct intel_context *ce[2] = {};
 183		struct i915_request *rq[2];
 184		struct igt_live_test t;
 185		int n;
 186
 187		if (prio && !intel_engine_has_preemption(engine))
 188			continue;
 189
 190		if (!intel_engine_can_store_dword(engine))
 191			continue;
 192
 193		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
 194			err = -EIO;
 195			break;
 196		}
 197		st_engine_heartbeat_disable(engine);
 198
 199		for (n = 0; n < ARRAY_SIZE(ce); n++) {
 200			struct intel_context *tmp;
 201
 202			tmp = intel_context_create(engine);
 203			if (IS_ERR(tmp)) {
 204				err = PTR_ERR(tmp);
 205				goto err_ce;
 206			}
 207
 208			err = intel_context_pin(tmp);
 209			if (err) {
 210				intel_context_put(tmp);
 211				goto err_ce;
 212			}
 213
 214			/*
  215			 * Set up the pair of contexts such that if we
 216			 * lite-restore using the RING_TAIL from ce[1] it
 217			 * will execute garbage from ce[0]->ring.
 218			 */
 219			memset(tmp->ring->vaddr,
 220			       POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
 221			       tmp->ring->vma->size);
 222
 223			ce[n] = tmp;
 224		}
 225		GEM_BUG_ON(!ce[1]->ring->size);
 226		intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
 227		lrc_update_regs(ce[1], engine, ce[1]->ring->head);
 228
 229		rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
 230		if (IS_ERR(rq[0])) {
 231			err = PTR_ERR(rq[0]);
 232			goto err_ce;
 233		}
 234
 235		i915_request_get(rq[0]);
 236		i915_request_add(rq[0]);
 237		GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
 238
 239		if (!igt_wait_for_spinner(&spin, rq[0])) {
 240			i915_request_put(rq[0]);
 241			goto err_ce;
 242		}
 243
 244		rq[1] = i915_request_create(ce[1]);
 245		if (IS_ERR(rq[1])) {
 246			err = PTR_ERR(rq[1]);
 247			i915_request_put(rq[0]);
 248			goto err_ce;
 249		}
 250
 251		if (!prio) {
 252			/*
 253			 * Ensure we do the switch to ce[1] on completion.
 254			 *
 255			 * rq[0] is already submitted, so this should reduce
 256			 * to a no-op (a wait on a request on the same engine
 257			 * uses the submit fence, not the completion fence),
 258			 * but it will install a dependency on rq[1] for rq[0]
 259			 * that will prevent the pair being reordered by
 260			 * timeslicing.
 261			 */
 262			i915_request_await_dma_fence(rq[1], &rq[0]->fence);
 263		}
 264
 265		i915_request_get(rq[1]);
 266		i915_request_add(rq[1]);
 267		GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
 268		i915_request_put(rq[0]);
 269
 270		if (prio) {
 271			struct i915_sched_attr attr = {
 272				.priority = prio,
 273			};
 274
 275			/* Alternatively preempt the spinner with ce[1] */
 276			engine->schedule(rq[1], &attr);
 277		}
 278
 279		/* And switch back to ce[0] for good measure */
 280		rq[0] = i915_request_create(ce[0]);
 281		if (IS_ERR(rq[0])) {
 282			err = PTR_ERR(rq[0]);
 283			i915_request_put(rq[1]);
 284			goto err_ce;
 285		}
 286
 287		i915_request_await_dma_fence(rq[0], &rq[1]->fence);
 288		i915_request_get(rq[0]);
 289		i915_request_add(rq[0]);
 290		GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
 291		i915_request_put(rq[1]);
 292		i915_request_put(rq[0]);
 293
 294err_ce:
 295		intel_engine_flush_submission(engine);
 296		igt_spinner_end(&spin);
 297		for (n = 0; n < ARRAY_SIZE(ce); n++) {
 298			if (IS_ERR_OR_NULL(ce[n]))
 299				break;
 300
 301			intel_context_unpin(ce[n]);
 302			intel_context_put(ce[n]);
 303		}
 304
 305		st_engine_heartbeat_enable(engine);
 306		if (igt_live_test_end(&t))
 307			err = -EIO;
 308		if (err)
 309			break;
 310	}
 311
 312	igt_spinner_fini(&spin);
 313	return err;
 314}
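
     /*
      * live_unlite_switch() drives the context switch through ordinary
      * submission order (prio 0), while live_unlite_preempt() forces it via
      * preemption by boosting rq[1] to I915_PRIORITY_MAX.
      */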
 315
 316static int live_unlite_switch(void *arg)
 317{
 318	return live_unlite_restore(arg, 0);
 319}
 320
 321static int live_unlite_preempt(void *arg)
 322{
 323	return live_unlite_restore(arg, I915_PRIORITY_MAX);
 324}
 325
 326static int live_unlite_ring(void *arg)
 327{
 328	struct intel_gt *gt = arg;
 329	struct intel_engine_cs *engine;
 330	struct igt_spinner spin;
 331	enum intel_engine_id id;
 332	int err = 0;
 333
 334	/*
  335	 * Set up a preemption event that will cause almost the entire ring
 336	 * to be unwound, potentially fooling our intel_ring_direction()
 337	 * into emitting a forward lite-restore instead of the rollback.
 338	 */
 339
 340	if (igt_spinner_init(&spin, gt))
 341		return -ENOMEM;
 342
 343	for_each_engine(engine, gt, id) {
 344		struct intel_context *ce[2] = {};
 345		struct i915_request *rq;
 346		struct igt_live_test t;
 347		int n;
 348
 349		if (!intel_engine_has_preemption(engine))
 350			continue;
 351
 352		if (!intel_engine_can_store_dword(engine))
 353			continue;
 354
 355		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
 356			err = -EIO;
 357			break;
 358		}
 359		st_engine_heartbeat_disable(engine);
 360
 361		for (n = 0; n < ARRAY_SIZE(ce); n++) {
 362			struct intel_context *tmp;
 363
 364			tmp = intel_context_create(engine);
 365			if (IS_ERR(tmp)) {
 366				err = PTR_ERR(tmp);
 367				goto err_ce;
 368			}
 369
 370			err = intel_context_pin(tmp);
 371			if (err) {
 372				intel_context_put(tmp);
 373				goto err_ce;
 374			}
 375
 376			memset32(tmp->ring->vaddr,
 377				 0xdeadbeef, /* trigger a hang if executed */
 378				 tmp->ring->vma->size / sizeof(u32));
 379
 380			ce[n] = tmp;
 381		}
 382
 383		/* Create max prio spinner, followed by N low prio nops */
 384		rq = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
 385		if (IS_ERR(rq)) {
 386			err = PTR_ERR(rq);
 387			goto err_ce;
 388		}
 389
 390		i915_request_get(rq);
 391		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
 392		i915_request_add(rq);
 393
 394		if (!igt_wait_for_spinner(&spin, rq)) {
 395			intel_gt_set_wedged(gt);
 396			i915_request_put(rq);
 397			err = -ETIME;
 398			goto err_ce;
 399		}
 400
  401		/* Fill the ring until we cause a wrap */
 402		n = 0;
 403		while (intel_ring_direction(ce[0]->ring,
 404					    rq->wa_tail,
 405					    ce[0]->ring->tail) <= 0) {
 406			struct i915_request *tmp;
 407
 408			tmp = intel_context_create_request(ce[0]);
 409			if (IS_ERR(tmp)) {
 410				err = PTR_ERR(tmp);
 411				i915_request_put(rq);
 412				goto err_ce;
 413			}
 414
 415			i915_request_add(tmp);
 416			intel_engine_flush_submission(engine);
 417			n++;
 418		}
 419		intel_engine_flush_submission(engine);
 420		pr_debug("%s: Filled ring with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
 421			 engine->name, n,
 422			 ce[0]->ring->size,
 423			 ce[0]->ring->tail,
 424			 ce[0]->ring->emit,
 425			 rq->tail);
 426		GEM_BUG_ON(intel_ring_direction(ce[0]->ring,
 427						rq->tail,
 428						ce[0]->ring->tail) <= 0);
 429		i915_request_put(rq);
 430
  431		/* Queue a request on the second ring to preempt the first after rq[0] */
 432		rq = intel_context_create_request(ce[1]);
 433		if (IS_ERR(rq)) {
 434			err = PTR_ERR(rq);
 435			goto err_ce;
 436		}
 437
 438		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
 439		i915_request_get(rq);
 440		i915_request_add(rq);
 441
 442		err = wait_for_submit(engine, rq, HZ / 2);
 443		i915_request_put(rq);
 444		if (err) {
 445			pr_err("%s: preemption request was not submitted\n",
 446			       engine->name);
 447			err = -ETIME;
 448		}
 449
 450		pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
 451			 engine->name,
 452			 ce[0]->ring->tail, ce[0]->ring->emit,
 453			 ce[1]->ring->tail, ce[1]->ring->emit);
 454
 455err_ce:
 456		intel_engine_flush_submission(engine);
 457		igt_spinner_end(&spin);
 458		for (n = 0; n < ARRAY_SIZE(ce); n++) {
 459			if (IS_ERR_OR_NULL(ce[n]))
 460				break;
 461
 462			intel_context_unpin(ce[n]);
 463			intel_context_put(ce[n]);
 464		}
 465		st_engine_heartbeat_enable(engine);
 466		if (igt_live_test_end(&t))
 467			err = -EIO;
 468		if (err)
 469			break;
 470	}
 471
 472	igt_spinner_fini(&spin);
 473	return err;
 474}
 475
 476static int live_pin_rewind(void *arg)
 477{
 478	struct intel_gt *gt = arg;
 479	struct intel_engine_cs *engine;
 480	enum intel_engine_id id;
 481	int err = 0;
 482
 483	/*
  484	 * We have to be careful not to trust intel_ring too much; for example,
  485	 * ring->head is updated upon retire, which is out of sync with pinning
 486	 * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
 487	 * or else we risk writing an older, stale value.
 488	 *
  489	 * To simulate this, let's apply a bit of deliberate sabotage.
 490	 */
 491
 492	for_each_engine(engine, gt, id) {
 493		struct intel_context *ce;
 494		struct i915_request *rq;
 495		struct intel_ring *ring;
 496		struct igt_live_test t;
 497
 498		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
 499			err = -EIO;
 500			break;
 501		}
 502
 503		ce = intel_context_create(engine);
 504		if (IS_ERR(ce)) {
 505			err = PTR_ERR(ce);
 506			break;
 507		}
 508
 509		err = intel_context_pin(ce);
 510		if (err) {
 511			intel_context_put(ce);
 512			break;
 513		}
 514
 515		/* Keep the context awake while we play games */
 516		err = i915_active_acquire(&ce->active);
 517		if (err) {
 518			intel_context_unpin(ce);
 519			intel_context_put(ce);
 520			break;
 521		}
 522		ring = ce->ring;
 523
 524		/* Poison the ring, and offset the next request from HEAD */
 525		memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
 526		ring->emit = ring->size / 2;
 527		ring->tail = ring->emit;
 528		GEM_BUG_ON(ring->head);
 529
 530		intel_context_unpin(ce);
 531
 532		/* Submit a simple nop request */
 533		GEM_BUG_ON(intel_context_is_pinned(ce));
 534		rq = intel_context_create_request(ce);
 535		i915_active_release(&ce->active); /* e.g. async retire */
 536		intel_context_put(ce);
 537		if (IS_ERR(rq)) {
 538			err = PTR_ERR(rq);
 539			break;
 540		}
 541		GEM_BUG_ON(!rq->head);
 542		i915_request_add(rq);
 543
 544		/* Expect not to hang! */
 545		if (igt_live_test_end(&t)) {
 546			err = -EIO;
 547			break;
 548		}
 549	}
 550
 551	return err;
 552}
 553
 554static int engine_lock_reset_tasklet(struct intel_engine_cs *engine)
 555{
 556	tasklet_disable(&engine->execlists.tasklet);
 557	local_bh_disable();
 558
 559	if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
 560			     &engine->gt->reset.flags)) {
 561		local_bh_enable();
 562		tasklet_enable(&engine->execlists.tasklet);
 563
 564		intel_gt_set_wedged(engine->gt);
 565		return -EBUSY;
 566	}
 567
 568	return 0;
 569}
 570
 571static void engine_unlock_reset_tasklet(struct intel_engine_cs *engine)
 572{
 573	clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id,
 574			      &engine->gt->reset.flags);
 575
 576	local_bh_enable();
 577	tasklet_enable(&engine->execlists.tasklet);
 578}
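
     /*
      * engine_lock_reset_tasklet() and engine_unlock_reset_tasklet() bracket
      * a manual engine reset while the submission tasklet is quiesced; a
      * sketch of the sequence used by live_hold_reset() below:
      *
      *	err = engine_lock_reset_tasklet(engine);
      *	if (err)
      *		return err;
      *	__intel_engine_reset_bh(engine, NULL);
      *	engine_unlock_reset_tasklet(engine);
      */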
 579
 580static int live_hold_reset(void *arg)
 581{
 582	struct intel_gt *gt = arg;
 583	struct intel_engine_cs *engine;
 584	enum intel_engine_id id;
 585	struct igt_spinner spin;
 586	int err = 0;
 587
 588	/*
 589	 * In order to support offline error capture for fast preempt reset,
 590	 * we need to decouple the guilty request and ensure that it and its
  591	 * descendants are not executed while the capture is in progress.
 592	 */
 593
 594	if (!intel_has_reset_engine(gt))
 595		return 0;
 596
 597	if (igt_spinner_init(&spin, gt))
 598		return -ENOMEM;
 599
 600	for_each_engine(engine, gt, id) {
 601		struct intel_context *ce;
 602		struct i915_request *rq;
 603
 604		ce = intel_context_create(engine);
 605		if (IS_ERR(ce)) {
 606			err = PTR_ERR(ce);
 607			break;
 608		}
 609
 610		st_engine_heartbeat_disable(engine);
 611
 612		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
 613		if (IS_ERR(rq)) {
 614			err = PTR_ERR(rq);
 615			goto out;
 616		}
 617		i915_request_add(rq);
 618
 619		if (!igt_wait_for_spinner(&spin, rq)) {
 620			intel_gt_set_wedged(gt);
 621			err = -ETIME;
 622			goto out;
 623		}
 624
 625		/* We have our request executing, now remove it and reset */
 626
 627		err = engine_lock_reset_tasklet(engine);
 628		if (err)
 629			goto out;
 630
 631		engine->execlists.tasklet.callback(&engine->execlists.tasklet);
 632		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
 633
 634		i915_request_get(rq);
 635		execlists_hold(engine, rq);
 636		GEM_BUG_ON(!i915_request_on_hold(rq));
 637
 638		__intel_engine_reset_bh(engine, NULL);
 639		GEM_BUG_ON(rq->fence.error != -EIO);
 640
 641		engine_unlock_reset_tasklet(engine);
 642
 643		/* Check that we do not resubmit the held request */
 644		if (!i915_request_wait(rq, 0, HZ / 5)) {
 645			pr_err("%s: on hold request completed!\n",
 646			       engine->name);
 647			i915_request_put(rq);
 648			err = -EIO;
 649			goto out;
 650		}
 651		GEM_BUG_ON(!i915_request_on_hold(rq));
 652
  653		/* But it is resubmitted on release */
 654		execlists_unhold(engine, rq);
 655		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 656			pr_err("%s: held request did not complete!\n",
 657			       engine->name);
 658			intel_gt_set_wedged(gt);
 659			err = -ETIME;
 660		}
 661		i915_request_put(rq);
 662
 663out:
 664		st_engine_heartbeat_enable(engine);
 665		intel_context_put(ce);
 666		if (err)
 667			break;
 668	}
 669
 670	igt_spinner_fini(&spin);
 671	return err;
 672}
 673
 674static const char *error_repr(int err)
 675{
 676	return err ? "bad" : "good";
 677}
 678
 679static int live_error_interrupt(void *arg)
 680{
 681	static const struct error_phase {
 682		enum { GOOD = 0, BAD = -EIO } error[2];
 683	} phases[] = {
 684		{ { BAD,  GOOD } },
 685		{ { BAD,  BAD  } },
 686		{ { BAD,  GOOD } },
 687		{ { GOOD, GOOD } }, /* sentinel */
 688	};
 689	struct intel_gt *gt = arg;
 690	struct intel_engine_cs *engine;
 691	enum intel_engine_id id;
 692
 693	/*
 694	 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
 695	 * of invalid commands in user batches that will cause a GPU hang.
 696	 * This is a faster mechanism than using hangcheck/heartbeats, but
 697	 * only detects problems the HW knows about -- it will not warn when
 698	 * we kill the HW!
 699	 *
 700	 * To verify our detection and reset, we throw some invalid commands
 701	 * at the HW and wait for the interrupt.
 702	 */
 703
 704	if (!intel_has_reset_engine(gt))
 705		return 0;
 706
 707	for_each_engine(engine, gt, id) {
 708		const struct error_phase *p;
 709		int err = 0;
 710
 711		st_engine_heartbeat_disable(engine);
 712
 713		for (p = phases; p->error[0] != GOOD; p++) {
 714			struct i915_request *client[ARRAY_SIZE(phases->error)];
 715			u32 *cs;
 716			int i;
 717
  718			memset(client, 0, sizeof(client));
 719			for (i = 0; i < ARRAY_SIZE(client); i++) {
 720				struct intel_context *ce;
 721				struct i915_request *rq;
 722
 723				ce = intel_context_create(engine);
 724				if (IS_ERR(ce)) {
 725					err = PTR_ERR(ce);
 726					goto out;
 727				}
 728
 729				rq = intel_context_create_request(ce);
 730				intel_context_put(ce);
 731				if (IS_ERR(rq)) {
 732					err = PTR_ERR(rq);
 733					goto out;
 734				}
 735
 736				if (rq->engine->emit_init_breadcrumb) {
 737					err = rq->engine->emit_init_breadcrumb(rq);
 738					if (err) {
 739						i915_request_add(rq);
 740						goto out;
 741					}
 742				}
 743
 744				cs = intel_ring_begin(rq, 2);
 745				if (IS_ERR(cs)) {
 746					i915_request_add(rq);
 747					err = PTR_ERR(cs);
 748					goto out;
 749				}
 750
 751				if (p->error[i]) {
 752					*cs++ = 0xdeadbeef;
 753					*cs++ = 0xdeadbeef;
 754				} else {
 755					*cs++ = MI_NOOP;
 756					*cs++ = MI_NOOP;
 757				}
 758
 759				client[i] = i915_request_get(rq);
 760				i915_request_add(rq);
 761			}
 762
 763			err = wait_for_submit(engine, client[0], HZ / 2);
 764			if (err) {
 765				pr_err("%s: first request did not start within time!\n",
 766				       engine->name);
 767				err = -ETIME;
 768				goto out;
 769			}
 770
 771			for (i = 0; i < ARRAY_SIZE(client); i++) {
 772				if (i915_request_wait(client[i], 0, HZ / 5) < 0)
 773					pr_debug("%s: %s request incomplete!\n",
 774						 engine->name,
 775						 error_repr(p->error[i]));
 776
 777				if (!i915_request_started(client[i])) {
 778					pr_err("%s: %s request not started!\n",
 779					       engine->name,
 780					       error_repr(p->error[i]));
 781					err = -ETIME;
 782					goto out;
 783				}
 784
 785				/* Kick the tasklet to process the error */
 786				intel_engine_flush_submission(engine);
 787				if (client[i]->fence.error != p->error[i]) {
 788					pr_err("%s: %s request (%s) with wrong error code: %d\n",
 789					       engine->name,
 790					       error_repr(p->error[i]),
 791					       i915_request_completed(client[i]) ? "completed" : "running",
 792					       client[i]->fence.error);
 793					err = -EINVAL;
 794					goto out;
 795				}
 796			}
 797
 798out:
 799			for (i = 0; i < ARRAY_SIZE(client); i++)
 800				if (client[i])
 801					i915_request_put(client[i]);
 802			if (err) {
 803				pr_err("%s: failed at phase[%zd] { %d, %d }\n",
 804				       engine->name, p - phases,
 805				       p->error[0], p->error[1]);
 806				break;
 807			}
 808		}
 809
 810		st_engine_heartbeat_enable(engine);
 811		if (err) {
 812			intel_gt_set_wedged(gt);
 813			return err;
 814		}
 815	}
 816
 817	return 0;
 818}
 819
 820static int
 821emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
 822{
 823	u32 *cs;
 824
 825	cs = intel_ring_begin(rq, 10);
 826	if (IS_ERR(cs))
 827		return PTR_ERR(cs);
 828
 829	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
 830
 831	*cs++ = MI_SEMAPHORE_WAIT |
 832		MI_SEMAPHORE_GLOBAL_GTT |
 833		MI_SEMAPHORE_POLL |
 834		MI_SEMAPHORE_SAD_NEQ_SDD;
 835	*cs++ = 0;
 836	*cs++ = i915_ggtt_offset(vma) + 4 * idx;
 837	*cs++ = 0;
 838
 839	if (idx > 0) {
 840		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
 841		*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
 842		*cs++ = 0;
 843		*cs++ = 1;
 844	} else {
 845		*cs++ = MI_NOOP;
 846		*cs++ = MI_NOOP;
 847		*cs++ = MI_NOOP;
 848		*cs++ = MI_NOOP;
 849	}
 850
 851	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
 852
 853	intel_ring_advance(rq, cs);
 854	return 0;
 855}
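
     /*
      * Each link in the semaphore chain spins (with arbitration enabled)
      * until its own slot, vma[idx], becomes non-zero and then writes 1 into
      * the previous slot, vma[idx - 1]. Releasing the final link therefore
      * unwinds the whole chain back towards idx 0.
      */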
 856
 857static struct i915_request *
 858semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
 859{
 860	struct intel_context *ce;
 861	struct i915_request *rq;
 862	int err;
 863
 864	ce = intel_context_create(engine);
 865	if (IS_ERR(ce))
 866		return ERR_CAST(ce);
 867
 868	rq = intel_context_create_request(ce);
 869	if (IS_ERR(rq))
 870		goto out_ce;
 871
 872	err = 0;
 873	if (rq->engine->emit_init_breadcrumb)
 874		err = rq->engine->emit_init_breadcrumb(rq);
 875	if (err == 0)
 876		err = emit_semaphore_chain(rq, vma, idx);
 877	if (err == 0)
 878		i915_request_get(rq);
 879	i915_request_add(rq);
 880	if (err)
 881		rq = ERR_PTR(err);
 882
 883out_ce:
 884	intel_context_put(ce);
 885	return rq;
 886}
 887
 888static int
 889release_queue(struct intel_engine_cs *engine,
 890	      struct i915_vma *vma,
 891	      int idx, int prio)
 892{
 893	struct i915_sched_attr attr = {
 894		.priority = prio,
 895	};
 896	struct i915_request *rq;
 897	u32 *cs;
 898
 899	rq = intel_engine_create_kernel_request(engine);
 900	if (IS_ERR(rq))
 901		return PTR_ERR(rq);
 902
 903	cs = intel_ring_begin(rq, 4);
 904	if (IS_ERR(cs)) {
 905		i915_request_add(rq);
 906		return PTR_ERR(cs);
 907	}
 908
 909	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
 910	*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
 911	*cs++ = 0;
 912	*cs++ = 1;
 913
 914	intel_ring_advance(rq, cs);
 915
 916	i915_request_get(rq);
 917	i915_request_add(rq);
 918
 919	local_bh_disable();
 920	engine->schedule(rq, &attr);
 921	local_bh_enable(); /* kick tasklet */
 922
 923	i915_request_put(rq);
 924
 925	return 0;
 926}
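
     /*
      * release_queue() emits the matching MI_STORE_DWORD_IMM from a kernel
      * context at the requested priority, writing 1 into slot idx - 1 and so
      * releasing the tail of the semaphore chain built above.
      */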
 927
 928static int
 929slice_semaphore_queue(struct intel_engine_cs *outer,
 930		      struct i915_vma *vma,
 931		      int count)
 932{
 933	struct intel_engine_cs *engine;
 934	struct i915_request *head;
 935	enum intel_engine_id id;
 936	int err, i, n = 0;
 937
 938	head = semaphore_queue(outer, vma, n++);
 939	if (IS_ERR(head))
 940		return PTR_ERR(head);
 941
 942	for_each_engine(engine, outer->gt, id) {
 943		if (!intel_engine_has_preemption(engine))
 944			continue;
 945
 946		for (i = 0; i < count; i++) {
 947			struct i915_request *rq;
 948
 949			rq = semaphore_queue(engine, vma, n++);
 950			if (IS_ERR(rq)) {
 951				err = PTR_ERR(rq);
 952				goto out;
 953			}
 954
 955			i915_request_put(rq);
 956		}
 957	}
 958
 959	err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER);
 960	if (err)
 961		goto out;
 962
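     	/*
     	 * With every engine loaded with busy-waiting semaphores, the chain
     	 * can only complete if each engine timeslices between its queued
     	 * waiters, hence the engine- and count-scaled timeout below.
     	 */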
 963	if (i915_request_wait(head, 0,
 964			      2 * outer->gt->info.num_engines * (count + 2) * (count + 3)) < 0) {
 965		pr_err("%s: Failed to slice along semaphore chain of length (%d, %d)!\n",
 966		       outer->name, count, n);
 967		GEM_TRACE_DUMP();
 968		intel_gt_set_wedged(outer->gt);
 969		err = -EIO;
 970	}
 971
 972out:
 973	i915_request_put(head);
 974	return err;
 975}
 976
 977static int live_timeslice_preempt(void *arg)
 978{
 979	struct intel_gt *gt = arg;
 980	struct drm_i915_gem_object *obj;
 981	struct intel_engine_cs *engine;
 982	enum intel_engine_id id;
 983	struct i915_vma *vma;
 984	void *vaddr;
 985	int err = 0;
 986
 987	/*
 988	 * If a request takes too long, we would like to give other users
 989	 * a fair go on the GPU. In particular, users may create batches
 990	 * that wait upon external input, where that input may even be
 991	 * supplied by another GPU job. To avoid blocking forever, we
 992	 * need to preempt the current task and replace it with another
 993	 * ready task.
 994	 */
 995	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
 996		return 0;
 997
 998	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
 999	if (IS_ERR(obj))
1000		return PTR_ERR(obj);
1001
1002	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1003	if (IS_ERR(vma)) {
1004		err = PTR_ERR(vma);
1005		goto err_obj;
1006	}
1007
1008	vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
1009	if (IS_ERR(vaddr)) {
1010		err = PTR_ERR(vaddr);
1011		goto err_obj;
1012	}
1013
1014	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1015	if (err)
1016		goto err_map;
1017
1018	err = i915_vma_sync(vma);
1019	if (err)
1020		goto err_pin;
1021
1022	for_each_engine(engine, gt, id) {
1023		if (!intel_engine_has_preemption(engine))
1024			continue;
1025
1026		memset(vaddr, 0, PAGE_SIZE);
1027
1028		st_engine_heartbeat_disable(engine);
1029		err = slice_semaphore_queue(engine, vma, 5);
1030		st_engine_heartbeat_enable(engine);
1031		if (err)
1032			goto err_pin;
1033
1034		if (igt_flush_test(gt->i915)) {
1035			err = -EIO;
1036			goto err_pin;
1037		}
1038	}
1039
1040err_pin:
1041	i915_vma_unpin(vma);
1042err_map:
1043	i915_gem_object_unpin_map(obj);
1044err_obj:
1045	i915_gem_object_put(obj);
1046	return err;
1047}
1048
1049static struct i915_request *
1050create_rewinder(struct intel_context *ce,
1051		struct i915_request *wait,
1052		void *slot, int idx)
1053{
1054	const u32 offset =
1055		i915_ggtt_offset(ce->engine->status_page.vma) +
1056		offset_in_page(slot);
1057	struct i915_request *rq;
1058	u32 *cs;
1059	int err;
1060
1061	rq = intel_context_create_request(ce);
1062	if (IS_ERR(rq))
1063		return rq;
1064
1065	if (wait) {
1066		err = i915_request_await_dma_fence(rq, &wait->fence);
1067		if (err)
1068			goto err;
1069	}
1070
1071	cs = intel_ring_begin(rq, 14);
1072	if (IS_ERR(cs)) {
1073		err = PTR_ERR(cs);
1074		goto err;
1075	}
1076
1077	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1078	*cs++ = MI_NOOP;
1079
1080	*cs++ = MI_SEMAPHORE_WAIT |
1081		MI_SEMAPHORE_GLOBAL_GTT |
1082		MI_SEMAPHORE_POLL |
1083		MI_SEMAPHORE_SAD_GTE_SDD;
1084	*cs++ = idx;
1085	*cs++ = offset;
1086	*cs++ = 0;
1087
1088	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
1089	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
1090	*cs++ = offset + idx * sizeof(u32);
1091	*cs++ = 0;
1092
1093	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1094	*cs++ = offset;
1095	*cs++ = 0;
1096	*cs++ = idx + 1;
1097
1098	intel_ring_advance(rq, cs);
1099
1100	err = 0;
1101err:
1102	i915_request_get(rq);
1103	i915_request_add(rq);
1104	if (err) {
1105		i915_request_put(rq);
1106		return ERR_PTR(err);
1107	}
1108
1109	return rq;
1110}
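
     /*
      * Each rewinder request waits for the semaphore in slot[0] to reach its
      * own idx, samples RING_TIMESTAMP into slot[idx], and then bumps
      * slot[0] to idx + 1, so the recorded timestamps expose the order in
      * which the requests actually ran.
      */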
1111
1112static int live_timeslice_rewind(void *arg)
1113{
1114	struct intel_gt *gt = arg;
1115	struct intel_engine_cs *engine;
1116	enum intel_engine_id id;
1117
1118	/*
1119	 * The usual presumption on timeslice expiration is that we replace
1120	 * the active context with another. However, given a chain of
 1121	 * dependencies, we may end up replacing the context with itself,
 1122	 * but with only a few of its requests, forcing us to rewind the
1123	 * RING_TAIL of the original request.
1124	 */
1125	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1126		return 0;
1127
1128	for_each_engine(engine, gt, id) {
1129		enum { A1, A2, B1 };
1130		enum { X = 1, Z, Y };
1131		struct i915_request *rq[3] = {};
1132		struct intel_context *ce;
1133		unsigned long timeslice;
1134		int i, err = 0;
1135		u32 *slot;
1136
1137		if (!intel_engine_has_timeslices(engine))
1138			continue;
1139
1140		/*
1141		 * A:rq1 -- semaphore wait, timestamp X
1142		 * A:rq2 -- write timestamp Y
1143		 *
1144		 * B:rq1 [await A:rq1] -- write timestamp Z
1145		 *
1146		 * Force timeslice, release semaphore.
1147		 *
1148		 * Expect execution/evaluation order XZY
1149		 */
1150
1151		st_engine_heartbeat_disable(engine);
1152		timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1153
1154		slot = memset32(engine->status_page.addr + 1000, 0, 4);
1155
1156		ce = intel_context_create(engine);
1157		if (IS_ERR(ce)) {
1158			err = PTR_ERR(ce);
1159			goto err;
1160		}
1161
1162		rq[A1] = create_rewinder(ce, NULL, slot, X);
1163		if (IS_ERR(rq[A1])) {
1164			intel_context_put(ce);
1165			goto err;
1166		}
1167
1168		rq[A2] = create_rewinder(ce, NULL, slot, Y);
1169		intel_context_put(ce);
1170		if (IS_ERR(rq[A2]))
1171			goto err;
1172
1173		err = wait_for_submit(engine, rq[A2], HZ / 2);
1174		if (err) {
1175			pr_err("%s: failed to submit first context\n",
1176			       engine->name);
1177			goto err;
1178		}
1179
1180		ce = intel_context_create(engine);
1181		if (IS_ERR(ce)) {
1182			err = PTR_ERR(ce);
1183			goto err;
1184		}
1185
1186		rq[B1] = create_rewinder(ce, rq[A1], slot, Z);
1187		intel_context_put(ce);
 1188		if (IS_ERR(rq[B1]))
1189			goto err;
1190
1191		err = wait_for_submit(engine, rq[B1], HZ / 2);
1192		if (err) {
1193			pr_err("%s: failed to submit second context\n",
1194			       engine->name);
1195			goto err;
1196		}
1197
1198		/* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
1199		ENGINE_TRACE(engine, "forcing tasklet for rewind\n");
1200		while (i915_request_is_active(rq[A2])) { /* semaphore yield! */
1201			/* Wait for the timeslice to kick in */
1202			del_timer(&engine->execlists.timer);
1203			tasklet_hi_schedule(&engine->execlists.tasklet);
1204			intel_engine_flush_submission(engine);
1205		}
1206		/* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
1207		GEM_BUG_ON(!i915_request_is_active(rq[A1]));
1208		GEM_BUG_ON(!i915_request_is_active(rq[B1]));
1209		GEM_BUG_ON(i915_request_is_active(rq[A2]));
1210
1211		/* Release the hounds! */
1212		slot[0] = 1;
1213		wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */
1214
1215		for (i = 1; i <= 3; i++) {
1216			unsigned long timeout = jiffies + HZ / 2;
1217
1218			while (!READ_ONCE(slot[i]) &&
1219			       time_before(jiffies, timeout))
1220				;
1221
1222			if (!time_before(jiffies, timeout)) {
1223				pr_err("%s: rq[%d] timed out\n",
1224				       engine->name, i - 1);
1225				err = -ETIME;
1226				goto err;
1227			}
1228
1229			pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
1230		}
1231
1232		/* XZY: XZ < XY */
1233		if (slot[Z] - slot[X] >= slot[Y] - slot[X]) {
1234			pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
1235			       engine->name,
1236			       slot[Z] - slot[X],
1237			       slot[Y] - slot[X]);
1238			err = -EINVAL;
1239		}
1240
1241err:
1242		memset32(&slot[0], -1, 4);
1243		wmb();
1244
1245		engine->props.timeslice_duration_ms = timeslice;
1246		st_engine_heartbeat_enable(engine);
1247		for (i = 0; i < 3; i++)
1248			i915_request_put(rq[i]);
1249		if (igt_flush_test(gt->i915))
1250			err = -EIO;
1251		if (err)
1252			return err;
1253	}
1254
1255	return 0;
1256}
1257
1258static struct i915_request *nop_request(struct intel_engine_cs *engine)
1259{
1260	struct i915_request *rq;
1261
1262	rq = intel_engine_create_kernel_request(engine);
1263	if (IS_ERR(rq))
1264		return rq;
1265
1266	i915_request_get(rq);
1267	i915_request_add(rq);
1268
1269	return rq;
1270}
1271
1272static long slice_timeout(struct intel_engine_cs *engine)
1273{
1274	long timeout;
1275
1276	/* Enough time for a timeslice to kick in, and kick out */
1277	timeout = 2 * msecs_to_jiffies_timeout(timeslice(engine));
1278
1279	/* Enough time for the nop request to complete */
1280	timeout += HZ / 5;
1281
1282	return timeout + 1;
1283}
1284
1285static int live_timeslice_queue(void *arg)
1286{
1287	struct intel_gt *gt = arg;
1288	struct drm_i915_gem_object *obj;
1289	struct intel_engine_cs *engine;
1290	enum intel_engine_id id;
1291	struct i915_vma *vma;
1292	void *vaddr;
1293	int err = 0;
1294
1295	/*
 1296	 * Make sure that even if ELSP[0] and ELSP[1] are filled, with
1297	 * timeslicing between them disabled, we *do* enable timeslicing
1298	 * if the queue demands it. (Normally, we do not submit if
1299	 * ELSP[1] is already occupied, so must rely on timeslicing to
1300	 * eject ELSP[0] in favour of the queue.)
1301	 */
1302	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1303		return 0;
1304
1305	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1306	if (IS_ERR(obj))
1307		return PTR_ERR(obj);
1308
1309	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1310	if (IS_ERR(vma)) {
1311		err = PTR_ERR(vma);
1312		goto err_obj;
1313	}
1314
1315	vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
1316	if (IS_ERR(vaddr)) {
1317		err = PTR_ERR(vaddr);
1318		goto err_obj;
1319	}
1320
1321	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1322	if (err)
1323		goto err_map;
1324
1325	err = i915_vma_sync(vma);
1326	if (err)
1327		goto err_pin;
1328
1329	for_each_engine(engine, gt, id) {
1330		struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
1331		struct i915_request *rq, *nop;
1332
1333		if (!intel_engine_has_preemption(engine))
1334			continue;
1335
1336		st_engine_heartbeat_disable(engine);
1337		memset(vaddr, 0, PAGE_SIZE);
1338
1339		/* ELSP[0]: semaphore wait */
1340		rq = semaphore_queue(engine, vma, 0);
1341		if (IS_ERR(rq)) {
1342			err = PTR_ERR(rq);
1343			goto err_heartbeat;
1344		}
1345		engine->schedule(rq, &attr);
1346		err = wait_for_submit(engine, rq, HZ / 2);
1347		if (err) {
1348			pr_err("%s: Timed out trying to submit semaphores\n",
1349			       engine->name);
1350			goto err_rq;
1351		}
1352
1353		/* ELSP[1]: nop request */
1354		nop = nop_request(engine);
1355		if (IS_ERR(nop)) {
1356			err = PTR_ERR(nop);
1357			goto err_rq;
1358		}
1359		err = wait_for_submit(engine, nop, HZ / 2);
1360		i915_request_put(nop);
1361		if (err) {
1362			pr_err("%s: Timed out trying to submit nop\n",
1363			       engine->name);
1364			goto err_rq;
1365		}
1366
1367		GEM_BUG_ON(i915_request_completed(rq));
1368		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
1369
 1370		/* Queue: semaphore signal, with priority matching the semaphore */
1371		err = release_queue(engine, vma, 1, effective_prio(rq));
1372		if (err)
1373			goto err_rq;
1374
1375		/* Wait until we ack the release_queue and start timeslicing */
1376		do {
1377			cond_resched();
1378			intel_engine_flush_submission(engine);
1379		} while (READ_ONCE(engine->execlists.pending[0]));
1380
1381		/* Timeslice every jiffy, so within 2 we should signal */
1382		if (i915_request_wait(rq, 0, slice_timeout(engine)) < 0) {
1383			struct drm_printer p =
1384				drm_info_printer(gt->i915->drm.dev);
1385
1386			pr_err("%s: Failed to timeslice into queue\n",
1387			       engine->name);
1388			intel_engine_dump(engine, &p,
1389					  "%s\n", engine->name);
1390
1391			memset(vaddr, 0xff, PAGE_SIZE);
1392			err = -EIO;
1393		}
1394err_rq:
1395		i915_request_put(rq);
1396err_heartbeat:
1397		st_engine_heartbeat_enable(engine);
1398		if (err)
1399			break;
1400	}
1401
1402err_pin:
1403	i915_vma_unpin(vma);
1404err_map:
1405	i915_gem_object_unpin_map(obj);
1406err_obj:
1407	i915_gem_object_put(obj);
1408	return err;
1409}
1410
1411static int live_timeslice_nopreempt(void *arg)
1412{
1413	struct intel_gt *gt = arg;
1414	struct intel_engine_cs *engine;
1415	enum intel_engine_id id;
1416	struct igt_spinner spin;
1417	int err = 0;
1418
1419	/*
1420	 * We should not timeslice into a request that is marked with
 1421	 * I915_FENCE_FLAG_NOPREEMPT.
1422	 */
1423	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1424		return 0;
1425
1426	if (igt_spinner_init(&spin, gt))
1427		return -ENOMEM;
1428
1429	for_each_engine(engine, gt, id) {
1430		struct intel_context *ce;
1431		struct i915_request *rq;
1432		unsigned long timeslice;
1433
1434		if (!intel_engine_has_preemption(engine))
1435			continue;
1436
1437		ce = intel_context_create(engine);
1438		if (IS_ERR(ce)) {
1439			err = PTR_ERR(ce);
1440			break;
1441		}
1442
1443		st_engine_heartbeat_disable(engine);
1444		timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1445
1446		/* Create an unpreemptible spinner */
1447
1448		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
1449		intel_context_put(ce);
1450		if (IS_ERR(rq)) {
1451			err = PTR_ERR(rq);
1452			goto out_heartbeat;
1453		}
1454
1455		i915_request_get(rq);
1456		i915_request_add(rq);
1457
1458		if (!igt_wait_for_spinner(&spin, rq)) {
1459			i915_request_put(rq);
1460			err = -ETIME;
1461			goto out_spin;
1462		}
1463
1464		set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags);
1465		i915_request_put(rq);
1466
1467		/* Followed by a maximum priority barrier (heartbeat) */
1468
1469		ce = intel_context_create(engine);
1470		if (IS_ERR(ce)) {
1471			err = PTR_ERR(ce);
1472			goto out_spin;
1473		}
1474
1475		rq = intel_context_create_request(ce);
1476		intel_context_put(ce);
1477		if (IS_ERR(rq)) {
1478			err = PTR_ERR(rq);
1479			goto out_spin;
1480		}
1481
1482		rq->sched.attr.priority = I915_PRIORITY_BARRIER;
1483		i915_request_get(rq);
1484		i915_request_add(rq);
1485
1486		/*
1487		 * Wait until the barrier is in ELSP, and we know timeslicing
1488		 * will have been activated.
1489		 */
1490		if (wait_for_submit(engine, rq, HZ / 2)) {
1491			i915_request_put(rq);
1492			err = -ETIME;
1493			goto out_spin;
1494		}
1495
1496		/*
1497		 * Since the ELSP[0] request is unpreemptible, it should not
1498		 * allow the maximum priority barrier through. Wait long
1499		 * enough to see if it is timesliced in by mistake.
1500		 */
1501		if (i915_request_wait(rq, 0, slice_timeout(engine)) >= 0) {
1502			pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n",
1503			       engine->name);
1504			err = -EINVAL;
1505		}
1506		i915_request_put(rq);
1507
1508out_spin:
1509		igt_spinner_end(&spin);
1510out_heartbeat:
1511		xchg(&engine->props.timeslice_duration_ms, timeslice);
1512		st_engine_heartbeat_enable(engine);
1513		if (err)
1514			break;
1515
1516		if (igt_flush_test(gt->i915)) {
1517			err = -EIO;
1518			break;
1519		}
1520	}
1521
1522	igt_spinner_fini(&spin);
1523	return err;
1524}
1525
1526static int live_busywait_preempt(void *arg)
1527{
1528	struct intel_gt *gt = arg;
1529	struct i915_gem_context *ctx_hi, *ctx_lo;
1530	struct intel_engine_cs *engine;
1531	struct drm_i915_gem_object *obj;
1532	struct i915_vma *vma;
1533	enum intel_engine_id id;
1534	int err = -ENOMEM;
1535	u32 *map;
1536
1537	/*
1538	 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
1539	 * preempt the busywaits used to synchronise between rings.
1540	 */
1541
1542	ctx_hi = kernel_context(gt->i915);
1543	if (!ctx_hi)
1544		return -ENOMEM;
1545	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
1546
1547	ctx_lo = kernel_context(gt->i915);
1548	if (!ctx_lo)
1549		goto err_ctx_hi;
1550	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
1551
1552	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1553	if (IS_ERR(obj)) {
1554		err = PTR_ERR(obj);
1555		goto err_ctx_lo;
1556	}
1557
1558	map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
1559	if (IS_ERR(map)) {
1560		err = PTR_ERR(map);
1561		goto err_obj;
1562	}
1563
1564	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1565	if (IS_ERR(vma)) {
1566		err = PTR_ERR(vma);
1567		goto err_map;
1568	}
1569
1570	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1571	if (err)
1572		goto err_map;
1573
1574	err = i915_vma_sync(vma);
1575	if (err)
1576		goto err_vma;
1577
1578	for_each_engine(engine, gt, id) {
1579		struct i915_request *lo, *hi;
1580		struct igt_live_test t;
1581		u32 *cs;
1582
1583		if (!intel_engine_has_preemption(engine))
1584			continue;
1585
1586		if (!intel_engine_can_store_dword(engine))
1587			continue;
1588
1589		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1590			err = -EIO;
1591			goto err_vma;
1592		}
1593
1594		/*
1595		 * We create two requests. The low priority request
1596		 * busywaits on a semaphore (inside the ringbuffer where
 1597		 * it should be preemptible) and the high priority request
 1598		 * uses an MI_STORE_DWORD_IMM to update the semaphore value,
1599		 * allowing the first request to complete. If preemption
1600		 * fails, we hang instead.
1601		 */
1602
1603		lo = igt_request_alloc(ctx_lo, engine);
1604		if (IS_ERR(lo)) {
1605			err = PTR_ERR(lo);
1606			goto err_vma;
1607		}
1608
1609		cs = intel_ring_begin(lo, 8);
1610		if (IS_ERR(cs)) {
1611			err = PTR_ERR(cs);
1612			i915_request_add(lo);
1613			goto err_vma;
1614		}
1615
1616		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1617		*cs++ = i915_ggtt_offset(vma);
1618		*cs++ = 0;
1619		*cs++ = 1;
1620
1621		/* XXX Do we need a flush + invalidate here? */
1622
1623		*cs++ = MI_SEMAPHORE_WAIT |
1624			MI_SEMAPHORE_GLOBAL_GTT |
1625			MI_SEMAPHORE_POLL |
1626			MI_SEMAPHORE_SAD_EQ_SDD;
1627		*cs++ = 0;
1628		*cs++ = i915_ggtt_offset(vma);
1629		*cs++ = 0;
1630
1631		intel_ring_advance(lo, cs);
1632
1633		i915_request_get(lo);
1634		i915_request_add(lo);
1635
1636		if (wait_for(READ_ONCE(*map), 10)) {
1637			i915_request_put(lo);
1638			err = -ETIMEDOUT;
1639			goto err_vma;
1640		}
1641
1642		/* Low priority request should be busywaiting now */
1643		if (i915_request_wait(lo, 0, 1) != -ETIME) {
1644			i915_request_put(lo);
 1645			pr_err("%s: Busywaiting request did not busywait!\n",
1646			       engine->name);
1647			err = -EIO;
1648			goto err_vma;
1649		}
1650
1651		hi = igt_request_alloc(ctx_hi, engine);
1652		if (IS_ERR(hi)) {
1653			err = PTR_ERR(hi);
1654			i915_request_put(lo);
1655			goto err_vma;
1656		}
1657
1658		cs = intel_ring_begin(hi, 4);
1659		if (IS_ERR(cs)) {
1660			err = PTR_ERR(cs);
1661			i915_request_add(hi);
1662			i915_request_put(lo);
1663			goto err_vma;
1664		}
1665
1666		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1667		*cs++ = i915_ggtt_offset(vma);
1668		*cs++ = 0;
1669		*cs++ = 0;
1670
1671		intel_ring_advance(hi, cs);
1672		i915_request_add(hi);
1673
1674		if (i915_request_wait(lo, 0, HZ / 5) < 0) {
1675			struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
1676
1677			pr_err("%s: Failed to preempt semaphore busywait!\n",
1678			       engine->name);
1679
1680			intel_engine_dump(engine, &p, "%s\n", engine->name);
1681			GEM_TRACE_DUMP();
1682
1683			i915_request_put(lo);
1684			intel_gt_set_wedged(gt);
1685			err = -EIO;
1686			goto err_vma;
1687		}
1688		GEM_BUG_ON(READ_ONCE(*map));
1689		i915_request_put(lo);
1690
1691		if (igt_live_test_end(&t)) {
1692			err = -EIO;
1693			goto err_vma;
1694		}
1695	}
1696
1697	err = 0;
1698err_vma:
1699	i915_vma_unpin(vma);
1700err_map:
1701	i915_gem_object_unpin_map(obj);
1702err_obj:
1703	i915_gem_object_put(obj);
1704err_ctx_lo:
1705	kernel_context_close(ctx_lo);
1706err_ctx_hi:
1707	kernel_context_close(ctx_hi);
1708	return err;
1709}
1710
1711static struct i915_request *
1712spinner_create_request(struct igt_spinner *spin,
1713		       struct i915_gem_context *ctx,
1714		       struct intel_engine_cs *engine,
1715		       u32 arb)
1716{
1717	struct intel_context *ce;
1718	struct i915_request *rq;
1719
1720	ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
1721	if (IS_ERR(ce))
1722		return ERR_CAST(ce);
1723
1724	rq = igt_spinner_create_request(spin, ce, arb);
1725	intel_context_put(ce);
1726	return rq;
1727}
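
     /*
      * spinner_create_request() resolves the legacy engine slot of a GEM
      * context into an intel_context and reuses igt_spinner_create_request(),
      * so the GEM-context based tests below share the same spinner machinery.
      */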
1728
1729static int live_preempt(void *arg)
1730{
1731	struct intel_gt *gt = arg;
1732	struct i915_gem_context *ctx_hi, *ctx_lo;
1733	struct igt_spinner spin_hi, spin_lo;
1734	struct intel_engine_cs *engine;
1735	enum intel_engine_id id;
1736	int err = -ENOMEM;
1737
1738	if (igt_spinner_init(&spin_hi, gt))
1739		return -ENOMEM;
1740
1741	if (igt_spinner_init(&spin_lo, gt))
1742		goto err_spin_hi;
1743
1744	ctx_hi = kernel_context(gt->i915);
1745	if (!ctx_hi)
1746		goto err_spin_lo;
1747	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
1748
1749	ctx_lo = kernel_context(gt->i915);
1750	if (!ctx_lo)
1751		goto err_ctx_hi;
1752	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
1753
1754	for_each_engine(engine, gt, id) {
1755		struct igt_live_test t;
1756		struct i915_request *rq;
1757
1758		if (!intel_engine_has_preemption(engine))
1759			continue;
1760
1761		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1762			err = -EIO;
1763			goto err_ctx_lo;
1764		}
1765
1766		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1767					    MI_ARB_CHECK);
1768		if (IS_ERR(rq)) {
1769			err = PTR_ERR(rq);
1770			goto err_ctx_lo;
1771		}
1772
1773		i915_request_add(rq);
1774		if (!igt_wait_for_spinner(&spin_lo, rq)) {
1775			GEM_TRACE("lo spinner failed to start\n");
1776			GEM_TRACE_DUMP();
1777			intel_gt_set_wedged(gt);
1778			err = -EIO;
1779			goto err_ctx_lo;
1780		}
1781
1782		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1783					    MI_ARB_CHECK);
1784		if (IS_ERR(rq)) {
1785			igt_spinner_end(&spin_lo);
1786			err = PTR_ERR(rq);
1787			goto err_ctx_lo;
1788		}
1789
1790		i915_request_add(rq);
1791		if (!igt_wait_for_spinner(&spin_hi, rq)) {
1792			GEM_TRACE("hi spinner failed to start\n");
1793			GEM_TRACE_DUMP();
1794			intel_gt_set_wedged(gt);
1795			err = -EIO;
1796			goto err_ctx_lo;
1797		}
1798
1799		igt_spinner_end(&spin_hi);
1800		igt_spinner_end(&spin_lo);
1801
1802		if (igt_live_test_end(&t)) {
1803			err = -EIO;
1804			goto err_ctx_lo;
1805		}
1806	}
1807
1808	err = 0;
1809err_ctx_lo:
1810	kernel_context_close(ctx_lo);
1811err_ctx_hi:
1812	kernel_context_close(ctx_hi);
1813err_spin_lo:
1814	igt_spinner_fini(&spin_lo);
1815err_spin_hi:
1816	igt_spinner_fini(&spin_hi);
1817	return err;
1818}
1819
1820static int live_late_preempt(void *arg)
1821{
1822	struct intel_gt *gt = arg;
1823	struct i915_gem_context *ctx_hi, *ctx_lo;
1824	struct igt_spinner spin_hi, spin_lo;
1825	struct intel_engine_cs *engine;
1826	struct i915_sched_attr attr = {};
1827	enum intel_engine_id id;
1828	int err = -ENOMEM;
1829
1830	if (igt_spinner_init(&spin_hi, gt))
1831		return -ENOMEM;
1832
1833	if (igt_spinner_init(&spin_lo, gt))
1834		goto err_spin_hi;
1835
1836	ctx_hi = kernel_context(gt->i915);
1837	if (!ctx_hi)
1838		goto err_spin_lo;
1839
1840	ctx_lo = kernel_context(gt->i915);
1841	if (!ctx_lo)
1842		goto err_ctx_hi;
1843
1844	/* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
1845	ctx_lo->sched.priority = 1;
1846
1847	for_each_engine(engine, gt, id) {
1848		struct igt_live_test t;
1849		struct i915_request *rq;
1850
1851		if (!intel_engine_has_preemption(engine))
1852			continue;
1853
1854		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1855			err = -EIO;
1856			goto err_ctx_lo;
1857		}
1858
1859		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1860					    MI_ARB_CHECK);
1861		if (IS_ERR(rq)) {
1862			err = PTR_ERR(rq);
1863			goto err_ctx_lo;
1864		}
1865
1866		i915_request_add(rq);
1867		if (!igt_wait_for_spinner(&spin_lo, rq)) {
1868			pr_err("First context failed to start\n");
1869			goto err_wedged;
1870		}
1871
1872		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1873					    MI_NOOP);
1874		if (IS_ERR(rq)) {
1875			igt_spinner_end(&spin_lo);
1876			err = PTR_ERR(rq);
1877			goto err_ctx_lo;
1878		}
1879
1880		i915_request_add(rq);
1881		if (igt_wait_for_spinner(&spin_hi, rq)) {
1882			pr_err("Second context overtook first?\n");
1883			goto err_wedged;
1884		}
1885
1886		attr.priority = I915_PRIORITY_MAX;
1887		engine->schedule(rq, &attr);
1888
1889		if (!igt_wait_for_spinner(&spin_hi, rq)) {
1890			pr_err("High priority context failed to preempt the low priority context\n");
1891			GEM_TRACE_DUMP();
1892			goto err_wedged;
1893		}
1894
1895		igt_spinner_end(&spin_hi);
1896		igt_spinner_end(&spin_lo);
1897
1898		if (igt_live_test_end(&t)) {
1899			err = -EIO;
1900			goto err_ctx_lo;
1901		}
1902	}
1903
1904	err = 0;
1905err_ctx_lo:
1906	kernel_context_close(ctx_lo);
1907err_ctx_hi:
1908	kernel_context_close(ctx_hi);
1909err_spin_lo:
1910	igt_spinner_fini(&spin_lo);
1911err_spin_hi:
1912	igt_spinner_fini(&spin_hi);
1913	return err;
1914
1915err_wedged:
1916	igt_spinner_end(&spin_hi);
1917	igt_spinner_end(&spin_lo);
1918	intel_gt_set_wedged(gt);
1919	err = -EIO;
1920	goto err_ctx_lo;
1921}
1922
1923struct preempt_client {
1924	struct igt_spinner spin;
1925	struct i915_gem_context *ctx;
1926};
1927
1928static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
1929{
1930	c->ctx = kernel_context(gt->i915);
1931	if (!c->ctx)
1932		return -ENOMEM;
1933
1934	if (igt_spinner_init(&c->spin, gt))
1935		goto err_ctx;
1936
1937	return 0;
1938
1939err_ctx:
1940	kernel_context_close(c->ctx);
1941	return -ENOMEM;
1942}
1943
1944static void preempt_client_fini(struct preempt_client *c)
1945{
1946	igt_spinner_fini(&c->spin);
1947	kernel_context_close(c->ctx);
1948}
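
     /*
      * Sketch of the preempt_client lifecycle used by the tests below (all
      * calls appear verbatim elsewhere in this file):
      *
      *	struct preempt_client a;
      *
      *	if (preempt_client_init(gt, &a))
      *		return -ENOMEM;
      *	rq = spinner_create_request(&a.spin, a.ctx, engine, MI_ARB_CHECK);
      *	i915_request_add(rq);
      *	...
      *	igt_spinner_end(&a.spin);
      *	preempt_client_fini(&a);
      */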
1949
1950static int live_nopreempt(void *arg)
1951{
1952	struct intel_gt *gt = arg;
1953	struct intel_engine_cs *engine;
1954	struct preempt_client a, b;
1955	enum intel_engine_id id;
1956	int err = -ENOMEM;
1957
1958	/*
1959	 * Verify that we can disable preemption for an individual request
 1960	 * that may be being observed and does not want to be interrupted.
1961	 */
1962
1963	if (preempt_client_init(gt, &a))
1964		return -ENOMEM;
1965	if (preempt_client_init(gt, &b))
1966		goto err_client_a;
1967	b.ctx->sched.priority = I915_PRIORITY_MAX;
1968
1969	for_each_engine(engine, gt, id) {
1970		struct i915_request *rq_a, *rq_b;
1971
1972		if (!intel_engine_has_preemption(engine))
1973			continue;
1974
1975		engine->execlists.preempt_hang.count = 0;
1976
1977		rq_a = spinner_create_request(&a.spin,
1978					      a.ctx, engine,
1979					      MI_ARB_CHECK);
1980		if (IS_ERR(rq_a)) {
1981			err = PTR_ERR(rq_a);
1982			goto err_client_b;
1983		}
1984
1985		/* Low priority client, but unpreemptable! */
1986		__set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
1987
1988		i915_request_add(rq_a);
1989		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
1990			pr_err("First client failed to start\n");
1991			goto err_wedged;
1992		}
1993
1994		rq_b = spinner_create_request(&b.spin,
1995					      b.ctx, engine,
1996					      MI_ARB_CHECK);
1997		if (IS_ERR(rq_b)) {
1998			err = PTR_ERR(rq_b);
1999			goto err_client_b;
2000		}
2001
2002		i915_request_add(rq_b);
2003
2004		/* B is much more important than A! (But A is unpreemptable.) */
2005		GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
2006
2007		/* Wait long enough for preemption and timeslicing */
2008		if (igt_wait_for_spinner(&b.spin, rq_b)) {
2009			pr_err("Second client started too early!\n");
2010			goto err_wedged;
2011		}
2012
2013		igt_spinner_end(&a.spin);
2014
2015		if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2016			pr_err("Second client failed to start\n");
2017			goto err_wedged;
2018		}
2019
2020		igt_spinner_end(&b.spin);
2021
2022		if (engine->execlists.preempt_hang.count) {
2023			pr_err("Preemption recorded x%d; should have been suppressed!\n",
2024			       engine->execlists.preempt_hang.count);
2025			err = -EINVAL;
2026			goto err_wedged;
2027		}
2028
2029		if (igt_flush_test(gt->i915))
2030			goto err_wedged;
2031	}
2032
2033	err = 0;
2034err_client_b:
2035	preempt_client_fini(&b);
2036err_client_a:
2037	preempt_client_fini(&a);
2038	return err;
2039
2040err_wedged:
2041	igt_spinner_end(&b.spin);
2042	igt_spinner_end(&a.spin);
2043	intel_gt_set_wedged(gt);
2044	err = -EIO;
2045	goto err_client_b;
2046}
2047
2048struct live_preempt_cancel {
2049	struct intel_engine_cs *engine;
2050	struct preempt_client a, b;
2051};
2052
2053static int __cancel_active0(struct live_preempt_cancel *arg)
2054{
2055	struct i915_request *rq;
2056	struct igt_live_test t;
2057	int err;
2058
2059	/* Preempt cancel of ELSP0 */
2060	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2061	if (igt_live_test_begin(&t, arg->engine->i915,
2062				__func__, arg->engine->name))
2063		return -EIO;
2064
2065	rq = spinner_create_request(&arg->a.spin,
2066				    arg->a.ctx, arg->engine,
2067				    MI_ARB_CHECK);
2068	if (IS_ERR(rq))
2069		return PTR_ERR(rq);
2070
2071	clear_bit(CONTEXT_BANNED, &rq->context->flags);
2072	i915_request_get(rq);
2073	i915_request_add(rq);
2074	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2075		err = -EIO;
2076		goto out;
2077	}
2078
2079	intel_context_set_banned(rq->context);
2080	err = intel_engine_pulse(arg->engine);
2081	if (err)
2082		goto out;
2083
2084	err = wait_for_reset(arg->engine, rq, HZ / 2);
2085	if (err) {
2086		pr_err("Cancelled inflight0 request did not reset\n");
2087		goto out;
2088	}
2089
2090out:
2091	i915_request_put(rq);
2092	if (igt_live_test_end(&t))
2093		err = -EIO;
2094	return err;
2095}
2096
2097static int __cancel_active1(struct live_preempt_cancel *arg)
2098{
2099	struct i915_request *rq[2] = {};
2100	struct igt_live_test t;
2101	int err;
2102
2103	/* Preempt cancel of ELSP1 */
2104	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2105	if (igt_live_test_begin(&t, arg->engine->i915,
2106				__func__, arg->engine->name))
2107		return -EIO;
2108
2109	rq[0] = spinner_create_request(&arg->a.spin,
2110				       arg->a.ctx, arg->engine,
2111				       MI_NOOP); /* no preemption */
2112	if (IS_ERR(rq[0]))
2113		return PTR_ERR(rq[0]);
2114
2115	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2116	i915_request_get(rq[0]);
2117	i915_request_add(rq[0]);
2118	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2119		err = -EIO;
2120		goto out;
2121	}
2122
2123	rq[1] = spinner_create_request(&arg->b.spin,
2124				       arg->b.ctx, arg->engine,
2125				       MI_ARB_CHECK);
2126	if (IS_ERR(rq[1])) {
2127		err = PTR_ERR(rq[1]);
2128		goto out;
2129	}
2130
2131	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2132	i915_request_get(rq[1]);
2133	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2134	i915_request_add(rq[1]);
2135	if (err)
2136		goto out;
2137
2138	intel_context_set_banned(rq[1]->context);
2139	err = intel_engine_pulse(arg->engine);
2140	if (err)
2141		goto out;
2142
2143	igt_spinner_end(&arg->a.spin);
2144	err = wait_for_reset(arg->engine, rq[1], HZ / 2);
2145	if (err)
2146		goto out;
2147
2148	if (rq[0]->fence.error != 0) {
2149		pr_err("Normal inflight0 request did not complete\n");
2150		err = -EINVAL;
2151		goto out;
2152	}
2153
2154	if (rq[1]->fence.error != -EIO) {
2155		pr_err("Cancelled inflight1 request did not report -EIO\n");
2156		err = -EINVAL;
2157		goto out;
2158	}
2159
2160out:
2161	i915_request_put(rq[1]);
2162	i915_request_put(rq[0]);
2163	if (igt_live_test_end(&t))
2164		err = -EIO;
2165	return err;
2166}
2167
2168static int __cancel_queued(struct live_preempt_cancel *arg)
2169{
2170	struct i915_request *rq[3] = {};
2171	struct igt_live_test t;
2172	int err;
2173
2174	/* Full ELSP and one in the wings */
2175	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2176	if (igt_live_test_begin(&t, arg->engine->i915,
2177				__func__, arg->engine->name))
2178		return -EIO;
2179
2180	rq[0] = spinner_create_request(&arg->a.spin,
2181				       arg->a.ctx, arg->engine,
2182				       MI_ARB_CHECK);
2183	if (IS_ERR(rq[0]))
2184		return PTR_ERR(rq[0]);
2185
2186	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2187	i915_request_get(rq[0]);
2188	i915_request_add(rq[0]);
2189	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2190		err = -EIO;
2191		goto out;
2192	}
2193
2194	rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
2195	if (IS_ERR(rq[1])) {
2196		err = PTR_ERR(rq[1]);
2197		goto out;
2198	}
2199
2200	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2201	i915_request_get(rq[1]);
2202	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2203	i915_request_add(rq[1]);
2204	if (err)
2205		goto out;
2206
2207	rq[2] = spinner_create_request(&arg->b.spin,
2208				       arg->a.ctx, arg->engine,
2209				       MI_ARB_CHECK);
2210	if (IS_ERR(rq[2])) {
2211		err = PTR_ERR(rq[2]);
2212		goto out;
2213	}
2214
2215	i915_request_get(rq[2]);
2216	err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
2217	i915_request_add(rq[2]);
2218	if (err)
2219		goto out;
2220
2221	intel_context_set_banned(rq[2]->context);
2222	err = intel_engine_pulse(arg->engine);
2223	if (err)
2224		goto out;
2225
2226	err = wait_for_reset(arg->engine, rq[2], HZ / 2);
2227	if (err)
2228		goto out;
2229
2230	if (rq[0]->fence.error != -EIO) {
2231		pr_err("Cancelled inflight0 request did not report -EIO\n");
2232		err = -EINVAL;
2233		goto out;
2234	}
2235
2236	if (rq[1]->fence.error != 0) {
2237		pr_err("Normal inflight1 request did not complete\n");
2238		err = -EINVAL;
2239		goto out;
2240	}
2241
2242	if (rq[2]->fence.error != -EIO) {
2243		pr_err("Cancelled queued request did not report -EIO\n");
2244		err = -EINVAL;
2245		goto out;
2246	}
2247
2248out:
2249	i915_request_put(rq[2]);
2250	i915_request_put(rq[1]);
2251	i915_request_put(rq[0]);
2252	if (igt_live_test_end(&t))
2253		err = -EIO;
2254	return err;
2255}
2256
2257static int __cancel_hostile(struct live_preempt_cancel *arg)
2258{
2259	struct i915_request *rq;
2260	int err;
2261
2262	/* Preempt cancel non-preemptible spinner in ELSP0 */
2263	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2264		return 0;
2265
2266	if (!intel_has_reset_engine(arg->engine->gt))
2267		return 0;
2268
2269	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2270	rq = spinner_create_request(&arg->a.spin,
2271				    arg->a.ctx, arg->engine,
2272				    MI_NOOP); /* preemption disabled */
2273	if (IS_ERR(rq))
2274		return PTR_ERR(rq);
2275
2276	clear_bit(CONTEXT_BANNED, &rq->context->flags);
2277	i915_request_get(rq);
2278	i915_request_add(rq);
2279	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2280		err = -EIO;
2281		goto out;
2282	}
2283
2284	intel_context_set_banned(rq->context);
2285	err = intel_engine_pulse(arg->engine); /* force reset */
2286	if (err)
2287		goto out;
2288
2289	err = wait_for_reset(arg->engine, rq, HZ / 2);
2290	if (err) {
2291		pr_err("Cancelled inflight0 request did not reset\n");
2292		goto out;
2293	}
2294
2295out:
2296	i915_request_put(rq);
2297	if (igt_flush_test(arg->engine->i915))
2298		err = -EIO;
2299	return err;
2300}
2301
2302static void force_reset_timeout(struct intel_engine_cs *engine)
2303{
2304	engine->reset_timeout.probability = 999;
2305	atomic_set(&engine->reset_timeout.times, -1);
2306}
2307
2308static void cancel_reset_timeout(struct intel_engine_cs *engine)
2309{
2310	memset(&engine->reset_timeout, 0, sizeof(engine->reset_timeout));
2311}
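/*
 * force_reset_timeout() primes the selftest fault injection on the engine
 * (the probability/times knobs) so that the forced preempt reset attempted
 * in __cancel_fail() below is expected to fail, and cancel_reset_timeout()
 * clears it again afterwards. Recovery is then left to the heartbeat, which
 * __cancel_fail() speeds up so the hang resolves within the test.
 */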
2312
2313static int __cancel_fail(struct live_preempt_cancel *arg)
2314{
2315	struct intel_engine_cs *engine = arg->engine;
2316	struct i915_request *rq;
2317	int err;
2318
2319	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2320		return 0;
2321
2322	if (!intel_has_reset_engine(engine->gt))
2323		return 0;
2324
2325	GEM_TRACE("%s(%s)\n", __func__, engine->name);
2326	rq = spinner_create_request(&arg->a.spin,
2327				    arg->a.ctx, engine,
2328				    MI_NOOP); /* preemption disabled */
2329	if (IS_ERR(rq))
2330		return PTR_ERR(rq);
2331
2332	clear_bit(CONTEXT_BANNED, &rq->context->flags);
2333	i915_request_get(rq);
2334	i915_request_add(rq);
2335	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2336		err = -EIO;
2337		goto out;
2338	}
2339
2340	intel_context_set_banned(rq->context);
2341
2342	err = intel_engine_pulse(engine);
2343	if (err)
2344		goto out;
2345
2346	force_reset_timeout(engine);
2347
2348	/* force preempt reset [failure] */
2349	while (!engine->execlists.pending[0])
2350		intel_engine_flush_submission(engine);
2351	del_timer_sync(&engine->execlists.preempt);
2352	intel_engine_flush_submission(engine);
2353
2354	cancel_reset_timeout(engine);
2355
2356	/* after failure, require heartbeats to reset device */
2357	intel_engine_set_heartbeat(engine, 1);
2358	err = wait_for_reset(engine, rq, HZ / 2);
2359	intel_engine_set_heartbeat(engine,
2360				   engine->defaults.heartbeat_interval_ms);
2361	if (err) {
2362		pr_err("Cancelled inflight0 request did not reset\n");
2363		goto out;
2364	}
2365
2366out:
2367	i915_request_put(rq);
2368	if (igt_flush_test(engine->i915))
2369		err = -EIO;
2370	return err;
2371}
2372
2373static int live_preempt_cancel(void *arg)
2374{
2375	struct intel_gt *gt = arg;
2376	struct live_preempt_cancel data;
2377	enum intel_engine_id id;
2378	int err = -ENOMEM;
2379
2380	/*
2381	 * To cancel an inflight context, we need to first remove it from the
2382	 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
2383	 */
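	/*
	 * The helpers above cover each interesting position for the banned
	 * context: __cancel_active0 (spinner in ELSP[0]), __cancel_active1
	 * (behind a normal request, so ELSP[1]), __cancel_queued (full ELSP
	 * with one more waiting in the queue), __cancel_hostile (a
	 * non-preemptible spinner that needs the preempt-timeout reset) and
	 * __cancel_fail (the same, but with that reset forced to fail).
	 */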
2384
2385	if (preempt_client_init(gt, &data.a))
2386		return -ENOMEM;
2387	if (preempt_client_init(gt, &data.b))
2388		goto err_client_a;
2389
2390	for_each_engine(data.engine, gt, id) {
2391		if (!intel_engine_has_preemption(data.engine))
2392			continue;
2393
2394		err = __cancel_active0(&data);
2395		if (err)
2396			goto err_wedged;
2397
2398		err = __cancel_active1(&data);
2399		if (err)
2400			goto err_wedged;
2401
2402		err = __cancel_queued(&data);
2403		if (err)
2404			goto err_wedged;
2405
2406		err = __cancel_hostile(&data);
2407		if (err)
2408			goto err_wedged;
2409
2410		err = __cancel_fail(&data);
2411		if (err)
2412			goto err_wedged;
2413	}
2414
2415	err = 0;
2416err_client_b:
2417	preempt_client_fini(&data.b);
2418err_client_a:
2419	preempt_client_fini(&data.a);
2420	return err;
2421
2422err_wedged:
2423	GEM_TRACE_DUMP();
2424	igt_spinner_end(&data.b.spin);
2425	igt_spinner_end(&data.a.spin);
2426	intel_gt_set_wedged(gt);
2427	goto err_client_b;
2428}
2429
2430static int live_suppress_self_preempt(void *arg)
2431{
2432	struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
2433	struct intel_gt *gt = arg;
2434	struct intel_engine_cs *engine;
2435	struct preempt_client a, b;
2436	enum intel_engine_id id;
2437	int err = -ENOMEM;
2438
2439	/*
2440	 * Verify that if a preemption request does not cause a change in
2441	 * the current execution order, the preempt-to-idle injection is
2442	 * skipped and that we do not accidentally apply it after the CS
2443	 * completion event.
2444	 */
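	/*
	 * The loop below ping-pongs between two equal clients: while rq_a is
	 * spinning at the head of the engine, rq_b is queued behind it and
	 * rq_a is then bumped to I915_PRIORITY_MAX. As rq_a is already
	 * running, the bump must not inject a preempt-to-idle cycle; any such
	 * event is counted in execlists.preempt_hang.count and fails the
	 * test.
	 */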
2445
2446	if (intel_uc_uses_guc_submission(&gt->uc))
2447		return 0; /* presume black box */
2448
2449	if (intel_vgpu_active(gt->i915))
2450		return 0; /* GVT forces single port & request submission */
2451
2452	if (preempt_client_init(gt, &a))
2453		return -ENOMEM;
2454	if (preempt_client_init(gt, &b))
2455		goto err_client_a;
2456
2457	for_each_engine(engine, gt, id) {
2458		struct i915_request *rq_a, *rq_b;
2459		int depth;
2460
2461		if (!intel_engine_has_preemption(engine))
2462			continue;
2463
2464		if (igt_flush_test(gt->i915))
2465			goto err_wedged;
2466
2467		st_engine_heartbeat_disable(engine);
2468		engine->execlists.preempt_hang.count = 0;
2469
2470		rq_a = spinner_create_request(&a.spin,
2471					      a.ctx, engine,
2472					      MI_NOOP);
2473		if (IS_ERR(rq_a)) {
2474			err = PTR_ERR(rq_a);
2475			st_engine_heartbeat_enable(engine);
2476			goto err_client_b;
2477		}
2478
2479		i915_request_add(rq_a);
2480		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
2481			pr_err("First client failed to start\n");
2482			st_engine_heartbeat_enable(engine);
2483			goto err_wedged;
2484		}
2485
2486		/* Keep postponing the timer to avoid premature slicing */
2487		mod_timer(&engine->execlists.timer, jiffies + HZ);
2488		for (depth = 0; depth < 8; depth++) {
2489			rq_b = spinner_create_request(&b.spin,
2490						      b.ctx, engine,
2491						      MI_NOOP);
2492			if (IS_ERR(rq_b)) {
2493				err = PTR_ERR(rq_b);
2494				st_engine_heartbeat_enable(engine);
2495				goto err_client_b;
2496			}
2497			i915_request_add(rq_b);
2498
2499			GEM_BUG_ON(i915_request_completed(rq_a));
2500			engine->schedule(rq_a, &attr);
2501			igt_spinner_end(&a.spin);
2502
2503			if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2504				pr_err("Second client failed to start\n");
2505				st_engine_heartbeat_enable(engine);
2506				goto err_wedged;
2507			}
2508
2509			swap(a, b);
2510			rq_a = rq_b;
2511		}
2512		igt_spinner_end(&a.spin);
2513
2514		if (engine->execlists.preempt_hang.count) {
2515			pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
2516			       engine->name,
2517			       engine->execlists.preempt_hang.count,
2518			       depth);
2519			st_engine_heartbeat_enable(engine);
2520			err = -EINVAL;
2521			goto err_client_b;
2522		}
2523
2524		st_engine_heartbeat_enable(engine);
2525		if (igt_flush_test(gt->i915))
2526			goto err_wedged;
2527	}
2528
2529	err = 0;
2530err_client_b:
2531	preempt_client_fini(&b);
2532err_client_a:
2533	preempt_client_fini(&a);
2534	return err;
2535
2536err_wedged:
2537	igt_spinner_end(&b.spin);
2538	igt_spinner_end(&a.spin);
2539	intel_gt_set_wedged(gt);
2540	err = -EIO;
2541	goto err_client_b;
2542}
2543
2544static int live_chain_preempt(void *arg)
2545{
2546	struct intel_gt *gt = arg;
2547	struct intel_engine_cs *engine;
2548	struct preempt_client hi, lo;
2549	enum intel_engine_id id;
2550	int err = -ENOMEM;
2551
2552	/*
2553	 * Build a chain AB...BA between two contexts (A, B) and request
2554	 * preemption of the last request. It should then complete before
2555	 * the previously submitted spinner in B.
2556	 */
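	/*
	 * Concretely, each iteration below submits a high-priority spinner,
	 * a low-priority spinner, count low-priority nop requests and finally
	 * a high-priority request that is boosted to I915_PRIORITY_MAX. The
	 * boosted request must jump over the entire low-priority chain and
	 * complete within the short (HZ / 5) wait, otherwise preemption has
	 * failed.
	 */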
2557
2558	if (preempt_client_init(gt, &hi))
2559		return -ENOMEM;
2560
2561	if (preempt_client_init(gt, &lo))
2562		goto err_client_hi;
2563
2564	for_each_engine(engine, gt, id) {
2565		struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
2566		struct igt_live_test t;
2567		struct i915_request *rq;
2568		int ring_size, count, i;
2569
2570		if (!intel_engine_has_preemption(engine))
2571			continue;
2572
2573		rq = spinner_create_request(&lo.spin,
2574					    lo.ctx, engine,
2575					    MI_ARB_CHECK);
2576		if (IS_ERR(rq))
2577			goto err_wedged;
2578
2579		i915_request_get(rq);
2580		i915_request_add(rq);
2581
2582		ring_size = rq->wa_tail - rq->head;
2583		if (ring_size < 0)
2584			ring_size += rq->ring->size;
2585		ring_size = rq->ring->size / ring_size;
2586		pr_debug("%s(%s): Using maximum of %d requests\n",
2587			 __func__, engine->name, ring_size);
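		/*
		 * ring_size is now the number of requests of this footprint
		 * that fit in the ring, and bounds the chain length below.
		 * For example (illustrative numbers only), a 16KiB ring and
		 * a 256 byte request footprint give 16384 / 256 = 64.
		 */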
2588
2589		igt_spinner_end(&lo.spin);
2590		if (i915_request_wait(rq, 0, HZ / 2) < 0) {
2591			pr_err("Timed out waiting to flush %s\n", engine->name);
2592			i915_request_put(rq);
2593			goto err_wedged;
2594		}
2595		i915_request_put(rq);
2596
2597		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
2598			err = -EIO;
2599			goto err_wedged;
2600		}
2601
2602		for_each_prime_number_from(count, 1, ring_size) {
2603			rq = spinner_create_request(&hi.spin,
2604						    hi.ctx, engine,
2605						    MI_ARB_CHECK);
2606			if (IS_ERR(rq))
2607				goto err_wedged;
2608			i915_request_add(rq);
2609			if (!igt_wait_for_spinner(&hi.spin, rq))
2610				goto err_wedged;
2611
2612			rq = spinner_create_request(&lo.spin,
2613						    lo.ctx, engine,
2614						    MI_ARB_CHECK);
2615			if (IS_ERR(rq))
2616				goto err_wedged;
2617			i915_request_add(rq);
2618
2619			for (i = 0; i < count; i++) {
2620				rq = igt_request_alloc(lo.ctx, engine);
2621				if (IS_ERR(rq))
2622					goto err_wedged;
2623				i915_request_add(rq);
2624			}
2625
2626			rq = igt_request_alloc(hi.ctx, engine);
2627			if (IS_ERR(rq))
2628				goto err_wedged;
2629
2630			i915_request_get(rq);
2631			i915_request_add(rq);
2632			engine->schedule(rq, &attr);
2633
2634			igt_spinner_end(&hi.spin);
2635			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2636				struct drm_printer p =
2637					drm_info_printer(gt->i915->drm.dev);
2638
2639				pr_err("Failed to preempt over chain of %d\n",
2640				       count);
2641				intel_engine_dump(engine, &p,
2642						  "%s\n", engine->name);
2643				i915_request_put(rq);
2644				goto err_wedged;
2645			}
2646			igt_spinner_end(&lo.spin);
2647			i915_request_put(rq);
2648
2649			rq = igt_request_alloc(lo.ctx, engine);
2650			if (IS_ERR(rq))
2651				goto err_wedged;
2652
2653			i915_request_get(rq);
2654			i915_request_add(rq);
2655
2656			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2657				struct drm_printer p =
2658					drm_info_printer(gt->i915->drm.dev);
2659
2660				pr_err("Failed to flush low priority chain of %d requests\n",
2661				       count);
2662				intel_engine_dump(engine, &p,
2663						  "%s\n", engine->name);
2664
2665				i915_request_put(rq);
2666				goto err_wedged;
2667			}
2668			i915_request_put(rq);
2669		}
2670
2671		if (igt_live_test_end(&t)) {
2672			err = -EIO;
2673			goto err_wedged;
2674		}
2675	}
2676
2677	err = 0;
2678err_client_lo:
2679	preempt_client_fini(&lo);
2680err_client_hi:
2681	preempt_client_fini(&hi);
2682	return err;
2683
2684err_wedged:
2685	igt_spinner_end(&hi.spin);
2686	igt_spinner_end(&lo.spin);
2687	intel_gt_set_wedged(gt);
2688	err = -EIO;
2689	goto err_client_lo;
2690}
2691
2692static int create_gang(struct intel_engine_cs *engine,
2693		       struct i915_request **prev)
2694{
2695	struct drm_i915_gem_object *obj;
2696	struct intel_context *ce;
2697	struct i915_request *rq;
2698	struct i915_vma *vma;
2699	u32 *cs;
2700	int err;
2701
2702	ce = intel_context_create(engine);
2703	if (IS_ERR(ce))
2704		return PTR_ERR(ce);
2705
2706	obj = i915_gem_object_create_internal(engine->i915, 4096);
2707	if (IS_ERR(obj)) {
2708		err = PTR_ERR(obj);
2709		goto err_ce;
2710	}
2711
2712	vma = i915_vma_instance(obj, ce->vm, NULL);
2713	if (IS_ERR(vma)) {
2714		err = PTR_ERR(vma);
2715		goto err_obj;
2716	}
2717
2718	err = i915_vma_pin(vma, 0, 0, PIN_USER);
2719	if (err)
2720		goto err_obj;
2721
2722	cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
2723	if (IS_ERR(cs)) {
2724		err = PTR_ERR(cs);
2725		goto err_obj;
2726	}
2727
2728	/* Semaphore target: spin until zero */
2729	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2730
2731	*cs++ = MI_SEMAPHORE_WAIT |
2732		MI_SEMAPHORE_POLL |
2733		MI_SEMAPHORE_SAD_EQ_SDD;
2734	*cs++ = 0;
2735	*cs++ = lower_32_bits(vma->node.start);
2736	*cs++ = upper_32_bits(vma->node.start);
2737
2738	if (*prev) {
2739		u64 offset = (*prev)->batch->node.start;
2740
2741		/* Terminate the spinner in the next lower priority batch. */
2742		*cs++ = MI_STORE_DWORD_IMM_GEN4;
2743		*cs++ = lower_32_bits(offset);
2744		*cs++ = upper_32_bits(offset);
2745		*cs++ = 0;
2746	}
2747
2748	*cs++ = MI_BATCH_BUFFER_END;
2749	i915_gem_object_flush_map(obj);
2750	i915_gem_object_unpin_map(obj);
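	/*
	 * Each gang member's batch thus enables arbitration, spins on a
	 * semaphore at the start of its own batch until that dword reads
	 * zero, and (except for the oldest member) then writes zero into the
	 * previous member's batch to release it. The newest, highest priority
	 * member is released by the caller poking its batch directly, so the
	 * gang drains from highest to lowest priority.
	 */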
2751
2752	rq = intel_context_create_request(ce);
2753	if (IS_ERR(rq)) {
2754		err = PTR_ERR(rq);
2755		goto err_obj;
2756	}
2757
2758	rq->batch = i915_vma_get(vma);
2759	i915_request_get(rq);
2760
2761	i915_vma_lock(vma);
2762	err = i915_request_await_object(rq, vma->obj, false);
2763	if (!err)
2764		err = i915_vma_move_to_active(vma, rq, 0);
2765	if (!err)
2766		err = rq->engine->emit_bb_start(rq,
2767						vma->node.start,
2768						PAGE_SIZE, 0);
2769	i915_vma_unlock(vma);
2770	i915_request_add(rq);
2771	if (err)
2772		goto err_rq;
2773
2774	i915_gem_object_put(obj);
2775	intel_context_put(ce);
2776
2777	rq->mock.link.next = &(*prev)->mock.link;
2778	*prev = rq;
2779	return 0;
2780
2781err_rq:
2782	i915_vma_put(rq->batch);
2783	i915_request_put(rq);
2784err_obj:
2785	i915_gem_object_put(obj);
2786err_ce:
2787	intel_context_put(ce);
2788	return err;
2789}
2790
2791static int __live_preempt_ring(struct intel_engine_cs *engine,
2792			       struct igt_spinner *spin,
2793			       int queue_sz, int ring_sz)
2794{
2795	struct intel_context *ce[2] = {};
2796	struct i915_request *rq;
2797	struct igt_live_test t;
2798	int err = 0;
2799	int n;
2800
2801	if (igt_live_test_begin(&t, engine->i915, __func__, engine->name))
2802		return -EIO;
2803
2804	for (n = 0; n < ARRAY_SIZE(ce); n++) {
2805		struct intel_context *tmp;
2806
2807		tmp = intel_context_create(engine);
2808		if (IS_ERR(tmp)) {
2809			err = PTR_ERR(tmp);
2810			goto err_ce;
2811		}
2812
2813		tmp->ring = __intel_context_ring_size(ring_sz);
2814
2815		err = intel_context_pin(tmp);
2816		if (err) {
2817			intel_context_put(tmp);
2818			goto err_ce;
2819		}
2820
2821		memset32(tmp->ring->vaddr,
2822			 0xdeadbeef, /* trigger a hang if executed */
2823			 tmp->ring->vma->size / sizeof(u32));
2824
2825		ce[n] = tmp;
2826	}
2827
2828	rq = igt_spinner_create_request(spin, ce[0], MI_ARB_CHECK);
2829	if (IS_ERR(rq)) {
2830		err = PTR_ERR(rq);
2831		goto err_ce;
2832	}
2833
2834	i915_request_get(rq);
2835	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
2836	i915_request_add(rq);
2837
2838	if (!igt_wait_for_spinner(spin, rq)) {
2839		intel_gt_set_wedged(engine->gt);
2840		i915_request_put(rq);
2841		err = -ETIME;
2842		goto err_ce;
2843	}
2844
2845	/* Fill the ring until we cause a wrap */
2846	n = 0;
2847	while (ce[0]->ring->tail - rq->wa_tail <= queue_sz) {
2848		struct i915_request *tmp;
2849
2850		tmp = intel_context_create_request(ce[0]);
2851		if (IS_ERR(tmp)) {
2852			err = PTR_ERR(tmp);
2853			i915_request_put(rq);
2854			goto err_ce;
2855		}
2856
2857		i915_request_add(tmp);
2858		intel_engine_flush_submission(engine);
2859		n++;
2860	}
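	/*
	 * ce[0]'s ring now holds more than queue_sz bytes of nop requests
	 * beyond the spinner's wa_tail, so the preemption below has to roll
	 * back a substantial chunk of the ring; the caller steps queue_sz up
	 * towards the ring size to force that rollback across a wrap.
	 */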
2861	intel_engine_flush_submission(engine);
2862	pr_debug("%s: Filled %d with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
2863		 engine->name, queue_sz, n,
2864		 ce[0]->ring->size,
2865		 ce[0]->ring->tail,
2866		 ce[0]->ring->emit,
2867		 rq->tail);
2868	i915_request_put(rq);
2869
2870	/* Create a second request to preempt the first ring */
2871	rq = intel_context_create_request(ce[1]);
2872	if (IS_ERR(rq)) {
2873		err = PTR_ERR(rq);
2874		goto err_ce;
2875	}
2876
2877	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
2878	i915_request_get(rq);
2879	i915_request_add(rq);
2880
2881	err = wait_for_submit(engine, rq, HZ / 2);
2882	i915_request_put(rq);
2883	if (err) {
2884		pr_err("%s: preemption request was not submitted\n",
2885		       engine->name);
2886		err = -ETIME;
2887	}
2888
2889	pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
2890		 engine->name,
2891		 ce[0]->ring->tail, ce[0]->ring->emit,
2892		 ce[1]->ring->tail, ce[1]->ring->emit);
2893
2894err_ce:
2895	intel_engine_flush_submission(engine);
2896	igt_spinner_end(spin);
2897	for (n = 0; n < ARRAY_SIZE(ce); n++) {
2898		if (IS_ERR_OR_NULL(ce[n]))
2899			break;
2900
2901		intel_context_unpin(ce[n]);
2902		intel_context_put(ce[n]);
2903	}
2904	if (igt_live_test_end(&t))
2905		err = -EIO;
2906	return err;
2907}
2908
2909static int live_preempt_ring(void *arg)
2910{
2911	struct intel_gt *gt = arg;
2912	struct intel_engine_cs *engine;
2913	struct igt_spinner spin;
2914	enum intel_engine_id id;
2915	int err = 0;
2916
2917	/*
2918	 * Check that we roll back large chunks of a ring in order to do a
2919	 * preemption event. Similar to live_unlite_ring, but looking at
2920	 * ring size rather than the impact of intel_ring_direction().
2921	 */
2922
2923	if (igt_spinner_init(&spin, gt))
2924		return -ENOMEM;
2925
2926	for_each_engine(engine, gt, id) {
2927		int n;
2928
2929		if (!intel_engine_has_preemption(engine))
2930			continue;
2931
2932		if (!intel_engine_can_store_dword(engine))
2933			continue;
2934
2935		st_engine_heartbeat_disable(engine);
2936
2937		for (n = 0; n <= 3; n++) {
2938			err = __live_preempt_ring(engine, &spin,
2939						  n * SZ_4K / 4, SZ_4K);
2940			if (err)
2941				break;
2942		}
2943
2944		st_engine_heartbeat_enable(engine);
2945		if (err)
2946			break;
2947	}
2948
2949	igt_spinner_fini(&spin);
2950	return err;
2951}
2952
2953static int live_preempt_gang(void *arg)
2954{
2955	struct intel_gt *gt = arg;
2956	struct intel_engine_cs *engine;
2957	enum intel_engine_id id;
2958
2959	/*
2960	 * Build as long a chain of preempters as we can, with each
2961	 * request at a higher priority than the last. Once we are ready, we release
2962	 * the last batch, which then percolates down the chain, each releasing
2963	 * the next oldest in turn. The intent is to simply push as hard as we
2964	 * can with the number of preemptions, trying to exceed narrow HW
2965	 * limits. At a minimum, we insist that we can sort all the user
2966	 * high priority levels into execution order.
2967	 */
2968
2969	for_each_engine(engine, gt, id) {
2970		struct i915_request *rq = NULL;
2971		struct igt_live_test t;
2972		IGT_TIMEOUT(end_time);
2973		int prio = 0;
2974		int err = 0;
2975		u32 *cs;
2976
2977		if (!intel_engine_has_preemption(engine))
2978			continue;
2979
2980		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
2981			return -EIO;
2982
2983		do {
2984			struct i915_sched_attr attr = { .priority = prio++ };
2985
2986			err = create_gang(engine, &rq);
2987			if (err)
2988				break;
2989
2990			/* Submit each spinner at increasing priority */
2991			engine->schedule(rq, &attr);
2992		} while (prio <= I915_PRIORITY_MAX &&
2993			 !__igt_timeout(end_time, NULL));
2994		pr_debug("%s: Preempt chain of %d requests\n",
2995			 engine->name, prio);
2996
2997		/*
2998		 * Such that the last spinner is the highest priority and
2999		 * should execute first. When that spinner completes,
3000		 * it will terminate the next lowest spinner until there
3001		 * are no more spinners and the gang is complete.
3002		 */
3003		cs = i915_gem_object_pin_map_unlocked(rq->batch->obj, I915_MAP_WC);
3004		if (!IS_ERR(cs)) {
3005			*cs = 0;
3006			i915_gem_object_unpin_map(rq->batch->obj);
3007		} else {
3008			err = PTR_ERR(cs);
3009			intel_gt_set_wedged(gt);
3010		}
3011
3012		while (rq) { /* wait for each rq from highest to lowest prio */
3013			struct i915_request *n = list_next_entry(rq, mock.link);
3014
3015			if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
3016				struct drm_printer p =
3017					drm_info_printer(engine->i915->drm.dev);
3018
3019				pr_err("Failed to flush chain of %d requests, at %d\n",
3020				       prio, rq_prio(rq));
3021				intel_engine_dump(engine, &p,
3022						  "%s\n", engine->name);
3023
3024				err = -ETIME;
3025			}
3026
3027			i915_vma_put(rq->batch);
3028			i915_request_put(rq);
3029			rq = n;
3030		}
3031
3032		if (igt_live_test_end(&t))
3033			err = -EIO;
3034		if (err)
3035			return err;
3036	}
3037
3038	return 0;
3039}
3040
3041static struct i915_vma *
3042create_gpr_user(struct intel_engine_cs *engine,
3043		struct i915_vma *result,
3044		unsigned int offset)
3045{
3046	struct drm_i915_gem_object *obj;
3047	struct i915_vma *vma;
3048	u32 *cs;
3049	int err;
3050	int i;
3051
3052	obj = i915_gem_object_create_internal(engine->i915, 4096);
3053	if (IS_ERR(obj))
3054		return ERR_CAST(obj);
3055
3056	vma = i915_vma_instance(obj, result->vm, NULL);
3057	if (IS_ERR(vma)) {
3058		i915_gem_object_put(obj);
3059		return vma;
3060	}
3061
3062	err = i915_vma_pin(vma, 0, 0, PIN_USER);
3063	if (err) {
3064		i915_vma_put(vma);
3065		return ERR_PTR(err);
3066	}
3067
3068	cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
3069	if (IS_ERR(cs)) {
3070		i915_vma_put(vma);
3071		return ERR_CAST(cs);
3072	}
3073
3074	/* All GPR are clear for new contexts. We use GPR(0) as a constant */
3075	*cs++ = MI_LOAD_REGISTER_IMM(1);
3076	*cs++ = CS_GPR(engine, 0);
3077	*cs++ = 1;
3078
3079	for (i = 1; i < NUM_GPR; i++) {
3080		u64 addr;
3081
3082		/*
3083		 * Perform: GPR[i]++
3084		 *
3085		 * As we read and write into the context saved GPR[i], if
3086		 * we restart this batch buffer from an earlier point, we
3087		 * will repeat the increment and store a value > 1.
3088		 */
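		/*
		 * Per GPR the emitted sequence is roughly:
		 *   GPR[i] += GPR[0] (== 1)            - MI_MATH
		 *   result[offset / 4 + i] = GPR[i]    - MI_STORE_REGISTER_MEM
		 *   wait until result[0] >= i          - MI_SEMAPHORE_WAIT
		 * so the batch advances one step each time preempt_user()
		 * bumps the global semaphore, and any replayed step shows up
		 * as a stored value greater than 1.
		 */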
3089		*cs++ = MI_MATH(4);
3090		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i));
3091		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0));
3092		*cs++ = MI_MATH_ADD;
3093		*cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU);
3094
3095		addr = result->node.start + offset + i * sizeof(*cs);
3096		*cs++ = MI_STORE_REGISTER_MEM_GEN8;
3097		*cs++ = CS_GPR(engine, 2 * i);
3098		*cs++ = lower_32_bits(addr);
3099		*cs++ = upper_32_bits(addr);
3100
3101		*cs++ = MI_SEMAPHORE_WAIT |
3102			MI_SEMAPHORE_POLL |
3103			MI_SEMAPHORE_SAD_GTE_SDD;
3104		*cs++ = i;
3105		*cs++ = lower_32_bits(result->node.start);
3106		*cs++ = upper_32_bits(result->node.start);
3107	}
3108
3109	*cs++ = MI_BATCH_BUFFER_END;
3110	i915_gem_object_flush_map(obj);
3111	i915_gem_object_unpin_map(obj);
3112
3113	return vma;
3114}
3115
3116static struct i915_vma *create_global(struct intel_gt *gt, size_t sz)
3117{
3118	struct drm_i915_gem_object *obj;
3119	struct i915_vma *vma;
3120	int err;
3121
3122	obj = i915_gem_object_create_internal(gt->i915, sz);
3123	if (IS_ERR(obj))
3124		return ERR_CAST(obj);
3125
3126	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
3127	if (IS_ERR(vma)) {
3128		i915_gem_object_put(obj);
3129		return vma;
3130	}
3131
3132	err = i915_ggtt_pin(vma, NULL, 0, 0);
3133	if (err) {
3134		i915_vma_put(vma);
3135		return ERR_PTR(err);
3136	}
3137
3138	return vma;
3139}
3140
3141static struct i915_request *
3142create_gpr_client(struct intel_engine_cs *engine,
3143		  struct i915_vma *global,
3144		  unsigned int offset)
3145{
3146	struct i915_vma *batch, *vma;
3147	struct intel_context *ce;
3148	struct i915_request *rq;
3149	int err;
3150
3151	ce = intel_context_create(engine);
3152	if (IS_ERR(ce))
3153		return ERR_CAST(ce);
3154
3155	vma = i915_vma_instance(global->obj, ce->vm, NULL);
3156	if (IS_ERR(vma)) {
3157		err = PTR_ERR(vma);
3158		goto out_ce;
3159	}
3160
3161	err = i915_vma_pin(vma, 0, 0, PIN_USER);
3162	if (err)
3163		goto out_ce;
3164
3165	batch = create_gpr_user(engine, vma, offset);
3166	if (IS_ERR(batch)) {
3167		err = PTR_ERR(batch);
3168		goto out_vma;
3169	}
3170
3171	rq = intel_context_create_request(ce);
3172	if (IS_ERR(rq)) {
3173		err = PTR_ERR(rq);
3174		goto out_batch;
3175	}
3176
3177	i915_vma_lock(vma);
3178	err = i915_request_await_object(rq, vma->obj, false);
3179	if (!err)
3180		err = i915_vma_move_to_active(vma, rq, 0);
3181	i915_vma_unlock(vma);
3182
3183	i915_vma_lock(batch);
3184	if (!err)
3185		err = i915_request_await_object(rq, batch->obj, false);
3186	if (!err)
3187		err = i915_vma_move_to_active(batch, rq, 0);
3188	if (!err)
3189		err = rq->engine->emit_bb_start(rq,
3190						batch->node.start,
3191						PAGE_SIZE, 0);
3192	i915_vma_unlock(batch);
3193	i915_vma_unpin(batch);
3194
3195	if (!err)
3196		i915_request_get(rq);
3197	i915_request_add(rq);
3198
3199out_batch:
3200	i915_vma_put(batch);
3201out_vma:
3202	i915_vma_unpin(vma);
3203out_ce:
3204	intel_context_put(ce);
3205	return err ? ERR_PTR(err) : rq;
3206}
3207
3208static int preempt_user(struct intel_engine_cs *engine,
3209			struct i915_vma *global,
3210			int id)
3211{
3212	struct i915_sched_attr attr = {
3213		.priority = I915_PRIORITY_MAX
3214	};
3215	struct i915_request *rq;
3216	int err = 0;
3217	u32 *cs;
3218
3219	rq = intel_engine_create_kernel_request(engine);
3220	if (IS_ERR(rq))
3221		return PTR_ERR(rq);
3222
3223	cs = intel_ring_begin(rq, 4);
3224	if (IS_ERR(cs)) {
3225		i915_request_add(rq);
3226		return PTR_ERR(cs);
3227	}
3228
3229	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
3230	*cs++ = i915_ggtt_offset(global);
3231	*cs++ = 0;
3232	*cs++ = id;
3233
3234	intel_ring_advance(rq, cs);
3235
3236	i915_request_get(rq);
3237	i915_request_add(rq);
3238
3239	engine->schedule(rq, &attr);
3240
3241	if (i915_request_wait(rq, 0, HZ / 2) < 0)
3242		err = -ETIME;
3243	i915_request_put(rq);
3244
3245	return err;
3246}
3247
3248static int live_preempt_user(void *arg)
3249{
3250	struct intel_gt *gt = arg;
3251	struct intel_engine_cs *engine;
3252	struct i915_vma *global;
3253	enum intel_engine_id id;
3254	u32 *result;
3255	int err = 0;
3256
3257	/*
3258	 * In our other tests, we look at preemption in carefully
3259	 * controlled conditions in the ringbuffer. Since most of the
3260	 * time is spent in user batches, most of our preemptions naturally
3261	 * occur there. We want to verify that when we preempt inside a batch
3262	 * we continue on from the current instruction and do not roll back
3263	 * to the start, or another earlier arbitration point.
3264	 *
3265	 * To verify this, we create a batch which is a mixture of
3266	 * MI_MATH (gpr++) MI_SRM (gpr) and preemption points. Then with
3267	 * a few preempting contexts thrown into the mix, we look for any
3268	 * repeated instructions (which show up as incorrect values).
3269	 */
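	/*
	 * The expected layout of the shared page is then: result[0] is the
	 * global semaphore bumped from 1 to NUM_GPR by preempt_user(), and
	 * client i stores its incremented GPRs into result[NUM_GPR * i + 1]
	 * up to result[NUM_GPR * i + NUM_GPR - 1]. Each of those slots must
	 * read back exactly 1; anything larger means an instruction was
	 * replayed after a preemption.
	 */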
3270
3271	global = create_global(gt, 4096);
3272	if (IS_ERR(global))
3273		return PTR_ERR(global);
3274
3275	result = i915_gem_object_pin_map_unlocked(global->obj, I915_MAP_WC);
3276	if (IS_ERR(result)) {
3277		i915_vma_unpin_and_release(&global, 0);
3278		return PTR_ERR(result);
3279	}
3280
3281	for_each_engine(engine, gt, id) {
3282		struct i915_request *client[3] = {};
3283		struct igt_live_test t;
3284		int i;
3285
3286		if (!intel_engine_has_preemption(engine))
3287			continue;
3288
3289		if (GRAPHICS_VER(gt->i915) == 8 && engine->class != RENDER_CLASS)
3290			continue; /* we need per-context GPR */
3291
3292		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
3293			err = -EIO;
3294			break;
3295		}
3296
3297		memset(result, 0, 4096);
3298
3299		for (i = 0; i < ARRAY_SIZE(client); i++) {
3300			struct i915_request *rq;
3301
3302			rq = create_gpr_client(engine, global,
3303					       NUM_GPR * i * sizeof(u32));
3304			if (IS_ERR(rq)) {
3305				err = PTR_ERR(rq);
3306				goto end_test;
3307			}
3308
3309			client[i] = rq;
3310		}
3311
3312		/* Continuously preempt the set of 3 running contexts */
3313		for (i = 1; i <= NUM_GPR; i++) {
3314			err = preempt_user(engine, global, i);
3315			if (err)
3316				goto end_test;
3317		}
3318
3319		if (READ_ONCE(result[0]) != NUM_GPR) {
3320			pr_err("%s: Failed to release semaphore\n",
3321			       engine->name);
3322			err = -EIO;
3323			goto end_test;
3324		}
3325
3326		for (i = 0; i < ARRAY_SIZE(client); i++) {
3327			int gpr;
3328
3329			if (i915_request_wait(client[i], 0, HZ / 2) < 0) {
3330				err = -ETIME;
3331				goto end_test;
3332			}
3333
3334			for (gpr = 1; gpr < NUM_GPR; gpr++) {
3335				if (result[NUM_GPR * i + gpr] != 1) {
3336					pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n",
3337					       engine->name,
3338					       i, gpr, result[NUM_GPR * i + gpr]);
3339					err = -EINVAL;
3340					goto end_test;
3341				}
3342			}
3343		}
3344
3345end_test:
3346		for (i = 0; i < ARRAY_SIZE(client); i++) {
3347			if (!client[i])
3348				break;
3349
3350			i915_request_put(client[i]);
3351		}
3352
3353		/* Flush the semaphores on error */
3354		smp_store_mb(result[0], -1);
3355		if (igt_live_test_end(&t))
3356			err = -EIO;
3357		if (err)
3358			break;
3359	}
3360
3361	i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP);
3362	return err;
3363}
3364
3365static int live_preempt_timeout(void *arg)
3366{
3367	struct intel_gt *gt = arg;
3368	struct i915_gem_context *ctx_hi, *ctx_lo;
3369	struct igt_spinner spin_lo;
3370	struct intel_engine_cs *engine;
3371	enum intel_engine_id id;
3372	int err = -ENOMEM;
3373
3374	/*
3375	 * Check that we force preemption to occur by cancelling the previous
3376	 * context if it refuses to yield the GPU.
3377	 */
3378	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
3379		return 0;
3380
3381	if (!intel_has_reset_engine(gt))
3382		return 0;
3383
3384	if (igt_spinner_init(&spin_lo, gt))
3385		return -ENOMEM;
3386
3387	ctx_hi = kernel_context(gt->i915);
3388	if (!ctx_hi)
3389		goto err_spin_lo;
3390	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
3391
3392	ctx_lo = kernel_context(gt->i915);
3393	if (!ctx_lo)
3394		goto err_ctx_hi;
3395	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
3396
3397	for_each_engine(engine, gt, id) {
3398		unsigned long saved_timeout;
3399		struct i915_request *rq;
3400
3401		if (!intel_engine_has_preemption(engine))
3402			continue;
3403
3404		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
3405					    MI_NOOP); /* preemption disabled */
3406		if (IS_ERR(rq)) {
3407			err = PTR_ERR(rq);
3408			goto err_ctx_lo;
3409		}
3410
3411		i915_request_add(rq);
3412		if (!igt_wait_for_spinner(&spin_lo, rq)) {
3413			intel_gt_set_wedged(gt);
3414			err = -EIO;
3415			goto err_ctx_lo;
3416		}
3417
3418		rq = igt_request_alloc(ctx_hi, engine);
3419		if (IS_ERR(rq)) {
3420			igt_spinner_end(&spin_lo);
3421			err = PTR_ERR(rq);
3422			goto err_ctx_lo;
3423		}
3424
3425		/* Flush the previous CS ack before changing timeouts */
3426		while (READ_ONCE(engine->execlists.pending[0]))
3427			cpu_relax();
3428
3429		saved_timeout = engine->props.preempt_timeout_ms;
3430		engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */
3431
3432		i915_request_get(rq);
3433		i915_request_add(rq);
3434
3435		intel_engine_flush_submission(engine);
3436		engine->props.preempt_timeout_ms = saved_timeout;
3437
3438		if (i915_request_wait(rq, 0, HZ / 10) < 0) {
3439			intel_gt_set_wedged(gt);
3440			i915_request_put(rq);
3441			err = -ETIME;
3442			goto err_ctx_lo;
3443		}
3444
3445		igt_spinner_end(&spin_lo);
3446		i915_request_put(rq);
3447	}
3448
3449	err = 0;
3450err_ctx_lo:
3451	kernel_context_close(ctx_lo);
3452err_ctx_hi:
3453	kernel_context_close(ctx_hi);
3454err_spin_lo:
3455	igt_spinner_fini(&spin_lo);
3456	return err;
3457}
3458
3459static int random_range(struct rnd_state *rnd, int min, int max)
3460{
3461	return i915_prandom_u32_max_state(max - min, rnd) + min;
3462}
3463
3464static int random_priority(struct rnd_state *rnd)
3465{
3466	return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
3467}
3468
3469struct preempt_smoke {
3470	struct intel_gt *gt;
3471	struct i915_gem_context **contexts;
3472	struct intel_engine_cs *engine;
3473	struct drm_i915_gem_object *batch;
3474	unsigned int ncontext;
3475	struct rnd_state prng;
3476	unsigned long count;
3477};
3478
3479static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
3480{
3481	return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
3482							  &smoke->prng)];
3483}
3484
3485static int smoke_submit(struct preempt_smoke *smoke,
3486			struct i915_gem_context *ctx, int prio,
3487			struct drm_i915_gem_object *batch)
3488{
3489	struct i915_request *rq;
3490	struct i915_vma *vma = NULL;
3491	int err = 0;
3492
3493	if (batch) {
3494		struct i915_address_space *vm;
3495
3496		vm = i915_gem_context_get_vm_rcu(ctx);
3497		vma = i915_vma_instance(batch, vm, NULL);
3498		i915_vm_put(vm);
3499		if (IS_ERR(vma))
3500			return PTR_ERR(vma);
3501
3502		err = i915_vma_pin(vma, 0, 0, PIN_USER);
3503		if (err)
3504			return err;
3505	}
3506
3507	ctx->sched.priority = prio;
3508
3509	rq = igt_request_alloc(ctx, smoke->engine);
3510	if (IS_ERR(rq)) {
3511		err = PTR_ERR(rq);
3512		goto unpin;
3513	}
3514
3515	if (vma) {
3516		i915_vma_lock(vma);
3517		err = i915_request_await_object(rq, vma->obj, false);
3518		if (!err)
3519			err = i915_vma_move_to_active(vma, rq, 0);
3520		if (!err)
3521			err = rq->engine->emit_bb_start(rq,
3522							vma->node.start,
3523							PAGE_SIZE, 0);
3524		i915_vma_unlock(vma);
3525	}
3526
3527	i915_request_add(rq);
3528
3529unpin:
3530	if (vma)
3531		i915_vma_unpin(vma);
3532
3533	return err;
3534}
3535
3536static int smoke_crescendo_thread(void *arg)
3537{
3538	struct preempt_smoke *smoke = arg;
3539	IGT_TIMEOUT(end_time);
3540	unsigned long count;
3541
3542	count = 0;
3543	do {
3544		struct i915_gem_context *ctx = smoke_context(smoke);
3545		int err;
3546
3547		err = smoke_submit(smoke,
3548				   ctx, count % I915_PRIORITY_MAX,
3549				   smoke->batch);
3550		if (err)
3551			return err;
3552
3553		count++;
3554	} while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
3555
3556	smoke->count = count;
3557	return 0;
3558}
3559
3560static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
3561#define BATCH BIT(0)
3562{
3563	struct task_struct *tsk[I915_NUM_ENGINES] = {};
3564	struct preempt_smoke arg[I915_NUM_ENGINES];
3565	struct intel_engine_cs *engine;
3566	enum intel_engine_id id;
3567	unsigned long count;
3568	int err = 0;
3569
3570	for_each_engine(engine, smoke->gt, id) {
3571		arg[id] = *smoke;
3572		arg[id].engine = engine;
3573		if (!(flags & BATCH))
3574			arg[id].batch = NULL;
3575		arg[id].count = 0;
3576
3577		tsk[id] = kthread_run(smoke_crescendo_thread, &arg[id],
3578				      "igt/smoke:%d", id);
3579		if (IS_ERR(tsk[id])) {
3580			err = PTR_ERR(tsk[id]);
3581			break;
3582		}
3583		get_task_struct(tsk[id]);
3584	}
3585
3586	yield(); /* start all threads before we kthread_stop() */
3587
3588	count = 0;
3589	for_each_engine(engine, smoke->gt, id) {
3590		int status;
3591
3592		if (IS_ERR_OR_NULL(tsk[id]))
3593			continue;
3594
3595		status = kthread_stop(tsk[id]);
3596		if (status && !err)
3597			err = status;
3598
3599		count += arg[id].count;
3600
3601		put_task_struct(tsk[id]);
3602	}
3603
3604	pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
3605		count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3606	return err;
3607}
3608
3609static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
3610{
3611	enum intel_engine_id id;
3612	IGT_TIMEOUT(end_time);
3613	unsigned long count;
3614
3615	count = 0;
3616	do {
3617		for_each_engine(smoke->engine, smoke->gt, id) {
3618			struct i915_gem_context *ctx = smoke_context(smoke);
3619			int err;
3620
3621			err = smoke_submit(smoke,
3622					   ctx, random_priority(&smoke->prng),
3623					   flags & BATCH ? smoke->batch : NULL);
3624			if (err)
3625				return err;
3626
3627			count++;
3628		}
3629	} while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
3630
3631	pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
3632		count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3633	return 0;
3634}
3635
3636static int live_preempt_smoke(void *arg)
3637{
3638	struct preempt_smoke smoke = {
3639		.gt = arg,
3640		.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
3641		.ncontext = 256,
3642	};
3643	const unsigned int phase[] = { 0, BATCH };
3644	struct igt_live_test t;
3645	int err = -ENOMEM;
3646	u32 *cs;
3647	int n;
3648
3649	smoke.contexts = kmalloc_array(smoke.ncontext,
3650				       sizeof(*smoke.contexts),
3651				       GFP_KERNEL);
3652	if (!smoke.contexts)
3653		return -ENOMEM;
3654
3655	smoke.batch =
3656		i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
3657	if (IS_ERR(smoke.batch)) {
3658		err = PTR_ERR(smoke.batch);
3659		goto err_free;
3660	}
3661
3662	cs = i915_gem_object_pin_map_unlocked(smoke.batch, I915_MAP_WB);
3663	if (IS_ERR(cs)) {
3664		err = PTR_ERR(cs);
3665		goto err_batch;
3666	}
3667	for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
3668		cs[n] = MI_ARB_CHECK;
3669	cs[n] = MI_BATCH_BUFFER_END;
3670	i915_gem_object_flush_map(smoke.batch);
3671	i915_gem_object_unpin_map(smoke.batch);
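	/*
	 * The smoke batch is a page of MI_ARB_CHECK terminated by
	 * MI_BATCH_BUFFER_END, i.e. an arbitration point at every dword, so
	 * a submitted batch can be preempted at any instruction boundary.
	 */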
3672
3673	if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
3674		err = -EIO;
3675		goto err_batch;
3676	}
3677
3678	for (n = 0; n < smoke.ncontext; n++) {
3679		smoke.contexts[n] = kernel_context(smoke.gt->i915);
3680		if (!smoke.contexts[n])
3681			goto err_ctx;
3682	}
3683
3684	for (n = 0; n < ARRAY_SIZE(phase); n++) {
3685		err = smoke_crescendo(&smoke, phase[n]);
3686		if (err)
3687			goto err_ctx;
3688
3689		err = smoke_random(&smoke, phase[n]);
3690		if (err)
3691			goto err_ctx;
3692	}
3693
3694err_ctx:
3695	if (igt_live_test_end(&t))
3696		err = -EIO;
3697
3698	for (n = 0; n < smoke.ncontext; n++) {
3699		if (!smoke.contexts[n])
3700			break;
3701		kernel_context_close(smoke.contexts[n]);
3702	}
3703
3704err_batch:
3705	i915_gem_object_put(smoke.batch);
3706err_free:
3707	kfree(smoke.contexts);
3708
3709	return err;
3710}
3711
3712static int nop_virtual_engine(struct intel_gt *gt,
3713			      struct intel_engine_cs **siblings,
3714			      unsigned int nsibling,
3715			      unsigned int nctx,
3716			      unsigned int flags)
3717#define CHAIN BIT(0)
3718{
3719	IGT_TIMEOUT(end_time);
3720	struct i915_request *request[16] = {};
3721	struct intel_context *ve[16];
3722	unsigned long n, prime, nc;
3723	struct igt_live_test t;
3724	ktime_t times[2] = {};
3725	int err;
3726
3727	GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
3728
3729	for (n = 0; n < nctx; n++) {
3730		ve[n] = intel_execlists_create_virtual(siblings, nsibling);
3731		if (IS_ERR(ve[n])) {
3732			err = PTR_ERR(ve[n]);
3733			nctx = n;
3734			goto out;
3735		}
3736
3737		err = intel_context_pin(ve[n]);
3738		if (err) {
3739			intel_context_put(ve[n]);
3740			nctx = n;
3741			goto out;
3742		}
3743	}
3744
3745	err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
3746	if (err)
3747		goto out;
3748
3749	for_each_prime_number_from(prime, 1, 8192) {
3750		times[1] = ktime_get_raw();
3751
3752		if (flags & CHAIN) {
3753			for (nc = 0; nc < nctx; nc++) {
3754				for (n = 0; n < prime; n++) {
3755					struct i915_request *rq;
3756
3757					rq = i915_request_create(ve[nc]);
3758					if (IS_ERR(rq)) {
3759						err = PTR_ERR(rq);
3760						goto out;
3761					}
3762
3763					if (request[nc])
3764						i915_request_put(request[nc]);
3765					request[nc] = i915_request_get(rq);
3766					i915_request_add(rq);
3767				}
3768			}
3769		} else {
3770			for (n = 0; n < prime; n++) {
3771				for (nc = 0; nc < nctx; nc++) {
3772					struct i915_request *rq;
3773
3774					rq = i915_request_create(ve[nc]);
3775					if (IS_ERR(rq)) {
3776						err = PTR_ERR(rq);
3777						goto out;
3778					}
3779
3780					if (request[nc])
3781						i915_request_put(request[nc]);
3782					request[nc] = i915_request_get(rq);
3783					i915_request_add(rq);
3784				}
3785			}
3786		}
3787
3788		for (nc = 0; nc < nctx; nc++) {
3789			if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
3790				pr_err("%s(%s): wait for %llx:%lld timed out\n",
3791				       __func__, ve[0]->engine->name,
3792				       request[nc]->fence.context,
3793				       request[nc]->fence.seqno);
3794
3795				GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3796					  __func__, ve[0]->engine->name,
3797					  request[nc]->fence.context,
3798					  request[nc]->fence.seqno);
3799				GEM_TRACE_DUMP();
3800				intel_gt_set_wedged(gt);
3801				break;
3802			}
3803		}
3804
3805		times[1] = ktime_sub(ktime_get_raw(), times[1]);
3806		if (prime == 1)
3807			times[0] = times[1];
3808
3809		for (nc = 0; nc < nctx; nc++) {
3810			i915_request_put(request[nc]);
3811			request[nc] = NULL;
3812		}
3813
3814		if (__igt_timeout(end_time, NULL))
3815			break;
3816	}
3817
3818	err = igt_live_test_end(&t);
3819	if (err)
3820		goto out;
3821
3822	pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
3823		nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
3824		prime, div64_u64(ktime_to_ns(times[1]), prime));
3825
3826out:
3827	if (igt_flush_test(gt->i915))
3828		err = -EIO;
3829
3830	for (nc = 0; nc < nctx; nc++) {
3831		i915_request_put(request[nc]);
3832		intel_context_unpin(ve[nc]);
3833		intel_context_put(ve[nc]);
3834	}
3835	return err;
3836}
3837
3838static unsigned int
3839__select_siblings(struct intel_gt *gt,
3840		  unsigned int class,
3841		  struct intel_engine_cs **siblings,
3842		  bool (*filter)(const struct intel_engine_cs *))
3843{
3844	unsigned int n = 0;
3845	unsigned int inst;
3846
3847	for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3848		if (!gt->engine_class[class][inst])
3849			continue;
3850
3851		if (filter && !filter(gt->engine_class[class][inst]))
3852			continue;
3853
3854		siblings[n++] = gt->engine_class[class][inst];
3855	}
3856
3857	return n;
3858}
3859
3860static unsigned int
3861select_siblings(struct intel_gt *gt,
3862		unsigned int class,
3863		struct intel_engine_cs **siblings)
3864{
3865	return __select_siblings(gt, class, siblings, NULL);
3866}
3867
3868static int live_virtual_engine(void *arg)
3869{
3870	struct intel_gt *gt = arg;
3871	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3872	struct intel_engine_cs *engine;
3873	enum intel_engine_id id;
3874	unsigned int class;
3875	int err;
3876
3877	if (intel_uc_uses_guc_submission(&gt->uc))
3878		return 0;
3879
3880	for_each_engine(engine, gt, id) {
3881		err = nop_virtual_engine(gt, &engine, 1, 1, 0);
3882		if (err) {
3883			pr_err("Failed to wrap engine %s: err=%d\n",
3884			       engine->name, err);
3885			return err;
3886		}
3887	}
3888
3889	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3890		int nsibling, n;
3891
3892		nsibling = select_siblings(gt, class, siblings);
3893		if (nsibling < 2)
3894			continue;
3895
3896		for (n = 1; n <= nsibling + 1; n++) {
3897			err = nop_virtual_engine(gt, siblings, nsibling,
3898						 n, 0);
3899			if (err)
3900				return err;
3901		}
3902
3903		err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
3904		if (err)
3905			return err;
3906	}
3907
3908	return 0;
3909}
3910
3911static int mask_virtual_engine(struct intel_gt *gt,
3912			       struct intel_engine_cs **siblings,
3913			       unsigned int nsibling)
3914{
3915	struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
3916	struct intel_context *ve;
3917	struct igt_live_test t;
3918	unsigned int n;
3919	int err;
3920
3921	/*
3922	 * Check that by setting the execution mask on a request, we can
3923	 * restrict it to our desired engine within the virtual engine.
3924	 */
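	/*
	 * The mechanism under test is simply:
	 *
	 *	rq = i915_request_create(ve);
	 *	rq->execution_mask = siblings[n]->mask;
	 *	i915_request_add(rq);
	 *
	 * after which the virtual engine may only place rq onto sibling n,
	 * which the loop below verifies via rq->engine once each request
	 * has completed.
	 */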
3925
3926	ve = intel_execlists_create_virtual(siblings, nsibling);
3927	if (IS_ERR(ve)) {
3928		err = PTR_ERR(ve);
3929		goto out_close;
3930	}
3931
3932	err = intel_context_pin(ve);
3933	if (err)
3934		goto out_put;
3935
3936	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3937	if (err)
3938		goto out_unpin;
3939
3940	for (n = 0; n < nsibling; n++) {
3941		request[n] = i915_request_create(ve);
3942		if (IS_ERR(request[n])) {
3943			err = PTR_ERR(request[n]);
3944			nsibling = n;
3945			goto out;
3946		}
3947
3948		/* Reverse order as it's more likely to be unnatural */
3949		request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
3950
3951		i915_request_get(request[n]);
3952		i915_request_add(request[n]);
3953	}
3954
3955	for (n = 0; n < nsibling; n++) {
3956		if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
3957			pr_err("%s(%s): wait for %llx:%lld timed out\n",
3958			       __func__, ve->engine->name,
3959			       request[n]->fence.context,
3960			       request[n]->fence.seqno);
3961
3962			GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3963				  __func__, ve->engine->name,
3964				  request[n]->fence.context,
3965				  request[n]->fence.seqno);
3966			GEM_TRACE_DUMP();
3967			intel_gt_set_wedged(gt);
3968			err = -EIO;
3969			goto out;
3970		}
3971
3972		if (request[n]->engine != siblings[nsibling - n - 1]) {
3973			pr_err("Executed on wrong sibling '%s', expected '%s'\n",
3974			       request[n]->engine->name,
3975			       siblings[nsibling - n - 1]->name);
3976			err = -EINVAL;
3977			goto out;
3978		}
3979	}
3980
3981	err = igt_live_test_end(&t);
3982out:
3983	if (igt_flush_test(gt->i915))
3984		err = -EIO;
3985
3986	for (n = 0; n < nsibling; n++)
3987		i915_request_put(request[n]);
3988
3989out_unpin:
3990	intel_context_unpin(ve);
3991out_put:
3992	intel_context_put(ve);
3993out_close:
3994	return err;
3995}
3996
3997static int live_virtual_mask(void *arg)
3998{
3999	struct intel_gt *gt = arg;
4000	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4001	unsigned int class;
4002	int err;
4003
4004	if (intel_uc_uses_guc_submission(&gt->uc))
4005		return 0;
4006
4007	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4008		unsigned int nsibling;
4009
4010		nsibling = select_siblings(gt, class, siblings);
4011		if (nsibling < 2)
4012			continue;
4013
4014		err = mask_virtual_engine(gt, siblings, nsibling);
4015		if (err)
4016			return err;
4017	}
4018
4019	return 0;
4020}
4021
4022static int slicein_virtual_engine(struct intel_gt *gt,
4023				  struct intel_engine_cs **siblings,
4024				  unsigned int nsibling)
4025{
4026	const long timeout = slice_timeout(siblings[0]);
4027	struct intel_context *ce;
4028	struct i915_request *rq;
4029	struct igt_spinner spin;
4030	unsigned int n;
4031	int err = 0;
4032
4033	/*
4034	 * Virtual requests must take part in timeslicing on the target engines.
4035	 */
4036
4037	if (igt_spinner_init(&spin, gt))
4038		return -ENOMEM;
4039
4040	for (n = 0; n < nsibling; n++) {
4041		ce = intel_context_create(siblings[n]);
4042		if (IS_ERR(ce)) {
4043			err = PTR_ERR(ce);
4044			goto out;
4045		}
4046
4047		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4048		intel_context_put(ce);
4049		if (IS_ERR(rq)) {
4050			err = PTR_ERR(rq);
4051			goto out;
4052		}
4053
4054		i915_request_add(rq);
4055	}
4056
4057	ce = intel_execlists_create_virtual(siblings, nsibling);
4058	if (IS_ERR(ce)) {
4059		err = PTR_ERR(ce);
4060		goto out;
4061	}
4062
4063	rq = intel_context_create_request(ce);
4064	intel_context_put(ce);
4065	if (IS_ERR(rq)) {
4066		err = PTR_ERR(rq);
4067		goto out;
4068	}
4069
4070	i915_request_get(rq);
4071	i915_request_add(rq);
4072	if (i915_request_wait(rq, 0, timeout) < 0) {
4073		GEM_TRACE_ERR("%s(%s) failed to slice in virtual request\n",
4074			      __func__, rq->engine->name);
4075		GEM_TRACE_DUMP();
4076		intel_gt_set_wedged(gt);
4077		err = -EIO;
4078	}
4079	i915_request_put(rq);
4080
4081out:
4082	igt_spinner_end(&spin);
4083	if (igt_flush_test(gt->i915))
4084		err = -EIO;
4085	igt_spinner_fini(&spin);
4086	return err;
4087}
4088
4089static int sliceout_virtual_engine(struct intel_gt *gt,
4090				   struct intel_engine_cs **siblings,
4091				   unsigned int nsibling)
4092{
4093	const long timeout = slice_timeout(siblings[0]);
4094	struct intel_context *ce;
4095	struct i915_request *rq;
4096	struct igt_spinner spin;
4097	unsigned int n;
4098	int err = 0;
4099
4100	/*
4101	 * Virtual requests must allow others a fair timeslice.
4102	 */
4103
4104	if (igt_spinner_init(&spin, gt))
4105		return -ENOMEM;
4106
4107	/* XXX We do not handle oversubscription and fairness with normal rq */
4108	for (n = 0; n < nsibling; n++) {
4109		ce = intel_execlists_create_virtual(siblings, nsibling);
4110		if (IS_ERR(ce)) {
4111			err = PTR_ERR(ce);
4112			goto out;
4113		}
4114
4115		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4116		intel_context_put(ce);
4117		if (IS_ERR(rq)) {
4118			err = PTR_ERR(rq);
4119			goto out;
4120		}
4121
4122		i915_request_add(rq);
4123	}
4124
4125	for (n = 0; !err && n < nsibling; n++) {
4126		ce = intel_context_create(siblings[n]);
4127		if (IS_ERR(ce)) {
4128			err = PTR_ERR(ce);
4129			goto out;
4130		}
4131
4132		rq = intel_context_create_request(ce);
4133		intel_context_put(ce);
4134		if (IS_ERR(rq)) {
4135			err = PTR_ERR(rq);
4136			goto out;
4137		}
4138
4139		i915_request_get(rq);
4140		i915_request_add(rq);
4141		if (i915_request_wait(rq, 0, timeout) < 0) {
4142			GEM_TRACE_ERR("%s(%s) failed to slice out virtual request\n",
4143				      __func__, siblings[n]->name);
4144			GEM_TRACE_DUMP();
4145			intel_gt_set_wedged(gt);
4146			err = -EIO;
4147		}
4148		i915_request_put(rq);
4149	}
4150
4151out:
4152	igt_spinner_end(&spin);
4153	if (igt_flush_test(gt->i915))
4154		err = -EIO;
4155	igt_spinner_fini(&spin);
4156	return err;
4157}
4158
4159static int live_virtual_slice(void *arg)
4160{
4161	struct intel_gt *gt = arg;
4162	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4163	unsigned int class;
4164	int err;
4165
4166	if (intel_uc_uses_guc_submission(&gt->uc))
4167		return 0;
4168
4169	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4170		unsigned int nsibling;
4171
4172		nsibling = __select_siblings(gt, class, siblings,
4173					     intel_engine_has_timeslices);
4174		if (nsibling < 2)
4175			continue;
4176
4177		err = slicein_virtual_engine(gt, siblings, nsibling);
4178		if (err)
4179			return err;
4180
4181		err = sliceout_virtual_engine(gt, siblings, nsibling);
4182		if (err)
4183			return err;
4184	}
4185
4186	return 0;
4187}
4188
4189static int preserved_virtual_engine(struct intel_gt *gt,
4190				    struct intel_engine_cs **siblings,
4191				    unsigned int nsibling)
4192{
4193	struct i915_request *last = NULL;
4194	struct intel_context *ve;
4195	struct i915_vma *scratch;
4196	struct igt_live_test t;
4197	unsigned int n;
4198	int err = 0;
4199	u32 *cs;
4200
4201	scratch =
4202		__vm_create_scratch_for_read_pinned(&siblings[0]->gt->ggtt->vm,
4203						    PAGE_SIZE);
4204	if (IS_ERR(scratch))
4205		return PTR_ERR(scratch);
4206
4207	err = i915_vma_sync(scratch);
4208	if (err)
4209		goto out_scratch;
4210
4211	ve = intel_execlists_create_virtual(siblings, nsibling);
4212	if (IS_ERR(ve)) {
4213		err = PTR_ERR(ve);
4214		goto out_scratch;
4215	}
4216
4217	err = intel_context_pin(ve);
4218	if (err)
4219		goto out_put;
4220
4221	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
4222	if (err)
4223		goto out_unpin;
4224
4225	for (n = 0; n < NUM_GPR_DW; n++) {
4226		struct intel_engine_cs *engine = siblings[n % nsibling];
4227		struct i915_request *rq;
4228
4229		rq = i915_request_create(ve);
4230		if (IS_ERR(rq)) {
4231			err = PTR_ERR(rq);
4232			goto out_end;
4233		}
4234
4235		i915_request_put(last);
4236		last = i915_request_get(rq);
4237
4238		cs = intel_ring_begin(rq, 8);
4239		if (IS_ERR(cs)) {
4240			i915_request_add(rq);
4241			err = PTR_ERR(cs);
4242			goto out_end;
4243		}
4244
4245		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4246		*cs++ = CS_GPR(engine, n);
4247		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
4248		*cs++ = 0;
4249
4250		*cs++ = MI_LOAD_REGISTER_IMM(1);
4251		*cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
4252		*cs++ = n + 1;
4253
4254		*cs++ = MI_NOOP;
4255		intel_ring_advance(rq, cs);
4256
4257		/* Restrict this request to run on a particular engine */
4258		rq->execution_mask = engine->mask;
4259		i915_request_add(rq);
4260	}
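	/*
	 * Request n thus runs on sibling n % nsibling, copies GPR dword n
	 * (seeded with the value n by the previous request on a different
	 * sibling, or zero initially) out to scratch[n], and then loads GPR
	 * dword n + 1 with n + 1 for its successor. The comparison below only
	 * passes if the GPR file follows the single virtual context from
	 * engine to engine.
	 */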
4261
4262	if (i915_request_wait(last, 0, HZ / 5) < 0) {
4263		err = -ETIME;
4264		goto out_end;
4265	}
4266
4267	cs = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB);
4268	if (IS_ERR(cs)) {
4269		err = PTR_ERR(cs);
4270		goto out_end;
4271	}
4272
4273	for (n = 0; n < NUM_GPR_DW; n++) {
4274		if (cs[n] != n) {
4275			pr_err("Incorrect value[%d] found for GPR[%d]\n",
4276			       cs[n], n);
4277			err = -EINVAL;
4278			break;
4279		}
4280	}
4281
4282	i915_gem_object_unpin_map(scratch->obj);
4283
4284out_end:
4285	if (igt_live_test_end(&t))
4286		err = -EIO;
4287	i915_request_put(last);
4288out_unpin:
4289	intel_context_unpin(ve);
4290out_put:
4291	intel_context_put(ve);
4292out_scratch:
4293	i915_vma_unpin_and_release(&scratch, 0);
4294	return err;
4295}
4296
4297static int live_virtual_preserved(void *arg)
4298{
4299	struct intel_gt *gt = arg;
4300	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4301	unsigned int class;
4302
4303	/*
4304	 * Check that the context image retains non-privileged (user) registers
4305	 * from one engine to the next. For this we check that the CS_GPR
4306	 * are preserved.
4307	 */
4308
4309	if (intel_uc_uses_guc_submission(&gt->uc))
4310		return 0;
4311
4312	/* As we use CS_GPR we cannot run before they existed on all engines. */
4313	if (GRAPHICS_VER(gt->i915) < 9)
4314		return 0;
4315
4316	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4317		int nsibling, err;
4318
4319		nsibling = select_siblings(gt, class, siblings);
4320		if (nsibling < 2)
4321			continue;
4322
4323		err = preserved_virtual_engine(gt, siblings, nsibling);
4324		if (err)
4325			return err;
4326	}
4327
4328	return 0;
4329}
4330
4331static int bond_virtual_engine(struct intel_gt *gt,
4332			       unsigned int class,
4333			       struct intel_engine_cs **siblings,
4334			       unsigned int nsibling,
4335			       unsigned int flags)
4336#define BOND_SCHEDULE BIT(0)
4337{
4338	struct intel_engine_cs *master;
4339	struct i915_request *rq[16];
4340	enum intel_engine_id id;
4341	struct igt_spinner spin;
4342	unsigned long n;
4343	int err;
4344
4345	/*
4346	 * A set of bonded requests is intended to be run concurrently
4347	 * across a number of engines. We use one request per engine
4348	 * and a magic fence to schedule each of the bonded requests
4349	 * at the same time. A consequence of our current scheduler is that
4350	 * we only move requests to the HW ready queue when the request
4351	 * becomes ready, that is when all of its prerequisite fences have
4352	 * been signaled. As one of those fences is the master submit fence,
4353	 * there is a delay on all secondary fences as the HW may be
4354	 * currently busy. Equally, as all the requests are independent,
4355	 * they may have other fences that delay individual request
4356	 * submission to HW. Ergo, we do not guarantee that all requests are
4357	 * immediately submitted to HW at the same time, just that if the
4358	 * rules are abided by, they are ready at the same time as the
4359	 * first is submitted. Userspace can embed semaphores in its batch
4360	 * to ensure parallel execution of its phases as it requires.
4361	 * Naturally it has been requested that the scheduler should instead
4362	 * take care of parallel execution, even across preemption events on
4363	 * different HW. (The proper answer is of course "lalalala".)
4364	 *
4365	 * With the submit-fence, we have identified three possible phases
4366	 * of synchronisation depending on the master fence: queued (not
4367	 * ready), executing, and signaled. The first two are quite simple
4368	 * and checked below. However, the signaled master fence handling is
4369	 * contentious. Currently we do not distinguish between a signaled
4370	 * fence and an expired fence, as once signaled it does not convey
4371	 * any information about the previous execution. It may even be freed
4372	 * and hence, by the time we check, it may not exist at all. Ergo we currently
4373	 * do not apply the bonding constraint for an already signaled fence,
4374	 * as our expectation is that it should not constrain the secondaries
4375	 * and is outside of the scope of the bonded request API (i.e. all
4376	 * userspace requests are meant to be running in parallel). As
4377	 * it imposes no constraint, and is effectively a no-op, we do not
4378	 * check below as normal execution flows are checked extensively above.
4379	 *
4380	 * XXX Is the degenerate handling of signaled submit fences the
4381	 * expected behaviour for userspace?
4382	 */
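	/*
	 * Depending on BOND_SCHEDULE, either the master is already spinning
	 * on HW when the bonded secondaries are submitted, or it is held back
	 * behind an onstack fence so that the secondaries are queued against
	 * a not-yet-ready master.
	 */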
4383
4384	GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
4385
4386	if (igt_spinner_init(&spin, gt))
4387		return -ENOMEM;
4388
4389	err = 0;
4390	rq[0] = ERR_PTR(-ENOMEM);
4391	for_each_engine(master, gt, id) {
4392		struct i915_sw_fence fence = {};
4393		struct intel_context *ce;
4394
4395		if (master->class == class)
4396			continue;
4397
4398		ce = intel_context_create(master);
4399		if (IS_ERR(ce)) {
4400			err = PTR_ERR(ce);
4401			goto out;
4402		}
4403
4404		memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
4405
4406		rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP);
4407		intel_context_put(ce);
4408		if (IS_ERR(rq[0])) {
4409			err = PTR_ERR(rq[0]);
4410			goto out;
4411		}
4412		i915_request_get(rq[0]);
4413
4414		if (flags & BOND_SCHEDULE) {
4415			onstack_fence_init(&fence);
4416			err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
4417							       &fence,
4418							       GFP_KERNEL);
4419		}
4420
4421		i915_request_add(rq[0]);
4422		if (err < 0)
4423			goto out;
4424
4425		if (!(flags & BOND_SCHEDULE) &&
4426		    !igt_wait_for_spinner(&spin, rq[0])) {
4427			err = -EIO;
4428			goto out;
4429		}
4430
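		/*
		 * For each sibling, create a fresh virtual engine bonded to
		 * that sibling and queue a request that must follow the
		 * master onto the paired physical engine.
		 */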
4431		for (n = 0; n < nsibling; n++) {
4432			struct intel_context *ve;
4433
4434			ve = intel_execlists_create_virtual(siblings, nsibling);
4435			if (IS_ERR(ve)) {
4436				err = PTR_ERR(ve);
4437				onstack_fence_fini(&fence);
4438				goto out;
4439			}
4440
4441			err = intel_virtual_engine_attach_bond(ve->engine,
4442							       master,
4443							       siblings[n]);
4444			if (err) {
4445				intel_context_put(ve);
4446				onstack_fence_fini(&fence);
4447				goto out;
4448			}
4449
4450			err = intel_context_pin(ve);
4451			intel_context_put(ve);
4452			if (err) {
4453				onstack_fence_fini(&fence);
4454				goto out;
4455			}
4456
4457			rq[n + 1] = i915_request_create(ve);
4458			intel_context_unpin(ve);
4459			if (IS_ERR(rq[n + 1])) {
4460				err = PTR_ERR(rq[n + 1]);
4461				onstack_fence_fini(&fence);
4462				goto out;
4463			}
4464			i915_request_get(rq[n + 1]);
4465
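			/*
			 * Chain the secondary to the master's execution: once
			 * the master actually starts running, the bond is
			 * expected to steer this virtual request onto
			 * siblings[n], the engine paired with the master above.
			 */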
4466			err = i915_request_await_execution(rq[n + 1],
4467							   &rq[0]->fence,
4468							   ve->engine->bond_execute);
4469			i915_request_add(rq[n + 1]);
4470			if (err < 0) {
4471				onstack_fence_fini(&fence);
4472				goto out;
4473			}
4474		}
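		/*
		 * Release the master's submit fence (only armed for the
		 * BOND_SCHEDULE phase) so the master and its bonded
		 * secondaries can finally reach the HW.
		 */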
4475		onstack_fence_fini(&fence);
4476		intel_engine_flush_submission(master);
4477		igt_spinner_end(&spin);
4478
4479		if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
4480			pr_err("Master request did not execute (on %s)!\n",
4481			       rq[0]->engine->name);
4482			err = -EIO;
4483			goto out;
4484		}
4485
4486		for (n = 0; n < nsibling; n++) {
4487			if (i915_request_wait(rq[n + 1], 0,
4488					      MAX_SCHEDULE_TIMEOUT) < 0) {
4489				err = -EIO;
4490				goto out;
4491			}
4492
4493			if (rq[n + 1]->engine != siblings[n]) {
4494				pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
4495				       siblings[n]->name,
4496				       rq[n + 1]->engine->name,
4497				       rq[0]->engine->name);
4498				err = -EINVAL;
4499				goto out;
4500			}
4501		}
4502
4503		for (n = 0; !IS_ERR(rq[n]); n++)
4504			i915_request_put(rq[n]);
4505		rq[0] = ERR_PTR(-ENOMEM);
4506	}
4507
4508out:
4509	for (n = 0; !IS_ERR(rq[n]); n++)
4510		i915_request_put(rq[n]);
4511	if (igt_flush_test(gt->i915))
4512		err = -EIO;
4513
4514	igt_spinner_fini(&spin);
4515	return err;
4516}
4517
4518static int live_virtual_bond(void *arg)
4519{
4520	static const struct phase {
4521		const char *name;
4522		unsigned int flags;
4523	} phases[] = {
4524		{ "", 0 },
4525		{ "schedule", BOND_SCHEDULE },
4526		{ },
4527	};
4528	struct intel_gt *gt = arg;
4529	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4530	unsigned int class;
4531	int err;
4532
4533	if (intel_uc_uses_guc_submission(&gt->uc))
4534		return 0;
4535
4536	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4537		const struct phase *p;
4538		int nsibling;
4539
4540		nsibling = select_siblings(gt, class, siblings);
4541		if (nsibling < 2)
4542			continue;
4543
4544		for (p = phases; p->name; p++) {
4545			err = bond_virtual_engine(gt,
4546						  class, siblings, nsibling,
4547						  p->flags);
4548			if (err) {
4549				pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
4550				       __func__, p->name, class, nsibling, err);
4551				return err;
4552			}
4553		}
4554	}
4555
4556	return 0;
4557}
4558
4559static int reset_virtual_engine(struct intel_gt *gt,
4560				struct intel_engine_cs **siblings,
4561				unsigned int nsibling)
4562{
4563	struct intel_engine_cs *engine;
4564	struct intel_context *ve;
4565	struct igt_spinner spin;
4566	struct i915_request *rq;
4567	unsigned int n;
4568	int err = 0;
4569
4570	/*
4571	 * In order to support offline error capture for fast preempt reset,
4572	 * we need to decouple the guilty request and ensure that it and its
4573	 * descendants are not executed while the capture is in progress.
4574	 */
4575
4576	if (igt_spinner_init(&spin, gt))
4577		return -ENOMEM;
4578
4579	ve = intel_execlists_create_virtual(siblings, nsibling);
4580	if (IS_ERR(ve)) {
4581		err = PTR_ERR(ve);
4582		goto out_spin;
4583	}
4584
4585	for (n = 0; n < nsibling; n++)
4586		st_engine_heartbeat_disable(siblings[n]);
4587
4588	rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
4589	if (IS_ERR(rq)) {
4590		err = PTR_ERR(rq);
4591		goto out_heartbeat;
4592	}
4593	i915_request_add(rq);
4594
4595	if (!igt_wait_for_spinner(&spin, rq)) {
4596		intel_gt_set_wedged(gt);
4597		err = -ETIME;
4598		goto out_heartbeat;
4599	}
4600
4601	engine = rq->engine;
4602	GEM_BUG_ON(engine == ve->engine);
4603
4604	/* Take ownership of the reset and tasklet */
4605	err = engine_lock_reset_tasklet(engine);
4606	if (err)
4607		goto out_heartbeat;
4608
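	/*
	 * With the tasklet locked out, process the pending submission by hand
	 * so that the spinner becomes the context active on the engine.
	 */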
4609	engine->execlists.tasklet.callback(&engine->execlists.tasklet);
4610	GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
4611
4612	/* Fake a preemption event; one that failed, of course */
4613	spin_lock_irq(&engine->active.lock);
4614	__unwind_incomplete_requests(engine);
4615	spin_unlock_irq(&engine->active.lock);
4616	GEM_BUG_ON(rq->engine != engine);
4617
4618	/* Reset the engine while keeping our active request on hold */
4619	execlists_hold(engine, rq);
4620	GEM_BUG_ON(!i915_request_on_hold(rq));
4621
4622	__intel_engine_reset_bh(engine, NULL);
4623	GEM_BUG_ON(rq->fence.error != -EIO);
4624
4625	/* Release our grasp on the engine, letting CS flow again */
4626	engine_unlock_reset_tasklet(engine);
4627
4628	/* Check that we do not resubmit the held request */
4629	i915_request_get(rq);
4630	if (!i915_request_wait(rq, 0, HZ / 5)) {
4631		pr_err("%s: on hold request completed!\n",
4632		       engine->name);
4633		intel_gt_set_wedged(gt);
4634		err = -EIO;
4635		goto out_rq;
4636	}
4637	GEM_BUG_ON(!i915_request_on_hold(rq));
4638
4639	/* But is resubmitted on release */
4640	execlists_unhold(engine, rq);
4641	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4642		pr_err("%s: held request did not complete!\n",
4643		       engine->name);
4644		intel_gt_set_wedged(gt);
4645		err = -ETIME;
4646	}
4647
4648out_rq:
4649	i915_request_put(rq);
4650out_heartbeat:
4651	for (n = 0; n < nsibling; n++)
4652		st_engine_heartbeat_enable(siblings[n]);
4653
4654	intel_context_put(ve);
4655out_spin:
4656	igt_spinner_fini(&spin);
4657	return err;
4658}
4659
4660static int live_virtual_reset(void *arg)
4661{
4662	struct intel_gt *gt = arg;
4663	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4664	unsigned int class;
4665
4666	/*
4667	 * Check that we handle a reset event within a virtual engine.
4668	 * Only the physical engine is reset, but we have to check the flow
4669	 * of the virtual requests around the reset, and make sure the request is not
4670	 * forgotten.
4671	 */
4672
4673	if (intel_uc_uses_guc_submission(&gt->uc))
4674		return 0;
4675
4676	if (!intel_has_reset_engine(gt))
4677		return 0;
4678
4679	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4680		int nsibling, err;
4681
4682		nsibling = select_siblings(gt, class, siblings);
4683		if (nsibling < 2)
4684			continue;
4685
4686		err = reset_virtual_engine(gt, siblings, nsibling);
4687		if (err)
4688			return err;
4689	}
4690
4691	return 0;
4692}
4693
4694int intel_execlists_live_selftests(struct drm_i915_private *i915)
4695{
4696	static const struct i915_subtest tests[] = {
4697		SUBTEST(live_sanitycheck),
4698		SUBTEST(live_unlite_switch),
4699		SUBTEST(live_unlite_preempt),
4700		SUBTEST(live_unlite_ring),
4701		SUBTEST(live_pin_rewind),
4702		SUBTEST(live_hold_reset),
4703		SUBTEST(live_error_interrupt),
4704		SUBTEST(live_timeslice_preempt),
4705		SUBTEST(live_timeslice_rewind),
4706		SUBTEST(live_timeslice_queue),
4707		SUBTEST(live_timeslice_nopreempt),
4708		SUBTEST(live_busywait_preempt),
4709		SUBTEST(live_preempt),
4710		SUBTEST(live_late_preempt),
4711		SUBTEST(live_nopreempt),
4712		SUBTEST(live_preempt_cancel),
4713		SUBTEST(live_suppress_self_preempt),
4714		SUBTEST(live_chain_preempt),
4715		SUBTEST(live_preempt_ring),
4716		SUBTEST(live_preempt_gang),
4717		SUBTEST(live_preempt_timeout),
4718		SUBTEST(live_preempt_user),
4719		SUBTEST(live_preempt_smoke),
4720		SUBTEST(live_virtual_engine),
4721		SUBTEST(live_virtual_mask),
4722		SUBTEST(live_virtual_preserved),
4723		SUBTEST(live_virtual_slice),
4724		SUBTEST(live_virtual_bond),
4725		SUBTEST(live_virtual_reset),
4726	};
4727
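	/*
	 * These tests poke directly at the execlists backend, so they only
	 * apply when submission goes via ELSP and the GT is not already wedged.
	 */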
4728	if (i915->gt.submission_method != INTEL_SUBMISSION_ELSP)
4729		return 0;
4730
4731	if (intel_gt_is_wedged(&i915->gt))
4732		return 0;
4733
4734	return intel_gt_live_subtests(tests, &i915->gt);
4735}