   1/*
   2 * Copyright © 2016 Intel Corporation
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice (including the next
  12 * paragraph) shall be included in all copies or substantial portions of the
  13 * Software.
  14 *
  15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21 * IN THE SOFTWARE.
  22 *
  23 */
  24
  25#include <linux/prime_numbers.h>
  26
  27#include "gem/i915_gem_pm.h"
  28#include "gem/selftests/mock_context.h"
  29
  30#include "gt/intel_gt.h"
  31
  32#include "i915_random.h"
  33#include "i915_selftest.h"
  34#include "igt_live_test.h"
  35#include "lib_sw_fence.h"
  36
  37#include "mock_drm.h"
  38#include "mock_gem_device.h"
  39
  40static int igt_add_request(void *arg)
  41{
  42	struct drm_i915_private *i915 = arg;
  43	struct i915_request *request;
  44	int err = -ENOMEM;
  45
  46	/* Basic preliminary test to create a request and let it loose! */
  47
  48	mutex_lock(&i915->drm.struct_mutex);
  49	request = mock_request(i915->engine[RCS0]->kernel_context, HZ / 10);
  50	if (!request)
  51		goto out_unlock;
  52
  53	i915_request_add(request);
  54
  55	err = 0;
  56out_unlock:
  57	mutex_unlock(&i915->drm.struct_mutex);
  58	return err;
  59}
  60
  61static int igt_wait_request(void *arg)
  62{
  63	const long T = HZ / 4;
  64	struct drm_i915_private *i915 = arg;
  65	struct i915_request *request;
  66	int err = -EINVAL;
  67
  68	/* Submit a request, then wait upon it */
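	/*
	 * The second argument to mock_request() is the delay, in jiffies,
	 * after which the mock engine treats the request as complete.
	 * i915_request_wait() returns -ETIME if the request is still busy
	 * when its timeout expires, so a zero timeout acts as a busy query.
	 */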
  69
  70	mutex_lock(&i915->drm.struct_mutex);
  71	request = mock_request(i915->engine[RCS0]->kernel_context, T);
  72	if (!request) {
  73		err = -ENOMEM;
  74		goto out_unlock;
  75	}
  76	i915_request_get(request);
  77
  78	if (i915_request_wait(request, 0, 0) != -ETIME) {
  79		pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n");
  80		goto out_request;
  81	}
  82
  83	if (i915_request_wait(request, 0, T) != -ETIME) {
  84		pr_err("request wait succeeded (expected timeout before submit!)\n");
  85		goto out_request;
  86	}
  87
  88	if (i915_request_completed(request)) {
  89		pr_err("request completed before submit!!\n");
  90		goto out_request;
  91	}
  92
  93	i915_request_add(request);
  94
  95	if (i915_request_wait(request, 0, 0) != -ETIME) {
  96		pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n");
  97		goto out_request;
  98	}
  99
 100	if (i915_request_completed(request)) {
 101		pr_err("request completed immediately!\n");
 102		goto out_request;
 103	}
 104
 105	if (i915_request_wait(request, 0, T / 2) != -ETIME) {
 106		pr_err("request wait succeeded (expected timeout!)\n");
 107		goto out_request;
 108	}
 109
 110	if (i915_request_wait(request, 0, T) == -ETIME) {
 111		pr_err("request wait timed out!\n");
 112		goto out_request;
 113	}
 114
 115	if (!i915_request_completed(request)) {
 116		pr_err("request not complete after waiting!\n");
 117		goto out_request;
 118	}
 119
 120	if (i915_request_wait(request, 0, T) == -ETIME) {
 121		pr_err("request wait timed out when already complete!\n");
 122		goto out_request;
 123	}
 124
 125	err = 0;
 126out_request:
 127	i915_request_put(request);
 128out_unlock:
 129	mock_device_flush(i915);
 130	mutex_unlock(&i915->drm.struct_mutex);
 131	return err;
 132}
 133
 134static int igt_fence_wait(void *arg)
 135{
 136	const long T = HZ / 4;
 137	struct drm_i915_private *i915 = arg;
 138	struct i915_request *request;
 139	int err = -EINVAL;
 140
 141	/* Submit a request, treat it as a fence and wait upon it */
 142
 143	mutex_lock(&i915->drm.struct_mutex);
 144	request = mock_request(i915->engine[RCS0]->kernel_context, T);
 145	if (!request) {
 146		err = -ENOMEM;
 147		goto out_locked;
 148	}
 149
 150	if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) {
 151		pr_err("fence wait success before submit (expected timeout)!\n");
 152		goto out_locked;
 153	}
 154
 155	i915_request_add(request);
 156	mutex_unlock(&i915->drm.struct_mutex);
 157
 158	if (dma_fence_is_signaled(&request->fence)) {
 159		pr_err("fence signaled immediately!\n");
 160		goto out_device;
 161	}
 162
 163	if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) {
 164		pr_err("fence wait success after submit (expected timeout)!\n");
 165		goto out_device;
 166	}
 167
 168	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
 169		pr_err("fence wait timed out (expected success)!\n");
 170		goto out_device;
 171	}
 172
 173	if (!dma_fence_is_signaled(&request->fence)) {
 174		pr_err("fence unsignaled after waiting!\n");
 175		goto out_device;
 176	}
 177
 178	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
 179		pr_err("fence wait timed out when complete (expected success)!\n");
 180		goto out_device;
 181	}
 182
 183	err = 0;
 184out_device:
 185	mutex_lock(&i915->drm.struct_mutex);
 186out_locked:
 187	mock_device_flush(i915);
 188	mutex_unlock(&i915->drm.struct_mutex);
 189	return err;
 190}
 191
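/*
 * igt_request_rewind: submit a slow request, cancel it before it executes,
 * submit a high priority "vip" request from a second context, and only then
 * resubmit the original. The vip request must complete first, simulating
 * preemption by manually reordering the submission queue.
 */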
 192static int igt_request_rewind(void *arg)
 193{
 194	struct drm_i915_private *i915 = arg;
 195	struct i915_request *request, *vip;
 196	struct i915_gem_context *ctx[2];
 197	struct intel_context *ce;
 198	int err = -EINVAL;
 199
 200	mutex_lock(&i915->drm.struct_mutex);
 201	ctx[0] = mock_context(i915, "A");
 202	ce = i915_gem_context_get_engine(ctx[0], RCS0);
 203	GEM_BUG_ON(IS_ERR(ce));
 204	request = mock_request(ce, 2 * HZ);
 205	intel_context_put(ce);
 206	if (!request) {
 207		err = -ENOMEM;
 208		goto err_context_0;
 209	}
 210
 211	i915_request_get(request);
 212	i915_request_add(request);
 213
 214	ctx[1] = mock_context(i915, "B");
 215	ce = i915_gem_context_get_engine(ctx[1], RCS0);
 216	GEM_BUG_ON(IS_ERR(ce));
 217	vip = mock_request(ce, 0);
 218	intel_context_put(ce);
 219	if (!vip) {
 220		err = -ENOMEM;
 221		goto err_context_1;
 222	}
 223
 224	/* Simulate preemption by manual reordering */
 225	if (!mock_cancel_request(request)) {
 226		pr_err("failed to cancel request (already executed)!\n");
 227		i915_request_add(vip);
 228		goto err_context_1;
 229	}
 230	i915_request_get(vip);
 231	i915_request_add(vip);
 232	rcu_read_lock();
 233	request->engine->submit_request(request);
 234	rcu_read_unlock();
 235
 236	mutex_unlock(&i915->drm.struct_mutex);
 237
 238	if (i915_request_wait(vip, 0, HZ) == -ETIME) {
 239		pr_err("timed out waiting for high priority request\n");
 240		goto err;
 241	}
 242
 243	if (i915_request_completed(request)) {
 244		pr_err("low priority request already completed\n");
 245		goto err;
 246	}
 247
 248	err = 0;
 249err:
 250	i915_request_put(vip);
 251	mutex_lock(&i915->drm.struct_mutex);
 252err_context_1:
 253	mock_context_close(ctx[1]);
 254	i915_request_put(request);
 255err_context_0:
 256	mock_context_close(ctx[0]);
 257	mock_device_flush(i915);
 258	mutex_unlock(&i915->drm.struct_mutex);
 259	return err;
 260}
 261
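/*
 * Shared state for the breadcrumb smoketests: the engine under test, a pool
 * of contexts to allocate requests from, counters updated by each worker
 * thread, and a request_alloc() hook so that the same worker loop can drive
 * either mock requests or real requests on live hardware.
 */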
 262struct smoketest {
 263	struct intel_engine_cs *engine;
 264	struct i915_gem_context **contexts;
 265	atomic_long_t num_waits, num_fences;
 266	int ncontexts, max_batch;
 267	struct i915_request *(*request_alloc)(struct intel_context *ce);
 268};
 269
 270static struct i915_request *
 271__mock_request_alloc(struct intel_context *ce)
 272{
 273	return mock_request(ce, 0);
 274}
 275
 276static struct i915_request *
 277__live_request_alloc(struct intel_context *ce)
 278{
 279	return intel_context_create_request(ce);
 280}
 281
 282static int __igt_breadcrumbs_smoketest(void *arg)
 283{
 284	struct smoketest *t = arg;
 285	struct mutex * const BKL = &t->engine->i915->drm.struct_mutex;
 286	const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
 287	const unsigned int total = 4 * t->ncontexts + 1;
 288	unsigned int num_waits = 0, num_fences = 0;
 289	struct i915_request **requests;
 290	I915_RND_STATE(prng);
 291	unsigned int *order;
 292	int err = 0;
 293
 294	/*
 295	 * A very simple test to catch the most egregious of list handling bugs.
 296	 *
 297	 * At its heart, we simply create oodles of requests running across
 298	 * multiple kthreads and enable signaling on them, for the sole purpose
 299	 * of stressing our breadcrumb handling. The only inspection we do is
 300	 * that the fences were marked as signaled.
 301	 */
 302
 303	requests = kmalloc_array(total, sizeof(*requests), GFP_KERNEL);
 304	if (!requests)
 305		return -ENOMEM;
 306
 307	order = i915_random_order(total, &prng);
 308	if (!order) {
 309		err = -ENOMEM;
 310		goto out_requests;
 311	}
 312
 313	while (!kthread_should_stop()) {
 314		struct i915_sw_fence *submit, *wait;
 315		unsigned int n, count;
 316
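		/*
		 * "submit" holds back submission of every request in this
		 * batch until it is committed; "wait" completes only once
		 * all of their fences have signaled, exercising the
		 * breadcrumb signaling we want to stress.
		 */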
 317		submit = heap_fence_create(GFP_KERNEL);
 318		if (!submit) {
 319			err = -ENOMEM;
 320			break;
 321		}
 322
 323		wait = heap_fence_create(GFP_KERNEL);
 324		if (!wait) {
 325			i915_sw_fence_commit(submit);
 326			heap_fence_put(submit);
  327			err = -ENOMEM;
 328			break;
 329		}
 330
 331		i915_random_reorder(order, total, &prng);
 332		count = 1 + i915_prandom_u32_max_state(max_batch, &prng);
 333
 334		for (n = 0; n < count; n++) {
 335			struct i915_gem_context *ctx =
 336				t->contexts[order[n] % t->ncontexts];
 337			struct i915_request *rq;
 338			struct intel_context *ce;
 339
 340			mutex_lock(BKL);
 341
 342			ce = i915_gem_context_get_engine(ctx, t->engine->legacy_idx);
 343			GEM_BUG_ON(IS_ERR(ce));
 344			rq = t->request_alloc(ce);
 345			intel_context_put(ce);
 346			if (IS_ERR(rq)) {
 347				mutex_unlock(BKL);
 348				err = PTR_ERR(rq);
 349				count = n;
 350				break;
 351			}
 352
 353			err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
 354							       submit,
 355							       GFP_KERNEL);
 356
 357			requests[n] = i915_request_get(rq);
 358			i915_request_add(rq);
 359
 360			mutex_unlock(BKL);
 361
 362			if (err >= 0)
 363				err = i915_sw_fence_await_dma_fence(wait,
 364								    &rq->fence,
 365								    0,
 366								    GFP_KERNEL);
 367
 368			if (err < 0) {
 369				i915_request_put(rq);
 370				count = n;
 371				break;
 372			}
 373		}
 374
 375		i915_sw_fence_commit(submit);
 376		i915_sw_fence_commit(wait);
 377
 378		if (!wait_event_timeout(wait->wait,
 379					i915_sw_fence_done(wait),
 380					5 * HZ)) {
 381			struct i915_request *rq = requests[count - 1];
 382
 383			pr_err("waiting for %d/%d fences (last %llx:%lld) on %s timed out!\n",
 384			       atomic_read(&wait->pending), count,
 385			       rq->fence.context, rq->fence.seqno,
 386			       t->engine->name);
 387			GEM_TRACE_DUMP();
 388
 389			intel_gt_set_wedged(t->engine->gt);
 390			GEM_BUG_ON(!i915_request_completed(rq));
 391			i915_sw_fence_wait(wait);
 392			err = -EIO;
 393		}
 394
 395		for (n = 0; n < count; n++) {
 396			struct i915_request *rq = requests[n];
 397
 398			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
 399				      &rq->fence.flags)) {
 400				pr_err("%llu:%llu was not signaled!\n",
 401				       rq->fence.context, rq->fence.seqno);
 402				err = -EINVAL;
 403			}
 404
 405			i915_request_put(rq);
 406		}
 407
 408		heap_fence_put(wait);
 409		heap_fence_put(submit);
 410
 411		if (err < 0)
 412			break;
 413
 414		num_fences += count;
 415		num_waits++;
 416
 417		cond_resched();
 418	}
 419
 420	atomic_long_add(num_fences, &t->num_fences);
 421	atomic_long_add(num_waits, &t->num_waits);
 422
 423	kfree(order);
 424out_requests:
 425	kfree(requests);
 426	return err;
 427}
 428
 429static int mock_breadcrumbs_smoketest(void *arg)
 430{
 431	struct drm_i915_private *i915 = arg;
 432	struct smoketest t = {
 433		.engine = i915->engine[RCS0],
 434		.ncontexts = 1024,
 435		.max_batch = 1024,
 436		.request_alloc = __mock_request_alloc
 437	};
 438	unsigned int ncpus = num_online_cpus();
 439	struct task_struct **threads;
 440	unsigned int n;
 441	int ret = 0;
 442
 443	/*
 444	 * Smoketest our breadcrumb/signal handling for requests across multiple
  445	 * threads. A very simple test intended only to catch the most egregious
  446	 * of bugs. See __igt_breadcrumbs_smoketest().
 447	 */
 448
 449	threads = kmalloc_array(ncpus, sizeof(*threads), GFP_KERNEL);
 450	if (!threads)
 451		return -ENOMEM;
 452
 453	t.contexts =
 454		kmalloc_array(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL);
 455	if (!t.contexts) {
 456		ret = -ENOMEM;
 457		goto out_threads;
 458	}
 459
 460	mutex_lock(&t.engine->i915->drm.struct_mutex);
 461	for (n = 0; n < t.ncontexts; n++) {
 462		t.contexts[n] = mock_context(t.engine->i915, "mock");
 463		if (!t.contexts[n]) {
 464			ret = -ENOMEM;
 465			goto out_contexts;
 466		}
 467	}
 468	mutex_unlock(&t.engine->i915->drm.struct_mutex);
 469
 470	for (n = 0; n < ncpus; n++) {
 471		threads[n] = kthread_run(__igt_breadcrumbs_smoketest,
 472					 &t, "igt/%d", n);
 473		if (IS_ERR(threads[n])) {
 474			ret = PTR_ERR(threads[n]);
 475			ncpus = n;
 476			break;
 477		}
 478
 479		get_task_struct(threads[n]);
 480	}
 481
 482	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
 483
 484	for (n = 0; n < ncpus; n++) {
 485		int err;
 486
 487		err = kthread_stop(threads[n]);
 488		if (err < 0 && !ret)
 489			ret = err;
 490
 491		put_task_struct(threads[n]);
 492	}
  493	pr_info("Completed %lu waits for %lu fences across %d cpus\n",
 494		atomic_long_read(&t.num_waits),
 495		atomic_long_read(&t.num_fences),
 496		ncpus);
 497
 498	mutex_lock(&t.engine->i915->drm.struct_mutex);
 499out_contexts:
 500	for (n = 0; n < t.ncontexts; n++) {
 501		if (!t.contexts[n])
 502			break;
 503		mock_context_close(t.contexts[n]);
 504	}
 505	mutex_unlock(&t.engine->i915->drm.struct_mutex);
 506	kfree(t.contexts);
 507out_threads:
 508	kfree(threads);
 509
 510	return ret;
 511}
 512
 513int i915_request_mock_selftests(void)
 514{
 515	static const struct i915_subtest tests[] = {
 516		SUBTEST(igt_add_request),
 517		SUBTEST(igt_wait_request),
 518		SUBTEST(igt_fence_wait),
 519		SUBTEST(igt_request_rewind),
 520		SUBTEST(mock_breadcrumbs_smoketest),
 521	};
 522	struct drm_i915_private *i915;
 523	intel_wakeref_t wakeref;
 524	int err = 0;
 525
 526	i915 = mock_gem_device();
 527	if (!i915)
 528		return -ENOMEM;
 529
 530	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
 531		err = i915_subtests(tests, i915);
 532
 533	drm_dev_put(&i915->drm);
 534
 535	return err;
 536}
 537
 538static int live_nop_request(void *arg)
 539{
 540	struct drm_i915_private *i915 = arg;
 541	struct intel_engine_cs *engine;
 542	intel_wakeref_t wakeref;
 543	struct igt_live_test t;
 544	unsigned int id;
 545	int err = -ENODEV;
 546
 547	/* Submit various sized batches of empty requests, to each engine
 548	 * (individually), and wait for the batch to complete. We can check
 549	 * the overhead of submitting requests to the hardware.
 550	 */
 551
 552	mutex_lock(&i915->drm.struct_mutex);
 553	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 554
 555	for_each_engine(engine, i915, id) {
 556		struct i915_request *request = NULL;
 557		unsigned long n, prime;
 558		IGT_TIMEOUT(end_time);
 559		ktime_t times[2] = {};
 560
 561		err = igt_live_test_begin(&t, i915, __func__, engine->name);
 562		if (err)
 563			goto out_unlock;
 564
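		/*
		 * times[0] records the latency of a single request; times[1]
		 * ends up holding the total for the last prime-sized batch,
		 * reported below as an amortised per-request figure.
		 */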
 565		for_each_prime_number_from(prime, 1, 8192) {
 566			times[1] = ktime_get_raw();
 567
 568			for (n = 0; n < prime; n++) {
 569				request = i915_request_create(engine->kernel_context);
 570				if (IS_ERR(request)) {
 571					err = PTR_ERR(request);
 572					goto out_unlock;
 573				}
 574
 575				/* This space is left intentionally blank.
 576				 *
 577				 * We do not actually want to perform any
 578				 * action with this request, we just want
 579				 * to measure the latency in allocation
 580				 * and submission of our breadcrumbs -
 581				 * ensuring that the bare request is sufficient
 582				 * for the system to work (i.e. proper HEAD
 583				 * tracking of the rings, interrupt handling,
  584				 * etc). It also gives us a lower bound
  585				 * on latency.
 586				 */
 587
 588				i915_request_add(request);
 589			}
 590			i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
 591
 592			times[1] = ktime_sub(ktime_get_raw(), times[1]);
 593			if (prime == 1)
 594				times[0] = times[1];
 595
 596			if (__igt_timeout(end_time, NULL))
 597				break;
 598		}
 599
 600		err = igt_live_test_end(&t);
 601		if (err)
 602			goto out_unlock;
 603
 604		pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n",
 605			engine->name,
 606			ktime_to_ns(times[0]),
 607			prime, div64_u64(ktime_to_ns(times[1]), prime));
 608	}
 609
 610out_unlock:
 611	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 612	mutex_unlock(&i915->drm.struct_mutex);
 613	return err;
 614}
 615
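/*
 * empty_batch() builds a one-page batch containing only MI_BATCH_BUFFER_END,
 * pinned into the global GTT so that the same vma can be executed from any
 * engine's kernel context.
 */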
 616static struct i915_vma *empty_batch(struct drm_i915_private *i915)
 617{
 618	struct drm_i915_gem_object *obj;
 619	struct i915_vma *vma;
 620	u32 *cmd;
 621	int err;
 622
 623	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
 624	if (IS_ERR(obj))
 625		return ERR_CAST(obj);
 626
 627	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
 628	if (IS_ERR(cmd)) {
 629		err = PTR_ERR(cmd);
 630		goto err;
 631	}
 632
 633	*cmd = MI_BATCH_BUFFER_END;
 634
 635	__i915_gem_object_flush_map(obj, 0, 64);
 636	i915_gem_object_unpin_map(obj);
 637
 638	intel_gt_chipset_flush(&i915->gt);
 639
 640	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
 641	if (IS_ERR(vma)) {
 642		err = PTR_ERR(vma);
 643		goto err;
 644	}
 645
 646	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL);
 647	if (err)
 648		goto err;
 649
 650	return vma;
 651
 652err:
 653	i915_gem_object_put(obj);
 654	return ERR_PTR(err);
 655}
 656
 657static struct i915_request *
 658empty_request(struct intel_engine_cs *engine,
 659	      struct i915_vma *batch)
 660{
 661	struct i915_request *request;
 662	int err;
 663
 664	request = i915_request_create(engine->kernel_context);
 665	if (IS_ERR(request))
 666		return request;
 667
 668	err = engine->emit_bb_start(request,
 669				    batch->node.start,
 670				    batch->node.size,
 671				    I915_DISPATCH_SECURE);
 672	if (err)
 673		goto out_request;
 674
 675out_request:
 676	i915_request_add(request);
 677	return err ? ERR_PTR(err) : request;
 678}
 679
 680static int live_empty_request(void *arg)
 681{
 682	struct drm_i915_private *i915 = arg;
 683	struct intel_engine_cs *engine;
 684	intel_wakeref_t wakeref;
 685	struct igt_live_test t;
 686	struct i915_vma *batch;
 687	unsigned int id;
 688	int err = 0;
 689
  690	/* Submit various sized batches of requests that execute an empty batch
  691	 * buffer on each engine (individually), and wait for them to complete.
  692	 * This measures the overhead of submitting a minimal batch to the hardware.
  693	 */
 694
 695	mutex_lock(&i915->drm.struct_mutex);
 696	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 697
 698	batch = empty_batch(i915);
 699	if (IS_ERR(batch)) {
 700		err = PTR_ERR(batch);
 701		goto out_unlock;
 702	}
 703
 704	for_each_engine(engine, i915, id) {
 705		IGT_TIMEOUT(end_time);
 706		struct i915_request *request;
 707		unsigned long n, prime;
 708		ktime_t times[2] = {};
 709
 710		err = igt_live_test_begin(&t, i915, __func__, engine->name);
 711		if (err)
 712			goto out_batch;
 713
 714		/* Warmup / preload */
 715		request = empty_request(engine, batch);
 716		if (IS_ERR(request)) {
 717			err = PTR_ERR(request);
 718			goto out_batch;
 719		}
 720		i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
 721
 722		for_each_prime_number_from(prime, 1, 8192) {
 723			times[1] = ktime_get_raw();
 724
 725			for (n = 0; n < prime; n++) {
 726				request = empty_request(engine, batch);
 727				if (IS_ERR(request)) {
 728					err = PTR_ERR(request);
 729					goto out_batch;
 730				}
 731			}
 732			i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
 733
 734			times[1] = ktime_sub(ktime_get_raw(), times[1]);
 735			if (prime == 1)
 736				times[0] = times[1];
 737
 738			if (__igt_timeout(end_time, NULL))
 739				break;
 740		}
 741
 742		err = igt_live_test_end(&t);
 743		if (err)
 744			goto out_batch;
 745
 746		pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n",
 747			engine->name,
 748			ktime_to_ns(times[0]),
 749			prime, div64_u64(ktime_to_ns(times[1]), prime));
 750	}
 751
 752out_batch:
 753	i915_vma_unpin(batch);
 754	i915_vma_put(batch);
 755out_unlock:
 756	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 757	mutex_unlock(&i915->drm.struct_mutex);
 758	return err;
 759}
 760
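/*
 * recursive_batch() emits a MI_BATCH_BUFFER_START that branches back to the
 * start of its own buffer, so once submitted the batch spins on the GPU
 * until recursive_batch_resolve() overwrites that first dword with
 * MI_BATCH_BUFFER_END and lets the requests executing it retire.
 */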
 761static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
 762{
 763	struct i915_gem_context *ctx = i915->kernel_context;
 764	struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
 765	struct drm_i915_gem_object *obj;
 766	const int gen = INTEL_GEN(i915);
 767	struct i915_vma *vma;
 768	u32 *cmd;
 769	int err;
 770
 771	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
 772	if (IS_ERR(obj))
 773		return ERR_CAST(obj);
 774
 775	vma = i915_vma_instance(obj, vm, NULL);
 776	if (IS_ERR(vma)) {
 777		err = PTR_ERR(vma);
 778		goto err;
 779	}
 780
 781	err = i915_vma_pin(vma, 0, 0, PIN_USER);
 782	if (err)
 783		goto err;
 784
 785	cmd = i915_gem_object_pin_map(obj, I915_MAP_WC);
 786	if (IS_ERR(cmd)) {
 787		err = PTR_ERR(cmd);
 788		goto err;
 789	}
 790
 791	if (gen >= 8) {
 792		*cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
 793		*cmd++ = lower_32_bits(vma->node.start);
 794		*cmd++ = upper_32_bits(vma->node.start);
 795	} else if (gen >= 6) {
 796		*cmd++ = MI_BATCH_BUFFER_START | 1 << 8;
 797		*cmd++ = lower_32_bits(vma->node.start);
 798	} else {
 799		*cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
 800		*cmd++ = lower_32_bits(vma->node.start);
 801	}
 802	*cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */
 803
 804	__i915_gem_object_flush_map(obj, 0, 64);
 805	i915_gem_object_unpin_map(obj);
 806
 807	intel_gt_chipset_flush(&i915->gt);
 808
 809	return vma;
 810
 811err:
 812	i915_gem_object_put(obj);
 813	return ERR_PTR(err);
 814}
 815
 816static int recursive_batch_resolve(struct i915_vma *batch)
 817{
 818	u32 *cmd;
 819
 820	cmd = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
 821	if (IS_ERR(cmd))
 822		return PTR_ERR(cmd);
 823
 824	*cmd = MI_BATCH_BUFFER_END;
 825	intel_gt_chipset_flush(batch->vm->gt);
 826
 827	i915_gem_object_unpin_map(batch->obj);
 828
 829	return 0;
 830}
 831
 832static int live_all_engines(void *arg)
 833{
 834	struct drm_i915_private *i915 = arg;
 835	struct intel_engine_cs *engine;
  836	struct i915_request *request[I915_NUM_ENGINES] = {};
 837	intel_wakeref_t wakeref;
 838	struct igt_live_test t;
 839	struct i915_vma *batch;
 840	unsigned int id;
 841	int err;
 842
 843	/* Check we can submit requests to all engines simultaneously. We
 844	 * send a recursive batch to each engine - checking that we don't
 845	 * block doing so, and that they don't complete too soon.
 846	 */
 847
 848	mutex_lock(&i915->drm.struct_mutex);
 849	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 850
 851	err = igt_live_test_begin(&t, i915, __func__, "");
 852	if (err)
 853		goto out_unlock;
 854
 855	batch = recursive_batch(i915);
 856	if (IS_ERR(batch)) {
 857		err = PTR_ERR(batch);
 858		pr_err("%s: Unable to create batch, err=%d\n", __func__, err);
 859		goto out_unlock;
 860	}
 861
 862	for_each_engine(engine, i915, id) {
 863		request[id] = i915_request_create(engine->kernel_context);
 864		if (IS_ERR(request[id])) {
 865			err = PTR_ERR(request[id]);
 866			pr_err("%s: Request allocation failed with err=%d\n",
 867			       __func__, err);
 868			goto out_request;
 869		}
 870
 871		err = engine->emit_bb_start(request[id],
 872					    batch->node.start,
 873					    batch->node.size,
 874					    0);
 875		GEM_BUG_ON(err);
 876		request[id]->batch = batch;
 877
 878		i915_vma_lock(batch);
 879		err = i915_request_await_object(request[id], batch->obj, 0);
 880		if (err == 0)
 881			err = i915_vma_move_to_active(batch, request[id], 0);
 882		i915_vma_unlock(batch);
 883		GEM_BUG_ON(err);
 884
 885		i915_request_get(request[id]);
 886		i915_request_add(request[id]);
 887	}
 888
 889	for_each_engine(engine, i915, id) {
 890		if (i915_request_completed(request[id])) {
 891			pr_err("%s(%s): request completed too early!\n",
 892			       __func__, engine->name);
 893			err = -EINVAL;
 894			goto out_request;
 895		}
 896	}
 897
 898	err = recursive_batch_resolve(batch);
 899	if (err) {
 900		pr_err("%s: failed to resolve batch, err=%d\n", __func__, err);
 901		goto out_request;
 902	}
 903
 904	for_each_engine(engine, i915, id) {
 905		long timeout;
 906
 907		timeout = i915_request_wait(request[id], 0,
 908					    MAX_SCHEDULE_TIMEOUT);
 909		if (timeout < 0) {
 910			err = timeout;
 911			pr_err("%s: error waiting for request on %s, err=%d\n",
 912			       __func__, engine->name, err);
 913			goto out_request;
 914		}
 915
 916		GEM_BUG_ON(!i915_request_completed(request[id]));
 917		i915_request_put(request[id]);
 918		request[id] = NULL;
 919	}
 920
 921	err = igt_live_test_end(&t);
 922
 923out_request:
 924	for_each_engine(engine, i915, id)
 925		if (request[id])
 926			i915_request_put(request[id]);
 927	i915_vma_unpin(batch);
 928	i915_vma_put(batch);
 929out_unlock:
 930	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 931	mutex_unlock(&i915->drm.struct_mutex);
 932	return err;
 933}
 934
 935static int live_sequential_engines(void *arg)
 936{
 937	struct drm_i915_private *i915 = arg;
 938	struct i915_request *request[I915_NUM_ENGINES] = {};
 939	struct i915_request *prev = NULL;
 940	struct intel_engine_cs *engine;
 941	intel_wakeref_t wakeref;
 942	struct igt_live_test t;
 943	unsigned int id;
 944	int err;
 945
 946	/* Check we can submit requests to all engines sequentially, such
 947	 * that each successive request waits for the earlier ones. This
 948	 * tests that we don't execute requests out of order, even though
 949	 * they are running on independent engines.
 950	 */
 951
 952	mutex_lock(&i915->drm.struct_mutex);
 953	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 954
 955	err = igt_live_test_begin(&t, i915, __func__, "");
 956	if (err)
 957		goto out_unlock;
 958
 959	for_each_engine(engine, i915, id) {
 960		struct i915_vma *batch;
 961
 962		batch = recursive_batch(i915);
 963		if (IS_ERR(batch)) {
 964			err = PTR_ERR(batch);
 965			pr_err("%s: Unable to create batch for %s, err=%d\n",
 966			       __func__, engine->name, err);
 967			goto out_unlock;
 968		}
 969
 970		request[id] = i915_request_create(engine->kernel_context);
 971		if (IS_ERR(request[id])) {
 972			err = PTR_ERR(request[id]);
 973			pr_err("%s: Request allocation failed for %s with err=%d\n",
 974			       __func__, engine->name, err);
 975			goto out_request;
 976		}
 977
 978		if (prev) {
 979			err = i915_request_await_dma_fence(request[id],
 980							   &prev->fence);
 981			if (err) {
 982				i915_request_add(request[id]);
 983				pr_err("%s: Request await failed for %s with err=%d\n",
 984				       __func__, engine->name, err);
 985				goto out_request;
 986			}
 987		}
 988
 989		err = engine->emit_bb_start(request[id],
 990					    batch->node.start,
 991					    batch->node.size,
 992					    0);
 993		GEM_BUG_ON(err);
 994		request[id]->batch = batch;
 995
 996		i915_vma_lock(batch);
 997		err = i915_request_await_object(request[id], batch->obj, false);
 998		if (err == 0)
 999			err = i915_vma_move_to_active(batch, request[id], 0);
1000		i915_vma_unlock(batch);
1001		GEM_BUG_ON(err);
1002
1003		i915_request_get(request[id]);
1004		i915_request_add(request[id]);
1005
1006		prev = request[id];
1007	}
1008
1009	for_each_engine(engine, i915, id) {
1010		long timeout;
1011
1012		if (i915_request_completed(request[id])) {
1013			pr_err("%s(%s): request completed too early!\n",
1014			       __func__, engine->name);
1015			err = -EINVAL;
1016			goto out_request;
1017		}
1018
1019		err = recursive_batch_resolve(request[id]->batch);
1020		if (err) {
1021			pr_err("%s: failed to resolve batch, err=%d\n",
1022			       __func__, err);
1023			goto out_request;
1024		}
1025
1026		timeout = i915_request_wait(request[id], 0,
1027					    MAX_SCHEDULE_TIMEOUT);
1028		if (timeout < 0) {
1029			err = timeout;
1030			pr_err("%s: error waiting for request on %s, err=%d\n",
1031			       __func__, engine->name, err);
1032			goto out_request;
1033		}
1034
1035		GEM_BUG_ON(!i915_request_completed(request[id]));
1036	}
1037
1038	err = igt_live_test_end(&t);
1039
1040out_request:
1041	for_each_engine(engine, i915, id) {
1042		u32 *cmd;
1043
1044		if (!request[id])
1045			break;
1046
1047		cmd = i915_gem_object_pin_map(request[id]->batch->obj,
1048					      I915_MAP_WC);
1049		if (!IS_ERR(cmd)) {
1050			*cmd = MI_BATCH_BUFFER_END;
1051			intel_gt_chipset_flush(engine->gt);
1052
1053			i915_gem_object_unpin_map(request[id]->batch->obj);
1054		}
1055
1056		i915_vma_put(request[id]->batch);
1057		i915_request_put(request[id]);
1058	}
1059out_unlock:
1060	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
1061	mutex_unlock(&i915->drm.struct_mutex);
1062	return err;
1063}
1064
1065static int
1066max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
1067{
1068	struct i915_request *rq;
1069	int ret;
1070
1071	/*
1072	 * Before execlists, all contexts share the same ringbuffer. With
 1073	 * execlists, each context/engine has a separate ringbuffer which,
 1074	 * for the purposes of this test, is inexhaustible.
1075	 *
1076	 * For the global ringbuffer though, we have to be very careful
1077	 * that we do not wrap while preventing the execution of requests
 1078	 * with an unsignaled fence.
1079	 */
1080	if (HAS_EXECLISTS(ctx->i915))
1081		return INT_MAX;
1082
1083	rq = igt_request_alloc(ctx, engine);
1084	if (IS_ERR(rq)) {
1085		ret = PTR_ERR(rq);
1086	} else {
1087		int sz;
1088
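		/*
		 * Approximate how many requests fit in the shared legacy
		 * ring: usable ring bytes divided by the footprint of the
		 * request just emitted, then halved below for headroom.
		 */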
1089		ret = rq->ring->size - rq->reserved_space;
1090		i915_request_add(rq);
1091
1092		sz = rq->ring->emit - rq->head;
1093		if (sz < 0)
1094			sz += rq->ring->size;
1095		ret /= sz;
1096		ret /= 2; /* leave half spare, in case of emergency! */
1097	}
1098
1099	return ret;
1100}
1101
1102static int live_breadcrumbs_smoketest(void *arg)
1103{
1104	struct drm_i915_private *i915 = arg;
1105	struct smoketest t[I915_NUM_ENGINES];
1106	unsigned int ncpus = num_online_cpus();
1107	unsigned long num_waits, num_fences;
1108	struct intel_engine_cs *engine;
1109	struct task_struct **threads;
1110	struct igt_live_test live;
1111	enum intel_engine_id id;
1112	intel_wakeref_t wakeref;
1113	struct drm_file *file;
1114	unsigned int n;
1115	int ret = 0;
1116
1117	/*
1118	 * Smoketest our breadcrumb/signal handling for requests across multiple
 1119	 * threads. A very simple test intended only to catch the most egregious
 1120	 * of bugs. See __igt_breadcrumbs_smoketest().
1121	 *
1122	 * On real hardware this time.
1123	 */
1124
1125	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
1126
1127	file = mock_file(i915);
1128	if (IS_ERR(file)) {
1129		ret = PTR_ERR(file);
1130		goto out_rpm;
1131	}
1132
1133	threads = kcalloc(ncpus * I915_NUM_ENGINES,
1134			  sizeof(*threads),
1135			  GFP_KERNEL);
1136	if (!threads) {
1137		ret = -ENOMEM;
1138		goto out_file;
1139	}
1140
1141	memset(&t[0], 0, sizeof(t[0]));
1142	t[0].request_alloc = __live_request_alloc;
1143	t[0].ncontexts = 64;
1144	t[0].contexts = kmalloc_array(t[0].ncontexts,
1145				      sizeof(*t[0].contexts),
1146				      GFP_KERNEL);
1147	if (!t[0].contexts) {
1148		ret = -ENOMEM;
1149		goto out_threads;
1150	}
1151
1152	mutex_lock(&i915->drm.struct_mutex);
1153	for (n = 0; n < t[0].ncontexts; n++) {
 1154		t[0].contexts[n] = live_context(i915, file);
 1155		if (IS_ERR(t[0].contexts[n])) {
 1156			ret = PTR_ERR(t[0].contexts[n]);
 1157			goto out_contexts;
 1158		}
1159	}
1160
1161	ret = igt_live_test_begin(&live, i915, __func__, "");
1162	if (ret)
1163		goto out_contexts;
1164
1165	for_each_engine(engine, i915, id) {
1166		t[id] = t[0];
1167		t[id].engine = engine;
1168		t[id].max_batch = max_batches(t[0].contexts[0], engine);
1169		if (t[id].max_batch < 0) {
1170			ret = t[id].max_batch;
1171			mutex_unlock(&i915->drm.struct_mutex);
1172			goto out_flush;
1173		}
1174		/* One ring interleaved between requests from all cpus */
1175		t[id].max_batch /= num_online_cpus() + 1;
1176		pr_debug("Limiting batches to %d requests on %s\n",
1177			 t[id].max_batch, engine->name);
1178
1179		for (n = 0; n < ncpus; n++) {
1180			struct task_struct *tsk;
1181
1182			tsk = kthread_run(__igt_breadcrumbs_smoketest,
1183					  &t[id], "igt/%d.%d", id, n);
1184			if (IS_ERR(tsk)) {
1185				ret = PTR_ERR(tsk);
1186				mutex_unlock(&i915->drm.struct_mutex);
1187				goto out_flush;
1188			}
1189
1190			get_task_struct(tsk);
1191			threads[id * ncpus + n] = tsk;
1192		}
1193	}
1194	mutex_unlock(&i915->drm.struct_mutex);
1195
1196	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
1197
1198out_flush:
1199	num_waits = 0;
1200	num_fences = 0;
1201	for_each_engine(engine, i915, id) {
1202		for (n = 0; n < ncpus; n++) {
1203			struct task_struct *tsk = threads[id * ncpus + n];
1204			int err;
1205
1206			if (!tsk)
1207				continue;
1208
1209			err = kthread_stop(tsk);
1210			if (err < 0 && !ret)
1211				ret = err;
1212
1213			put_task_struct(tsk);
1214		}
1215
1216		num_waits += atomic_long_read(&t[id].num_waits);
1217		num_fences += atomic_long_read(&t[id].num_fences);
1218	}
1219	pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
1220		num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus);
1221
1222	mutex_lock(&i915->drm.struct_mutex);
1223	ret = igt_live_test_end(&live) ?: ret;
1224out_contexts:
1225	mutex_unlock(&i915->drm.struct_mutex);
1226	kfree(t[0].contexts);
1227out_threads:
1228	kfree(threads);
1229out_file:
1230	mock_file_free(i915, file);
1231out_rpm:
1232	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
1233
1234	return ret;
1235}
1236
1237int i915_request_live_selftests(struct drm_i915_private *i915)
1238{
1239	static const struct i915_subtest tests[] = {
1240		SUBTEST(live_nop_request),
1241		SUBTEST(live_all_engines),
1242		SUBTEST(live_sequential_engines),
1243		SUBTEST(live_empty_request),
1244		SUBTEST(live_breadcrumbs_smoketest),
1245	};
1246
1247	if (intel_gt_is_wedged(&i915->gt))
1248		return 0;
1249
1250	return i915_subtests(tests, i915);
1251}