1/*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25#include <linux/prime_numbers.h>
26#include <linux/pm_qos.h>
27#include <linux/sort.h>
28
29#include "gem/i915_gem_internal.h"
30#include "gem/i915_gem_pm.h"
31#include "gem/selftests/mock_context.h"
32
33#include "gt/intel_engine_heartbeat.h"
34#include "gt/intel_engine_pm.h"
35#include "gt/intel_engine_user.h"
36#include "gt/intel_gt.h"
37#include "gt/intel_gt_clock_utils.h"
38#include "gt/intel_gt_requests.h"
39#include "gt/selftest_engine_heartbeat.h"
40
41#include "i915_random.h"
42#include "i915_selftest.h"
43#include "igt_flush_test.h"
44#include "igt_live_test.h"
45#include "igt_spinner.h"
46#include "lib_sw_fence.h"
47
48#include "mock_drm.h"
49#include "mock_gem_device.h"
50
51static unsigned int num_uabi_engines(struct drm_i915_private *i915)
52{
53 struct intel_engine_cs *engine;
54 unsigned int count;
55
56 count = 0;
57 for_each_uabi_engine(engine, i915)
58 count++;
59
60 return count;
61}
62
63static struct intel_engine_cs *rcs0(struct drm_i915_private *i915)
64{
65 return intel_engine_lookup_user(i915, I915_ENGINE_CLASS_RENDER, 0);
66}
67
68static int igt_add_request(void *arg)
69{
70 struct drm_i915_private *i915 = arg;
71 struct i915_request *request;
72
73 /* Basic preliminary test to create a request and let it loose! */
74
75 request = mock_request(rcs0(i915)->kernel_context, HZ / 10);
76 if (!request)
77 return -ENOMEM;
78
79 i915_request_add(request);
80
81 return 0;
82}
83
84static int igt_wait_request(void *arg)
85{
86 const long T = HZ / 4;
87 struct drm_i915_private *i915 = arg;
88 struct i915_request *request;
89 int err = -EINVAL;
90
91 /* Submit a request, then wait upon it */
92
93 request = mock_request(rcs0(i915)->kernel_context, T);
94 if (!request)
95 return -ENOMEM;
96
97 i915_request_get(request);
98
99 if (i915_request_wait(request, 0, 0) != -ETIME) {
100 pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n");
101 goto out_request;
102 }
103
104 if (i915_request_wait(request, 0, T) != -ETIME) {
105 pr_err("request wait succeeded (expected timeout before submit!)\n");
106 goto out_request;
107 }
108
109 if (i915_request_completed(request)) {
110 pr_err("request completed before submit!!\n");
111 goto out_request;
112 }
113
114 i915_request_add(request);
115
116 if (i915_request_wait(request, 0, 0) != -ETIME) {
117 pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n");
118 goto out_request;
119 }
120
121 if (i915_request_completed(request)) {
122 pr_err("request completed immediately!\n");
123 goto out_request;
124 }
125
126 if (i915_request_wait(request, 0, T / 2) != -ETIME) {
127 pr_err("request wait succeeded (expected timeout!)\n");
128 goto out_request;
129 }
130
131 if (i915_request_wait(request, 0, T) == -ETIME) {
132 pr_err("request wait timed out!\n");
133 goto out_request;
134 }
135
136 if (!i915_request_completed(request)) {
137 pr_err("request not complete after waiting!\n");
138 goto out_request;
139 }
140
141 if (i915_request_wait(request, 0, T) == -ETIME) {
142 pr_err("request wait timed out when already complete!\n");
143 goto out_request;
144 }
145
146 err = 0;
147out_request:
148 i915_request_put(request);
149 mock_device_flush(i915);
150 return err;
151}
152
153static int igt_fence_wait(void *arg)
154{
155 const long T = HZ / 4;
156 struct drm_i915_private *i915 = arg;
157 struct i915_request *request;
158 int err = -EINVAL;
159
160 /* Submit a request, treat it as a fence and wait upon it */
161
162 request = mock_request(rcs0(i915)->kernel_context, T);
163 if (!request)
164 return -ENOMEM;
165
166 if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) {
167 pr_err("fence wait success before submit (expected timeout)!\n");
168 goto out;
169 }
170
171 i915_request_add(request);
172
173 if (dma_fence_is_signaled(&request->fence)) {
174 pr_err("fence signaled immediately!\n");
175 goto out;
176 }
177
178 if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) {
179 pr_err("fence wait success after submit (expected timeout)!\n");
180 goto out;
181 }
182
183 if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
184 pr_err("fence wait timed out (expected success)!\n");
185 goto out;
186 }
187
188 if (!dma_fence_is_signaled(&request->fence)) {
189 pr_err("fence unsignaled after waiting!\n");
190 goto out;
191 }
192
193 if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
194 pr_err("fence wait timed out when complete (expected success)!\n");
195 goto out;
196 }
197
198 err = 0;
199out:
200 mock_device_flush(i915);
201 return err;
202}
203
204static int igt_request_rewind(void *arg)
205{
206 struct drm_i915_private *i915 = arg;
207 struct i915_request *request, *vip;
208 struct i915_gem_context *ctx[2];
209 struct intel_context *ce;
210 int err = -EINVAL;
211
212 ctx[0] = mock_context(i915, "A");
213 if (!ctx[0]) {
214 err = -ENOMEM;
215 goto err_ctx_0;
216 }
217
218 ce = i915_gem_context_get_engine(ctx[0], RCS0);
219 GEM_BUG_ON(IS_ERR(ce));
220 request = mock_request(ce, 2 * HZ);
221 intel_context_put(ce);
222 if (!request) {
223 err = -ENOMEM;
224 goto err_context_0;
225 }
226
227 i915_request_get(request);
228 i915_request_add(request);
229
230 ctx[1] = mock_context(i915, "B");
231 if (!ctx[1]) {
232 err = -ENOMEM;
233 goto err_ctx_1;
234 }
235
236 ce = i915_gem_context_get_engine(ctx[1], RCS0);
237 GEM_BUG_ON(IS_ERR(ce));
238 vip = mock_request(ce, 0);
239 intel_context_put(ce);
240 if (!vip) {
241 err = -ENOMEM;
242 goto err_context_1;
243 }
244
245 /* Simulate preemption by manual reordering */
246 if (!mock_cancel_request(request)) {
247 pr_err("failed to cancel request (already executed)!\n");
248 i915_request_add(vip);
249 goto err_context_1;
250 }
251 i915_request_get(vip);
252 i915_request_add(vip);
253 rcu_read_lock();
254 request->engine->submit_request(request);
255 rcu_read_unlock();
256
257
258 if (i915_request_wait(vip, 0, HZ) == -ETIME) {
259 pr_err("timed out waiting for high priority request\n");
260 goto err;
261 }
262
263 if (i915_request_completed(request)) {
264 pr_err("low priority request already completed\n");
265 goto err;
266 }
267
268 err = 0;
269err:
270 i915_request_put(vip);
271err_context_1:
272 mock_context_close(ctx[1]);
273err_ctx_1:
274 i915_request_put(request);
275err_context_0:
276 mock_context_close(ctx[0]);
277err_ctx_0:
278 mock_device_flush(i915);
279 return err;
280}
281
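/*
 * Parameters shared by the breadcrumb smoketest threads: the engine under
 * test, a pool of contexts to build requests from, batch limits, counters
 * for the work completed, and the request constructor (mock or live).
 */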
282struct smoketest {
283 struct intel_engine_cs *engine;
284 struct i915_gem_context **contexts;
285 atomic_long_t num_waits, num_fences;
286 int ncontexts, max_batch;
287 struct i915_request *(*request_alloc)(struct intel_context *ce);
288};
289
290static struct i915_request *
291__mock_request_alloc(struct intel_context *ce)
292{
293 return mock_request(ce, 0);
294}
295
296static struct i915_request *
297__live_request_alloc(struct intel_context *ce)
298{
299 return intel_context_create_request(ce);
300}
301
302struct smoke_thread {
303 struct kthread_worker *worker;
304 struct kthread_work work;
305 struct smoketest *t;
306 bool stop;
307 int result;
308};
309
310static void __igt_breadcrumbs_smoketest(struct kthread_work *work)
311{
312 struct smoke_thread *thread = container_of(work, typeof(*thread), work);
313 struct smoketest *t = thread->t;
314 const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
315 const unsigned int total = 4 * t->ncontexts + 1;
316 unsigned int num_waits = 0, num_fences = 0;
317 struct i915_request **requests;
318 I915_RND_STATE(prng);
319 unsigned int *order;
320 int err = 0;
321
322 /*
323 * A very simple test to catch the most egregious of list handling bugs.
324 *
325 * At its heart, we simply create oodles of requests running across
326 * multiple kthreads and enable signaling on them, for the sole purpose
327 * of stressing our breadcrumb handling. The only inspection we do is
328 * that the fences were marked as signaled.
329 */
330
331 requests = kcalloc(total, sizeof(*requests), GFP_KERNEL);
332 if (!requests) {
333 thread->result = -ENOMEM;
334 return;
335 }
336
337 order = i915_random_order(total, &prng);
338 if (!order) {
339 err = -ENOMEM;
340 goto out_requests;
341 }
342
343 while (!READ_ONCE(thread->stop)) {
344 struct i915_sw_fence *submit, *wait;
345 unsigned int n, count;
346
347 submit = heap_fence_create(GFP_KERNEL);
348 if (!submit) {
349 err = -ENOMEM;
350 break;
351 }
352
353 wait = heap_fence_create(GFP_KERNEL);
354 if (!wait) {
355 i915_sw_fence_commit(submit);
356 heap_fence_put(submit);
357 err = -ENOMEM;
358 break;
359 }
360
361 i915_random_reorder(order, total, &prng);
362 count = 1 + i915_prandom_u32_max_state(max_batch, &prng);
363
364 for (n = 0; n < count; n++) {
365 struct i915_gem_context *ctx =
366 t->contexts[order[n] % t->ncontexts];
367 struct i915_request *rq;
368 struct intel_context *ce;
369
370 ce = i915_gem_context_get_engine(ctx, t->engine->legacy_idx);
371 GEM_BUG_ON(IS_ERR(ce));
372 rq = t->request_alloc(ce);
373 intel_context_put(ce);
374 if (IS_ERR(rq)) {
375 err = PTR_ERR(rq);
376 count = n;
377 break;
378 }
379
380 err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
381 submit,
382 GFP_KERNEL);
383
384 requests[n] = i915_request_get(rq);
385 i915_request_add(rq);
386
387 if (err >= 0)
388 err = i915_sw_fence_await_dma_fence(wait,
389 &rq->fence,
390 0,
391 GFP_KERNEL);
392
393 if (err < 0) {
394 i915_request_put(rq);
395 count = n;
396 break;
397 }
398 }
399
400 i915_sw_fence_commit(submit);
401 i915_sw_fence_commit(wait);
402
403 if (!wait_event_timeout(wait->wait,
404 i915_sw_fence_done(wait),
405 5 * HZ)) {
406 struct i915_request *rq = requests[count - 1];
407
408 pr_err("waiting for %d/%d fences (last %llx:%lld) on %s timed out!\n",
409 atomic_read(&wait->pending), count,
410 rq->fence.context, rq->fence.seqno,
411 t->engine->name);
412 GEM_TRACE_DUMP();
413
414 intel_gt_set_wedged(t->engine->gt);
415 GEM_BUG_ON(!i915_request_completed(rq));
416 i915_sw_fence_wait(wait);
417 err = -EIO;
418 }
419
420 for (n = 0; n < count; n++) {
421 struct i915_request *rq = requests[n];
422
423 if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
424 &rq->fence.flags)) {
425 pr_err("%llu:%llu was not signaled!\n",
426 rq->fence.context, rq->fence.seqno);
427 err = -EINVAL;
428 }
429
430 i915_request_put(rq);
431 }
432
433 heap_fence_put(wait);
434 heap_fence_put(submit);
435
436 if (err < 0)
437 break;
438
439 num_fences += count;
440 num_waits++;
441
442 cond_resched();
443 }
444
445 atomic_long_add(num_fences, &t->num_fences);
446 atomic_long_add(num_waits, &t->num_waits);
447
448 kfree(order);
449out_requests:
450 kfree(requests);
451 thread->result = err;
452}
453
454static int mock_breadcrumbs_smoketest(void *arg)
455{
456 struct drm_i915_private *i915 = arg;
457 struct smoketest t = {
458 .engine = rcs0(i915),
459 .ncontexts = 1024,
460 .max_batch = 1024,
461 .request_alloc = __mock_request_alloc
462 };
463 unsigned int ncpus = num_online_cpus();
464 struct smoke_thread *threads;
465 unsigned int n;
466 int ret = 0;
467
468 /*
469 * Smoketest our breadcrumb/signal handling for requests across multiple
470 * threads. A very simple test to only catch the most egregious of bugs.
471 * See __igt_breadcrumbs_smoketest();
472 */
473
474 threads = kcalloc(ncpus, sizeof(*threads), GFP_KERNEL);
475 if (!threads)
476 return -ENOMEM;
477
478 t.contexts = kcalloc(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL);
479 if (!t.contexts) {
480 ret = -ENOMEM;
481 goto out_threads;
482 }
483
484 for (n = 0; n < t.ncontexts; n++) {
485 t.contexts[n] = mock_context(t.engine->i915, "mock");
486 if (!t.contexts[n]) {
487 ret = -ENOMEM;
488 goto out_contexts;
489 }
490 }
491
492 for (n = 0; n < ncpus; n++) {
493 struct kthread_worker *worker;
494
495 worker = kthread_create_worker(0, "igt/%d", n);
496 if (IS_ERR(worker)) {
497 ret = PTR_ERR(worker);
498 ncpus = n;
499 break;
500 }
501
502 threads[n].worker = worker;
503 threads[n].t = &t;
504 threads[n].stop = false;
505 threads[n].result = 0;
506
507 kthread_init_work(&threads[n].work,
508 __igt_breadcrumbs_smoketest);
509 kthread_queue_work(worker, &threads[n].work);
510 }
511
512 msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
513
514 for (n = 0; n < ncpus; n++) {
515 int err;
516
517 WRITE_ONCE(threads[n].stop, true);
518 kthread_flush_work(&threads[n].work);
519 err = READ_ONCE(threads[n].result);
520 if (err < 0 && !ret)
521 ret = err;
522
523 kthread_destroy_worker(threads[n].worker);
524 }
525 pr_info("Completed %lu waits for %lu fences across %d cpus\n",
526 atomic_long_read(&t.num_waits),
527 atomic_long_read(&t.num_fences),
528 ncpus);
529
530out_contexts:
531 for (n = 0; n < t.ncontexts; n++) {
532 if (!t.contexts[n])
533 break;
534 mock_context_close(t.contexts[n]);
535 }
536 kfree(t.contexts);
537out_threads:
538 kfree(threads);
539 return ret;
540}
541
542int i915_request_mock_selftests(void)
543{
544 static const struct i915_subtest tests[] = {
545 SUBTEST(igt_add_request),
546 SUBTEST(igt_wait_request),
547 SUBTEST(igt_fence_wait),
548 SUBTEST(igt_request_rewind),
549 SUBTEST(mock_breadcrumbs_smoketest),
550 };
551 struct drm_i915_private *i915;
552 intel_wakeref_t wakeref;
553 int err = 0;
554
555 i915 = mock_gem_device();
556 if (!i915)
557 return -ENOMEM;
558
559 with_intel_runtime_pm(&i915->runtime_pm, wakeref)
560 err = i915_subtests(tests, i915);
561
562 mock_destroy_device(i915);
563
564 return err;
565}
566
567static int live_nop_request(void *arg)
568{
569 struct drm_i915_private *i915 = arg;
570 struct intel_engine_cs *engine;
571 struct igt_live_test t;
572 int err = -ENODEV;
573
574 /*
575 * Submit various sized batches of empty requests, to each engine
576 * (individually), and wait for the batch to complete. We can check
577 * the overhead of submitting requests to the hardware.
578 */
579
580 for_each_uabi_engine(engine, i915) {
581 unsigned long n, prime;
582 IGT_TIMEOUT(end_time);
583 ktime_t times[2] = {};
584
585 err = igt_live_test_begin(&t, i915, __func__, engine->name);
586 if (err)
587 return err;
588
589 intel_engine_pm_get(engine);
590 for_each_prime_number_from(prime, 1, 8192) {
591 struct i915_request *request = NULL;
592
593 times[1] = ktime_get_raw();
594
595 for (n = 0; n < prime; n++) {
596 i915_request_put(request);
597 request = i915_request_create(engine->kernel_context);
598 if (IS_ERR(request))
599 return PTR_ERR(request);
600
601 /*
602 * This space is left intentionally blank.
603 *
604 * We do not actually want to perform any
605 * action with this request, we just want
606 * to measure the latency in allocation
607 * and submission of our breadcrumbs -
608 * ensuring that the bare request is sufficient
609 * for the system to work (i.e. proper HEAD
610 * tracking of the rings, interrupt handling,
611 * etc). It also gives us the lowest bounds
612 * for latency.
613 */
614
615 i915_request_get(request);
616 i915_request_add(request);
617 }
618 i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
619 i915_request_put(request);
620
621 times[1] = ktime_sub(ktime_get_raw(), times[1]);
622 if (prime == 1)
623 times[0] = times[1];
624
625 if (__igt_timeout(end_time, NULL))
626 break;
627 }
628 intel_engine_pm_put(engine);
629
630 err = igt_live_test_end(&t);
631 if (err)
632 return err;
633
634 pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n",
635 engine->name,
636 ktime_to_ns(times[0]),
637 prime, div64_u64(ktime_to_ns(times[1]), prime));
638 }
639
640 return err;
641}
642
643static int __cancel_inactive(struct intel_engine_cs *engine)
644{
645 struct intel_context *ce;
646 struct igt_spinner spin;
647 struct i915_request *rq;
648 int err = 0;
649
650 if (igt_spinner_init(&spin, engine->gt))
651 return -ENOMEM;
652
653 ce = intel_context_create(engine);
654 if (IS_ERR(ce)) {
655 err = PTR_ERR(ce);
656 goto out_spin;
657 }
658
659 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
660 if (IS_ERR(rq)) {
661 err = PTR_ERR(rq);
662 goto out_ce;
663 }
664
665 pr_debug("%s: Cancelling inactive request\n", engine->name);
666 i915_request_cancel(rq, -EINTR);
667 i915_request_get(rq);
668 i915_request_add(rq);
669
670 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
671 struct drm_printer p = drm_info_printer(engine->i915->drm.dev);
672
673 pr_err("%s: Failed to cancel inactive request\n", engine->name);
674 intel_engine_dump(engine, &p, "%s\n", engine->name);
675 err = -ETIME;
676 goto out_rq;
677 }
678
679 if (rq->fence.error != -EINTR) {
680 pr_err("%s: fence not cancelled (%u)\n",
681 engine->name, rq->fence.error);
682 err = -EINVAL;
683 }
684
685out_rq:
686 i915_request_put(rq);
687out_ce:
688 intel_context_put(ce);
689out_spin:
690 igt_spinner_fini(&spin);
691 if (err)
692 pr_err("%s: %s error %d\n", __func__, engine->name, err);
693 return err;
694}
695
696static int __cancel_active(struct intel_engine_cs *engine)
697{
698 struct intel_context *ce;
699 struct igt_spinner spin;
700 struct i915_request *rq;
701 int err = 0;
702
703 if (igt_spinner_init(&spin, engine->gt))
704 return -ENOMEM;
705
706 ce = intel_context_create(engine);
707 if (IS_ERR(ce)) {
708 err = PTR_ERR(ce);
709 goto out_spin;
710 }
711
712 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
713 if (IS_ERR(rq)) {
714 err = PTR_ERR(rq);
715 goto out_ce;
716 }
717
718 pr_debug("%s: Cancelling active request\n", engine->name);
719 i915_request_get(rq);
720 i915_request_add(rq);
721 if (!igt_wait_for_spinner(&spin, rq)) {
722 struct drm_printer p = drm_info_printer(engine->i915->drm.dev);
723
724 pr_err("Failed to start spinner on %s\n", engine->name);
725 intel_engine_dump(engine, &p, "%s\n", engine->name);
726 err = -ETIME;
727 goto out_rq;
728 }
729 i915_request_cancel(rq, -EINTR);
730
731 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
732 struct drm_printer p = drm_info_printer(engine->i915->drm.dev);
733
734 pr_err("%s: Failed to cancel active request\n", engine->name);
735 intel_engine_dump(engine, &p, "%s\n", engine->name);
736 err = -ETIME;
737 goto out_rq;
738 }
739
740 if (rq->fence.error != -EINTR) {
741 pr_err("%s: fence not cancelled (%u)\n",
742 engine->name, rq->fence.error);
743 err = -EINVAL;
744 }
745
746out_rq:
747 i915_request_put(rq);
748out_ce:
749 intel_context_put(ce);
750out_spin:
751 igt_spinner_fini(&spin);
752 if (err)
753 pr_err("%s: %s error %d\n", __func__, engine->name, err);
754 return err;
755}
756
757static int __cancel_completed(struct intel_engine_cs *engine)
758{
759 struct intel_context *ce;
760 struct igt_spinner spin;
761 struct i915_request *rq;
762 int err = 0;
763
764 if (igt_spinner_init(&spin, engine->gt))
765 return -ENOMEM;
766
767 ce = intel_context_create(engine);
768 if (IS_ERR(ce)) {
769 err = PTR_ERR(ce);
770 goto out_spin;
771 }
772
773 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
774 if (IS_ERR(rq)) {
775 err = PTR_ERR(rq);
776 goto out_ce;
777 }
778 igt_spinner_end(&spin);
779 i915_request_get(rq);
780 i915_request_add(rq);
781
782 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
783 err = -ETIME;
784 goto out_rq;
785 }
786
787 pr_debug("%s: Cancelling completed request\n", engine->name);
788 i915_request_cancel(rq, -EINTR);
789 if (rq->fence.error) {
790 pr_err("%s: fence not cancelled (%u)\n",
791 engine->name, rq->fence.error);
792 err = -EINVAL;
793 }
794
795out_rq:
796 i915_request_put(rq);
797out_ce:
798 intel_context_put(ce);
799out_spin:
800 igt_spinner_fini(&spin);
801 if (err)
802 pr_err("%s: %s error %d\n", __func__, engine->name, err);
803 return err;
804}
805
806/*
807 * Test to prove a non-preemptible request can be cancelled and a subsequent
808 * request on the same context can successfully complete after cancellation.
809 *
810 * The testing methodology is to create a non-preemptible request and submit
811 * it, wait for the spinner to start, create a NOP request and submit it,
812 * cancel the spinner, wait for the spinner to complete and verify it failed
813 * with an error, and finally wait for the NOP request to complete and
814 * verify it succeeded without an error. The preemption timeout is also
815 * reduced and restored so the test runs in a timely manner.
816 */
817static int __cancel_reset(struct drm_i915_private *i915,
818 struct intel_engine_cs *engine)
819{
820 struct intel_context *ce;
821 struct igt_spinner spin;
822 struct i915_request *rq, *nop;
823 unsigned long preempt_timeout_ms;
824 int err = 0;
825
826 if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT ||
827 !intel_has_reset_engine(engine->gt))
828 return 0;
829
830 preempt_timeout_ms = engine->props.preempt_timeout_ms;
831 engine->props.preempt_timeout_ms = 100;
832
833 if (igt_spinner_init(&spin, engine->gt)) {
834 err = -ENOMEM;
 goto out_restore;
 }
835
836 ce = intel_context_create(engine);
837 if (IS_ERR(ce)) {
838 err = PTR_ERR(ce);
839 goto out_spin;
840 }
841
842 rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
843 if (IS_ERR(rq)) {
844 err = PTR_ERR(rq);
845 goto out_ce;
846 }
847
848 pr_debug("%s: Cancelling active non-preemptable request\n",
849 engine->name);
850 i915_request_get(rq);
851 i915_request_add(rq);
852 if (!igt_wait_for_spinner(&spin, rq)) {
853 struct drm_printer p = drm_info_printer(engine->i915->drm.dev);
854
855 pr_err("Failed to start spinner on %s\n", engine->name);
856 intel_engine_dump(engine, &p, "%s\n", engine->name);
857 err = -ETIME;
858 goto out_rq;
859 }
860
861 nop = intel_context_create_request(ce);
862 if (IS_ERR(nop)) {
863 err = PTR_ERR(nop);
 goto out_rq;
 }
864 i915_request_get(nop);
865 i915_request_add(nop);
866
867 i915_request_cancel(rq, -EINTR);
868
869 if (i915_request_wait(rq, 0, HZ) < 0) {
870 struct drm_printer p = drm_info_printer(engine->i915->drm.dev);
871
872 pr_err("%s: Failed to cancel hung request\n", engine->name);
873 intel_engine_dump(engine, &p, "%s\n", engine->name);
874 err = -ETIME;
875 goto out_nop;
876 }
877
878 if (rq->fence.error != -EINTR) {
879 pr_err("%s: fence not cancelled (%u)\n",
880 engine->name, rq->fence.error);
881 err = -EINVAL;
882 goto out_nop;
883 }
884
885 if (i915_request_wait(nop, 0, HZ) < 0) {
886 struct drm_printer p = drm_info_printer(engine->i915->drm.dev);
887
888 pr_err("%s: Failed to complete nop request\n", engine->name);
889 intel_engine_dump(engine, &p, "%s\n", engine->name);
890 err = -ETIME;
891 goto out_nop;
892 }
893
894 if (nop->fence.error != 0) {
895 pr_err("%s: Nop request errored (%u)\n",
896 engine->name, nop->fence.error);
897 err = -EINVAL;
898 }
899
900out_nop:
901 i915_request_put(nop);
902out_rq:
903 i915_request_put(rq);
904out_ce:
905 intel_context_put(ce);
906out_spin:
907 igt_spinner_fini(&spin);
908out_restore:
909 engine->props.preempt_timeout_ms = preempt_timeout_ms;
910 if (err)
911 pr_err("%s: %s error %d\n", __func__, engine->name, err);
912 return err;
913}
914
915static int live_cancel_request(void *arg)
916{
917 struct drm_i915_private *i915 = arg;
918 struct intel_engine_cs *engine;
919
920 /*
921 * Check cancellation of requests. We expect to be able to immediately
922 * cancel active requests, even if they are currently on the GPU.
923 */
924
925 for_each_uabi_engine(engine, i915) {
926 struct igt_live_test t;
927 int err, err2;
928
929 if (!intel_engine_has_preemption(engine))
930 continue;
931
932 err = igt_live_test_begin(&t, i915, __func__, engine->name);
933 if (err)
934 return err;
935
936 err = __cancel_inactive(engine);
937 if (err == 0)
938 err = __cancel_active(engine);
939 if (err == 0)
940 err = __cancel_completed(engine);
941
942 err2 = igt_live_test_end(&t);
943 if (err)
944 return err;
945 if (err2)
946 return err2;
947
948 /* Expects reset so call outside of igt_live_test_* */
949 err = __cancel_reset(i915, engine);
950 if (err)
951 return err;
952
953 if (igt_flush_test(i915))
954 return -EIO;
955 }
956
957 return 0;
958}
959
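/*
 * Build a single-page batch containing just MI_BATCH_BUFFER_END, pinned in
 * the GGTT, so each request carries the minimum possible payload.
 */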
960static struct i915_vma *empty_batch(struct drm_i915_private *i915)
961{
962 struct drm_i915_gem_object *obj;
963 struct i915_vma *vma;
964 u32 *cmd;
965 int err;
966
967 obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
968 if (IS_ERR(obj))
969 return ERR_CAST(obj);
970
971 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
972 if (IS_ERR(cmd)) {
973 err = PTR_ERR(cmd);
974 goto err;
975 }
976
977 *cmd = MI_BATCH_BUFFER_END;
978
979 __i915_gem_object_flush_map(obj, 0, 64);
980 i915_gem_object_unpin_map(obj);
981
982 intel_gt_chipset_flush(to_gt(i915));
983
984 vma = i915_vma_instance(obj, &to_gt(i915)->ggtt->vm, NULL);
985 if (IS_ERR(vma)) {
986 err = PTR_ERR(vma);
987 goto err;
988 }
989
990 err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL);
991 if (err)
992 goto err;
993
994 /* Force the wait now to avoid including it in the benchmark */
995 err = i915_vma_sync(vma);
996 if (err)
997 goto err_pin;
998
999 return vma;
1000
1001err_pin:
1002 i915_vma_unpin(vma);
1003err:
1004 i915_gem_object_put(obj);
1005 return ERR_PTR(err);
1006}
1007
1008static struct i915_request *
1009empty_request(struct intel_engine_cs *engine,
1010 struct i915_vma *batch)
1011{
1012 struct i915_request *request;
1013 int err;
1014
1015 request = i915_request_create(engine->kernel_context);
1016 if (IS_ERR(request))
1017 return request;
1018
1019 err = engine->emit_bb_start(request,
1020 batch->node.start,
1021 batch->node.size,
1022 I915_DISPATCH_SECURE);
1023 if (err)
1024 goto out_request;
1025
1026 i915_request_get(request);
1027out_request:
1028 i915_request_add(request);
1029 return err ? ERR_PTR(err) : request;
1030}
1031
1032static int live_empty_request(void *arg)
1033{
1034 struct drm_i915_private *i915 = arg;
1035 struct intel_engine_cs *engine;
1036 struct igt_live_test t;
1037 struct i915_vma *batch;
1038 int err = 0;
1039
1040 /*
1041 * Submit various sized batches of empty requests, to each engine
1042 * (individually), and wait for the batch to complete. We can check
1043 * the overhead of submitting requests to the hardware.
1044 */
1045
1046 batch = empty_batch(i915);
1047 if (IS_ERR(batch))
1048 return PTR_ERR(batch);
1049
1050 for_each_uabi_engine(engine, i915) {
1051 IGT_TIMEOUT(end_time);
1052 struct i915_request *request;
1053 unsigned long n, prime;
1054 ktime_t times[2] = {};
1055
1056 err = igt_live_test_begin(&t, i915, __func__, engine->name);
1057 if (err)
1058 goto out_batch;
1059
1060 intel_engine_pm_get(engine);
1061
1062 /* Warmup / preload */
1063 request = empty_request(engine, batch);
1064 if (IS_ERR(request)) {
1065 err = PTR_ERR(request);
1066 intel_engine_pm_put(engine);
1067 goto out_batch;
1068 }
1069 i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
1070
1071 for_each_prime_number_from(prime, 1, 8192) {
1072 times[1] = ktime_get_raw();
1073
1074 for (n = 0; n < prime; n++) {
1075 i915_request_put(request);
1076 request = empty_request(engine, batch);
1077 if (IS_ERR(request)) {
1078 err = PTR_ERR(request);
1079 intel_engine_pm_put(engine);
1080 goto out_batch;
1081 }
1082 }
1083 i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
1084
1085 times[1] = ktime_sub(ktime_get_raw(), times[1]);
1086 if (prime == 1)
1087 times[0] = times[1];
1088
1089 if (__igt_timeout(end_time, NULL))
1090 break;
1091 }
1092 i915_request_put(request);
1093 intel_engine_pm_put(engine);
1094
1095 err = igt_live_test_end(&t);
1096 if (err)
1097 goto out_batch;
1098
1099 pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n",
1100 engine->name,
1101 ktime_to_ns(times[0]),
1102 prime, div64_u64(ktime_to_ns(times[1]), prime));
1103 }
1104
1105out_batch:
1106 i915_vma_unpin(batch);
1107 i915_vma_put(batch);
1108 return err;
1109}
1110
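/*
 * Build a batch whose first command jumps back to its own start, so it spins
 * on the GPU indefinitely until recursive_batch_resolve() (or the error path)
 * overwrites that jump with MI_BATCH_BUFFER_END.
 */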
1111static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
1112{
1113 struct drm_i915_gem_object *obj;
1114 const int ver = GRAPHICS_VER(i915);
1115 struct i915_vma *vma;
1116 u32 *cmd;
1117 int err;
1118
1119 obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
1120 if (IS_ERR(obj))
1121 return ERR_CAST(obj);
1122
1123 vma = i915_vma_instance(obj, to_gt(i915)->vm, NULL);
1124 if (IS_ERR(vma)) {
1125 err = PTR_ERR(vma);
1126 goto err;
1127 }
1128
1129 err = i915_vma_pin(vma, 0, 0, PIN_USER);
1130 if (err)
1131 goto err;
1132
1133 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
1134 if (IS_ERR(cmd)) {
1135 err = PTR_ERR(cmd);
1136 goto err;
1137 }
1138
1139 if (ver >= 8) {
1140 *cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
1141 *cmd++ = lower_32_bits(vma->node.start);
1142 *cmd++ = upper_32_bits(vma->node.start);
1143 } else if (ver >= 6) {
1144 *cmd++ = MI_BATCH_BUFFER_START | 1 << 8;
1145 *cmd++ = lower_32_bits(vma->node.start);
1146 } else {
1147 *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
1148 *cmd++ = lower_32_bits(vma->node.start);
1149 }
1150 *cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */
1151
1152 __i915_gem_object_flush_map(obj, 0, 64);
1153 i915_gem_object_unpin_map(obj);
1154
1155 intel_gt_chipset_flush(to_gt(i915));
1156
1157 return vma;
1158
1159err:
1160 i915_gem_object_put(obj);
1161 return ERR_PTR(err);
1162}
1163
1164static int recursive_batch_resolve(struct i915_vma *batch)
1165{
1166 u32 *cmd;
1167
1168 cmd = i915_gem_object_pin_map_unlocked(batch->obj, I915_MAP_WC);
1169 if (IS_ERR(cmd))
1170 return PTR_ERR(cmd);
1171
1172 *cmd = MI_BATCH_BUFFER_END;
1173
1174 __i915_gem_object_flush_map(batch->obj, 0, sizeof(*cmd));
1175 i915_gem_object_unpin_map(batch->obj);
1176
1177 intel_gt_chipset_flush(batch->vm->gt);
1178
1179 return 0;
1180}
1181
1182static int live_all_engines(void *arg)
1183{
1184 struct drm_i915_private *i915 = arg;
1185 const unsigned int nengines = num_uabi_engines(i915);
1186 struct intel_engine_cs *engine;
1187 struct i915_request **request;
1188 struct igt_live_test t;
1189 struct i915_vma *batch;
1190 unsigned int idx;
1191 int err;
1192
1193 /*
1194 * Check we can submit requests to all engines simultaneously. We
1195 * send a recursive batch to each engine - checking that we don't
1196 * block doing so, and that they don't complete too soon.
1197 */
1198
1199 request = kcalloc(nengines, sizeof(*request), GFP_KERNEL);
1200 if (!request)
1201 return -ENOMEM;
1202
1203 err = igt_live_test_begin(&t, i915, __func__, "");
1204 if (err)
1205 goto out_free;
1206
1207 batch = recursive_batch(i915);
1208 if (IS_ERR(batch)) {
1209 err = PTR_ERR(batch);
1210 pr_err("%s: Unable to create batch, err=%d\n", __func__, err);
1211 goto out_free;
1212 }
1213
1214 i915_vma_lock(batch);
1215
1216 idx = 0;
1217 for_each_uabi_engine(engine, i915) {
1218 request[idx] = intel_engine_create_kernel_request(engine);
1219 if (IS_ERR(request[idx])) {
1220 err = PTR_ERR(request[idx]);
1221 pr_err("%s: Request allocation failed with err=%d\n",
1222 __func__, err);
1223 goto out_request;
1224 }
1225
1226 err = i915_vma_move_to_active(batch, request[idx], 0);
1227 GEM_BUG_ON(err);
1228
1229 err = engine->emit_bb_start(request[idx],
1230 batch->node.start,
1231 batch->node.size,
1232 0);
1233 GEM_BUG_ON(err);
1234 request[idx]->batch = batch;
1235
1236 i915_request_get(request[idx]);
1237 i915_request_add(request[idx]);
1238 idx++;
1239 }
1240
1241 i915_vma_unlock(batch);
1242
1243 idx = 0;
1244 for_each_uabi_engine(engine, i915) {
1245 if (i915_request_completed(request[idx])) {
1246 pr_err("%s(%s): request completed too early!\n",
1247 __func__, engine->name);
1248 err = -EINVAL;
1249 goto out_request;
1250 }
1251 idx++;
1252 }
1253
1254 err = recursive_batch_resolve(batch);
1255 if (err) {
1256 pr_err("%s: failed to resolve batch, err=%d\n", __func__, err);
1257 goto out_request;
1258 }
1259
1260 idx = 0;
1261 for_each_uabi_engine(engine, i915) {
1262 long timeout;
1263
1264 timeout = i915_request_wait(request[idx], 0,
1265 MAX_SCHEDULE_TIMEOUT);
1266 if (timeout < 0) {
1267 err = timeout;
1268 pr_err("%s: error waiting for request on %s, err=%d\n",
1269 __func__, engine->name, err);
1270 goto out_request;
1271 }
1272
1273 GEM_BUG_ON(!i915_request_completed(request[idx]));
1274 i915_request_put(request[idx]);
1275 request[idx] = NULL;
1276 idx++;
1277 }
1278
1279 err = igt_live_test_end(&t);
1280
1281out_request:
1282 idx = 0;
1283 for_each_uabi_engine(engine, i915) {
1284 if (request[idx])
1285 i915_request_put(request[idx]);
1286 idx++;
1287 }
1288 i915_vma_unpin(batch);
1289 i915_vma_put(batch);
1290out_free:
1291 kfree(request);
1292 return err;
1293}
1294
1295static int live_sequential_engines(void *arg)
1296{
1297 struct drm_i915_private *i915 = arg;
1298 const unsigned int nengines = num_uabi_engines(i915);
1299 struct i915_request **request;
1300 struct i915_request *prev = NULL;
1301 struct intel_engine_cs *engine;
1302 struct igt_live_test t;
1303 unsigned int idx;
1304 int err;
1305
1306 /*
1307 * Check we can submit requests to all engines sequentially, such
1308 * that each successive request waits for the earlier ones. This
1309 * tests that we don't execute requests out of order, even though
1310 * they are running on independent engines.
1311 */
1312
1313 request = kcalloc(nengines, sizeof(*request), GFP_KERNEL);
1314 if (!request)
1315 return -ENOMEM;
1316
1317 err = igt_live_test_begin(&t, i915, __func__, "");
1318 if (err)
1319 goto out_free;
1320
1321 idx = 0;
1322 for_each_uabi_engine(engine, i915) {
1323 struct i915_vma *batch;
1324
1325 batch = recursive_batch(i915);
1326 if (IS_ERR(batch)) {
1327 err = PTR_ERR(batch);
1328 pr_err("%s: Unable to create batch for %s, err=%d\n",
1329 __func__, engine->name, err);
1330 goto out_free;
1331 }
1332
1333 i915_vma_lock(batch);
1334 request[idx] = intel_engine_create_kernel_request(engine);
1335 if (IS_ERR(request[idx])) {
1336 err = PTR_ERR(request[idx]);
1337 pr_err("%s: Request allocation failed for %s with err=%d\n",
1338 __func__, engine->name, err);
1339 goto out_unlock;
1340 }
1341
1342 if (prev) {
1343 err = i915_request_await_dma_fence(request[idx],
1344 &prev->fence);
1345 if (err) {
1346 i915_request_add(request[idx]);
1347 pr_err("%s: Request await failed for %s with err=%d\n",
1348 __func__, engine->name, err);
1349 goto out_unlock;
1350 }
1351 }
1352
1353 err = i915_vma_move_to_active(batch, request[idx], 0);
1354 GEM_BUG_ON(err);
1355
1356 err = engine->emit_bb_start(request[idx],
1357 batch->node.start,
1358 batch->node.size,
1359 0);
1360 GEM_BUG_ON(err);
1361 request[idx]->batch = batch;
1362
1363 i915_request_get(request[idx]);
1364 i915_request_add(request[idx]);
1365
1366 prev = request[idx];
1367 idx++;
1368
1369out_unlock:
1370 i915_vma_unlock(batch);
1371 if (err)
1372 goto out_request;
1373 }
1374
1375 idx = 0;
1376 for_each_uabi_engine(engine, i915) {
1377 long timeout;
1378
1379 if (i915_request_completed(request[idx])) {
1380 pr_err("%s(%s): request completed too early!\n",
1381 __func__, engine->name);
1382 err = -EINVAL;
1383 goto out_request;
1384 }
1385
1386 err = recursive_batch_resolve(request[idx]->batch);
1387 if (err) {
1388 pr_err("%s: failed to resolve batch, err=%d\n",
1389 __func__, err);
1390 goto out_request;
1391 }
1392
1393 timeout = i915_request_wait(request[idx], 0,
1394 MAX_SCHEDULE_TIMEOUT);
1395 if (timeout < 0) {
1396 err = timeout;
1397 pr_err("%s: error waiting for request on %s, err=%d\n",
1398 __func__, engine->name, err);
1399 goto out_request;
1400 }
1401
1402 GEM_BUG_ON(!i915_request_completed(request[idx]));
1403 idx++;
1404 }
1405
1406 err = igt_live_test_end(&t);
1407
1408out_request:
1409 idx = 0;
1410 for_each_uabi_engine(engine, i915) {
1411 u32 *cmd;
1412
1413 if (!request[idx])
1414 break;
1415
1416 cmd = i915_gem_object_pin_map_unlocked(request[idx]->batch->obj,
1417 I915_MAP_WC);
1418 if (!IS_ERR(cmd)) {
1419 *cmd = MI_BATCH_BUFFER_END;
1420
1421 __i915_gem_object_flush_map(request[idx]->batch->obj,
1422 0, sizeof(*cmd));
1423 i915_gem_object_unpin_map(request[idx]->batch->obj);
1424
1425 intel_gt_chipset_flush(engine->gt);
1426 }
1427
1428 i915_vma_put(request[idx]->batch);
1429 i915_request_put(request[idx]);
1430 idx++;
1431 }
1432out_free:
1433 kfree(request);
1434 return err;
1435}
1436
1437struct parallel_thread {
1438 struct kthread_worker *worker;
1439 struct kthread_work work;
1440 struct intel_engine_cs *engine;
1441 int result;
1442};
1443
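/* Submit requests one at a time, synchronously waiting for each to complete. */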
1444static void __live_parallel_engine1(struct kthread_work *work)
1445{
1446 struct parallel_thread *thread =
1447 container_of(work, typeof(*thread), work);
1448 struct intel_engine_cs *engine = thread->engine;
1449 IGT_TIMEOUT(end_time);
1450 unsigned long count;
1451 int err = 0;
1452
1453 count = 0;
1454 intel_engine_pm_get(engine);
1455 do {
1456 struct i915_request *rq;
1457
1458 rq = i915_request_create(engine->kernel_context);
1459 if (IS_ERR(rq)) {
1460 err = PTR_ERR(rq);
1461 break;
1462 }
1463
1464 i915_request_get(rq);
1465 i915_request_add(rq);
1466
1467 err = 0;
1468 if (i915_request_wait(rq, 0, HZ) < 0)
1469 err = -ETIME;
1470 i915_request_put(rq);
1471 if (err)
1472 break;
1473
1474 count++;
1475 } while (!__igt_timeout(end_time, NULL));
1476 intel_engine_pm_put(engine);
1477
1478 pr_info("%s: %lu request + sync\n", engine->name, count);
1479 thread->result = err;
1480}
1481
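/* Submit requests back-to-back without waiting, counting raw submissions. */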
1482static void __live_parallel_engineN(struct kthread_work *work)
1483{
1484 struct parallel_thread *thread =
1485 container_of(work, typeof(*thread), work);
1486 struct intel_engine_cs *engine = thread->engine;
1487 IGT_TIMEOUT(end_time);
1488 unsigned long count;
1489 int err = 0;
1490
1491 count = 0;
1492 intel_engine_pm_get(engine);
1493 do {
1494 struct i915_request *rq;
1495
1496 rq = i915_request_create(engine->kernel_context);
1497 if (IS_ERR(rq)) {
1498 err = PTR_ERR(rq);
1499 break;
1500 }
1501
1502 i915_request_add(rq);
1503 count++;
1504 } while (!__igt_timeout(end_time, NULL));
1505 intel_engine_pm_put(engine);
1506
1507 pr_info("%s: %lu requests\n", engine->name, count);
1508 thread->result = err;
1509}
1510
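/*
 * A simple barrier over i915->selftest.counter: each thread decrements the
 * counter on arrival and the last one to arrive wakes all the waiters.
 */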
1511static bool wake_all(struct drm_i915_private *i915)
1512{
1513 if (atomic_dec_and_test(&i915->selftest.counter)) {
1514 wake_up_var(&i915->selftest.counter);
1515 return true;
1516 }
1517
1518 return false;
1519}
1520
1521static int wait_for_all(struct drm_i915_private *i915)
1522{
1523 if (wake_all(i915))
1524 return 0;
1525
1526 if (wait_var_event_timeout(&i915->selftest.counter,
1527 !atomic_read(&i915->selftest.counter),
1528 i915_selftest.timeout_jiffies))
1529 return 0;
1530
1531 return -ETIME;
1532}
1533
1534static void __live_parallel_spin(struct kthread_work *work)
1535{
1536 struct parallel_thread *thread =
1537 container_of(work, typeof(*thread), work);
1538 struct intel_engine_cs *engine = thread->engine;
1539 struct igt_spinner spin;
1540 struct i915_request *rq;
1541 int err = 0;
1542
1543 /*
1544 * Create a spinner running for eternity on each engine. If a second
1545 * spinner is incorrectly placed on the same engine, it will not be
1546 * able to start in time.
1547 */
1548
1549 if (igt_spinner_init(&spin, engine->gt)) {
1550 wake_all(engine->i915);
1551 thread->result = -ENOMEM;
1552 return;
1553 }
1554
1555 intel_engine_pm_get(engine);
1556 rq = igt_spinner_create_request(&spin,
1557 engine->kernel_context,
1558 MI_NOOP); /* no preemption */
1559 intel_engine_pm_put(engine);
1560 if (IS_ERR(rq)) {
1561 err = PTR_ERR(rq);
1562 if (err == -ENODEV)
1563 err = 0;
1564 wake_all(engine->i915);
1565 goto out_spin;
1566 }
1567
1568 i915_request_get(rq);
1569 i915_request_add(rq);
1570 if (igt_wait_for_spinner(&spin, rq)) {
1571 /* Occupy this engine for the whole test */
1572 err = wait_for_all(engine->i915);
1573 } else {
1574 pr_err("Failed to start spinner on %s\n", engine->name);
1575 err = -EINVAL;
1576 }
1577 igt_spinner_end(&spin);
1578
1579 if (err == 0 && i915_request_wait(rq, 0, HZ) < 0)
1580 err = -EIO;
1581 i915_request_put(rq);
1582
1583out_spin:
1584 igt_spinner_fini(&spin);
1585 thread->result = err;
1586}
1587
1588static int live_parallel_engines(void *arg)
1589{
1590 struct drm_i915_private *i915 = arg;
1591 static void (* const func[])(struct kthread_work *) = {
1592 __live_parallel_engine1,
1593 __live_parallel_engineN,
1594 __live_parallel_spin,
1595 NULL,
1596 };
1597 const unsigned int nengines = num_uabi_engines(i915);
1598 struct parallel_thread *threads;
1599 struct intel_engine_cs *engine;
1600 void (* const *fn)(struct kthread_work *);
1601 int err = 0;
1602
1603 /*
1604 * Check we can submit requests to all engines concurrently. This
1605 * tests that we load up the system maximally.
1606 */
1607
1608 threads = kcalloc(nengines, sizeof(*threads), GFP_KERNEL);
1609 if (!threads)
1610 return -ENOMEM;
1611
1612 for (fn = func; !err && *fn; fn++) {
1613 char name[KSYM_NAME_LEN];
1614 struct igt_live_test t;
1615 unsigned int idx;
1616
1617 snprintf(name, sizeof(name), "%ps", *fn);
1618 err = igt_live_test_begin(&t, i915, __func__, name);
1619 if (err)
1620 break;
1621
1622 atomic_set(&i915->selftest.counter, nengines);
1623
1624 idx = 0;
1625 for_each_uabi_engine(engine, i915) {
1626 struct kthread_worker *worker;
1627
1628 worker = kthread_create_worker(0, "igt/parallel:%s",
1629 engine->name);
1630 if (IS_ERR(worker)) {
1631 err = PTR_ERR(worker);
1632 break;
1633 }
1634
1635 threads[idx].worker = worker;
1636 threads[idx].result = 0;
1637 threads[idx].engine = engine;
1638
1639 kthread_init_work(&threads[idx].work, *fn);
1640 kthread_queue_work(worker, &threads[idx].work);
1641 idx++;
1642 }
1643
1644 idx = 0;
1645 for_each_uabi_engine(engine, i915) {
1646 int status;
1647
1648 if (!threads[idx].worker)
1649 break;
1650
1651 kthread_flush_work(&threads[idx].work);
1652 status = READ_ONCE(threads[idx].result);
1653 if (status && !err)
1654 err = status;
1655
1656 kthread_destroy_worker(threads[idx++].worker);
1657 }
1658
1659 if (igt_live_test_end(&t))
1660 err = -EIO;
1661 }
1662
1663 kfree(threads);
1664 return err;
1665}
1666
1667static int
1668max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
1669{
1670 struct i915_request *rq;
1671 int ret;
1672
1673 /*
1674 * Before execlists, all contexts share the same ringbuffer. With
1675 * execlists, each context/engine has a separate ringbuffer and
1676 * for the purposes of this test, inexhaustible.
1677 *
1678 * For the global ringbuffer though, we have to be very careful
1679 * that we do not wrap while preventing the execution of requests
1680 * with an unsignaled fence.
1681 */
1682 if (HAS_EXECLISTS(ctx->i915))
1683 return INT_MAX;
1684
1685 rq = igt_request_alloc(ctx, engine);
1686 if (IS_ERR(rq)) {
1687 ret = PTR_ERR(rq);
1688 } else {
1689 int sz;
1690
1691 ret = rq->ring->size - rq->reserved_space;
1692 i915_request_add(rq);
1693
1694 sz = rq->ring->emit - rq->head;
1695 if (sz < 0)
1696 sz += rq->ring->size;
1697 ret /= sz;
1698 ret /= 2; /* leave half spare, in case of emergency! */
1699 }
1700
1701 return ret;
1702}
1703
1704static int live_breadcrumbs_smoketest(void *arg)
1705{
1706 struct drm_i915_private *i915 = arg;
1707 const unsigned int nengines = num_uabi_engines(i915);
1708 const unsigned int ncpus = /* saturate with nengines * ncpus */
1709 max_t(int, 2, DIV_ROUND_UP(num_online_cpus(), nengines));
1710 unsigned long num_waits, num_fences;
1711 struct intel_engine_cs *engine;
1712 struct smoke_thread *threads;
1713 struct igt_live_test live;
1714 intel_wakeref_t wakeref;
1715 struct smoketest *smoke;
1716 unsigned int n, idx;
1717 struct file *file;
1718 int ret = 0;
1719
1720 /*
1721 * Smoketest our breadcrumb/signal handling for requests across multiple
1722 * threads. A very simple test to only catch the most egregious of bugs.
1723 * See __igt_breadcrumbs_smoketest();
1724 *
1725 * On real hardware this time.
1726 */
1727
1728 wakeref = intel_runtime_pm_get(&i915->runtime_pm);
1729
1730 file = mock_file(i915);
1731 if (IS_ERR(file)) {
1732 ret = PTR_ERR(file);
1733 goto out_rpm;
1734 }
1735
1736 smoke = kcalloc(nengines, sizeof(*smoke), GFP_KERNEL);
1737 if (!smoke) {
1738 ret = -ENOMEM;
1739 goto out_file;
1740 }
1741
1742 threads = kcalloc(ncpus * nengines, sizeof(*threads), GFP_KERNEL);
1743 if (!threads) {
1744 ret = -ENOMEM;
1745 goto out_smoke;
1746 }
1747
1748 smoke[0].request_alloc = __live_request_alloc;
1749 smoke[0].ncontexts = 64;
1750 smoke[0].contexts = kcalloc(smoke[0].ncontexts,
1751 sizeof(*smoke[0].contexts),
1752 GFP_KERNEL);
1753 if (!smoke[0].contexts) {
1754 ret = -ENOMEM;
1755 goto out_threads;
1756 }
1757
1758 for (n = 0; n < smoke[0].ncontexts; n++) {
1759 smoke[0].contexts[n] = live_context(i915, file);
1760 if (IS_ERR(smoke[0].contexts[n])) {
1761 ret = PTR_ERR(smoke[0].contexts[n]);
1762 goto out_contexts;
1763 }
1764 }
1765
1766 ret = igt_live_test_begin(&live, i915, __func__, "");
1767 if (ret)
1768 goto out_contexts;
1769
1770 idx = 0;
1771 for_each_uabi_engine(engine, i915) {
1772 smoke[idx] = smoke[0];
1773 smoke[idx].engine = engine;
1774 smoke[idx].max_batch =
1775 max_batches(smoke[0].contexts[0], engine);
1776 if (smoke[idx].max_batch < 0) {
1777 ret = smoke[idx].max_batch;
1778 goto out_flush;
1779 }
1780 /* One ring interleaved between requests from all cpus */
1781 smoke[idx].max_batch /= ncpus + 1;
1782 pr_debug("Limiting batches to %d requests on %s\n",
1783 smoke[idx].max_batch, engine->name);
1784
1785 for (n = 0; n < ncpus; n++) {
1786 unsigned int i = idx * ncpus + n;
1787 struct kthread_worker *worker;
1788
1789 worker = kthread_create_worker(0, "igt/%d.%d", idx, n);
1790 if (IS_ERR(worker)) {
1791 ret = PTR_ERR(worker);
1792 goto out_flush;
1793 }
1794
1795 threads[i].worker = worker;
1796 threads[i].t = &smoke[idx];
1797
1798 kthread_init_work(&threads[i].work,
1799 __igt_breadcrumbs_smoketest);
1800 kthread_queue_work(worker, &threads[i].work);
1801 }
1802
1803 idx++;
1804 }
1805
1806 msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
1807
1808out_flush:
1809 idx = 0;
1810 num_waits = 0;
1811 num_fences = 0;
1812 for_each_uabi_engine(engine, i915) {
1813 for (n = 0; n < ncpus; n++) {
1814 unsigned int i = idx * ncpus + n;
1815 int err;
1816
1817 if (!threads[i].worker)
1818 continue;
1819
1820 WRITE_ONCE(threads[i].stop, true);
1821 kthread_flush_work(&threads[i].work);
1822 err = READ_ONCE(threads[i].result);
1823 if (err < 0 && !ret)
1824 ret = err;
1825
1826 kthread_destroy_worker(threads[i].worker);
1827 }
1828
1829 num_waits += atomic_long_read(&smoke[idx].num_waits);
1830 num_fences += atomic_long_read(&smoke[idx].num_fences);
1831 idx++;
1832 }
1833 pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
1834 num_waits, num_fences, idx, ncpus);
1835
1836 ret = igt_live_test_end(&live) ?: ret;
1837out_contexts:
1838 kfree(smoke[0].contexts);
1839out_threads:
1840 kfree(threads);
1841out_smoke:
1842 kfree(smoke);
1843out_file:
1844 fput(file);
1845out_rpm:
1846 intel_runtime_pm_put(&i915->runtime_pm, wakeref);
1847
1848 return ret;
1849}
1850
1851int i915_request_live_selftests(struct drm_i915_private *i915)
1852{
1853 static const struct i915_subtest tests[] = {
1854 SUBTEST(live_nop_request),
1855 SUBTEST(live_all_engines),
1856 SUBTEST(live_sequential_engines),
1857 SUBTEST(live_parallel_engines),
1858 SUBTEST(live_empty_request),
1859 SUBTEST(live_cancel_request),
1860 SUBTEST(live_breadcrumbs_smoketest),
1861 };
1862
1863 if (intel_gt_is_wedged(to_gt(i915)))
1864 return 0;
1865
1866 return i915_live_subtests(tests, i915);
1867}
1868
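/*
 * Queue a kernel-context request ordered after the context's last request,
 * wait for it, and then flush submission until the engine is idle.
 */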
1869static int switch_to_kernel_sync(struct intel_context *ce, int err)
1870{
1871 struct i915_request *rq;
1872 struct dma_fence *fence;
1873
1874 rq = intel_engine_create_kernel_request(ce->engine);
1875 if (IS_ERR(rq))
1876 return PTR_ERR(rq);
1877
1878 fence = i915_active_fence_get(&ce->timeline->last_request);
1879 if (fence) {
1880 i915_request_await_dma_fence(rq, fence);
1881 dma_fence_put(fence);
1882 }
1883
1884 rq = i915_request_get(rq);
1885 i915_request_add(rq);
1886 if (i915_request_wait(rq, 0, HZ / 2) < 0 && !err)
1887 err = -ETIME;
1888 i915_request_put(rq);
1889
1890 while (!err && !intel_engine_is_idle(ce->engine))
1891 intel_engine_flush_submission(ce->engine);
1892
1893 return err;
1894}
1895
1896struct perf_stats {
1897 struct intel_engine_cs *engine;
1898 unsigned long count;
1899 ktime_t time;
1900 ktime_t busy;
1901 u64 runtime;
1902};
1903
1904struct perf_series {
1905 struct drm_i915_private *i915;
1906 unsigned int nengines;
1907 struct intel_context *ce[];
1908};
1909
1910static int cmp_u32(const void *A, const void *B)
1911{
1912 const u32 *a = A, *b = B;
1913
1914 return *a - *b;
1915}
1916
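/*
 * Discard the minimum and maximum of the five samples and return a weighted
 * sum of the middle three (2 * median + neighbours), i.e. the smoothed value
 * scaled by 1 << TF_BIAS; callers remove the bias with ">> TF_BIAS" or
 * cycles_to_ns().
 */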
1917static u32 trifilter(u32 *a)
1918{
1919 u64 sum;
1920
1921#define TF_COUNT 5
1922 sort(a, TF_COUNT, sizeof(*a), cmp_u32, NULL);
1923
1924 sum = mul_u32_u32(a[2], 2);
1925 sum += a[1];
1926 sum += a[3];
1927
1928 GEM_BUG_ON(sum > U32_MAX);
1929 return sum;
1930#define TF_BIAS 2
1931}
1932
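/* Convert CS timestamp cycles to nanoseconds, removing the trifilter bias. */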
1933static u64 cycles_to_ns(struct intel_engine_cs *engine, u32 cycles)
1934{
1935 u64 ns = intel_gt_clock_interval_to_ns(engine->gt, cycles);
1936
1937 return DIV_ROUND_CLOSEST(ns, 1 << TF_BIAS);
1938}
1939
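/*
 * Small command-stream helpers: store the engine's RING_TIMESTAMP to a GGTT
 * offset, store an immediate dword, and busy-wait (MI_SEMAPHORE_WAIT with
 * polling) on a GGTT semaphore.
 */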
1940static u32 *emit_timestamp_store(u32 *cs, struct intel_context *ce, u32 offset)
1941{
1942 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
1943 *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP((ce->engine->mmio_base)));
1944 *cs++ = offset;
1945 *cs++ = 0;
1946
1947 return cs;
1948}
1949
1950static u32 *emit_store_dw(u32 *cs, u32 offset, u32 value)
1951{
1952 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1953 *cs++ = offset;
1954 *cs++ = 0;
1955 *cs++ = value;
1956
1957 return cs;
1958}
1959
1960static u32 *emit_semaphore_poll(u32 *cs, u32 mode, u32 value, u32 offset)
1961{
1962 *cs++ = MI_SEMAPHORE_WAIT |
1963 MI_SEMAPHORE_GLOBAL_GTT |
1964 MI_SEMAPHORE_POLL |
1965 mode;
1966 *cs++ = value;
1967 *cs++ = offset;
1968 *cs++ = 0;
1969
1970 return cs;
1971}
1972
1973static u32 *emit_semaphore_poll_until(u32 *cs, u32 offset, u32 value)
1974{
1975 return emit_semaphore_poll(cs, MI_SEMAPHORE_SAD_EQ_SDD, value, offset);
1976}
1977
1978static void semaphore_set(u32 *sema, u32 value)
1979{
1980 WRITE_ONCE(*sema, value);
1981 wmb(); /* flush the update to the cache, and beyond */
1982}
1983
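/*
 * Borrow a scratch area in the engine's status page (HWSP), zeroing a run of
 * dwords for the measurements; hwsp_offset() returns its GGTT address.
 */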
1984static u32 *hwsp_scratch(const struct intel_context *ce)
1985{
1986 return memset32(ce->engine->status_page.addr + 1000, 0, 21);
1987}
1988
1989static u32 hwsp_offset(const struct intel_context *ce, u32 *dw)
1990{
1991 return (i915_ggtt_offset(ce->engine->status_page.vma) +
1992 offset_in_page(dw));
1993}
1994
1995static int measure_semaphore_response(struct intel_context *ce)
1996{
1997 u32 *sema = hwsp_scratch(ce);
1998 const u32 offset = hwsp_offset(ce, sema);
1999 u32 elapsed[TF_COUNT], cycles;
2000 struct i915_request *rq;
2001 u32 *cs;
2002 int err;
2003 int i;
2004
2005 /*
2006 * Measure how many cycles it takes for the HW to detect the change
2007 * in a semaphore value.
2008 *
2009 * A: read CS_TIMESTAMP from CPU
2010 * poke semaphore
2011 * B: read CS_TIMESTAMP on GPU
2012 *
2013 * Semaphore latency: B - A
2014 */
2015
2016 semaphore_set(sema, -1);
2017
2018 rq = i915_request_create(ce);
2019 if (IS_ERR(rq))
2020 return PTR_ERR(rq);
2021
2022 cs = intel_ring_begin(rq, 4 + 12 * ARRAY_SIZE(elapsed));
2023 if (IS_ERR(cs)) {
2024 i915_request_add(rq);
2025 err = PTR_ERR(cs);
2026 goto err;
2027 }
2028
2029 cs = emit_store_dw(cs, offset, 0);
2030 for (i = 1; i <= ARRAY_SIZE(elapsed); i++) {
2031 cs = emit_semaphore_poll_until(cs, offset, i);
2032 cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32));
2033 cs = emit_store_dw(cs, offset, 0);
2034 }
2035
2036 intel_ring_advance(rq, cs);
2037 i915_request_add(rq);
2038
2039 if (wait_for(READ_ONCE(*sema) == 0, 50)) {
2040 err = -EIO;
2041 goto err;
2042 }
2043
2044 for (i = 1; i <= ARRAY_SIZE(elapsed); i++) {
2045 preempt_disable();
2046 cycles = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP);
2047 semaphore_set(sema, i);
2048 preempt_enable();
2049
2050 if (wait_for(READ_ONCE(*sema) == 0, 50)) {
2051 err = -EIO;
2052 goto err;
2053 }
2054
2055 elapsed[i - 1] = sema[i] - cycles;
2056 }
2057
2058 cycles = trifilter(elapsed);
2059 pr_info("%s: semaphore response %d cycles, %lluns\n",
2060 ce->engine->name, cycles >> TF_BIAS,
2061 cycles_to_ns(ce->engine, cycles));
2062
2063 return intel_gt_wait_for_idle(ce->engine->gt, HZ);
2064
2065err:
2066 intel_gt_set_wedged(ce->engine->gt);
2067 return err;
2068}
2069
2070static int measure_idle_dispatch(struct intel_context *ce)
2071{
2072 u32 *sema = hwsp_scratch(ce);
2073 const u32 offset = hwsp_offset(ce, sema);
2074 u32 elapsed[TF_COUNT], cycles;
2075 u32 *cs;
2076 int err;
2077 int i;
2078
2079 /*
2080 * Measure how long it takes for us to submit a request while the
2081 * engine is idle, but is resting in our context.
2082 *
2083 * A: read CS_TIMESTAMP from CPU
2084 * submit request
2085 * B: read CS_TIMESTAMP on GPU
2086 *
2087 * Submission latency: B - A
2088 */
2089
2090 for (i = 0; i < ARRAY_SIZE(elapsed); i++) {
2091 struct i915_request *rq;
2092
2093 err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2);
2094 if (err)
2095 return err;
2096
2097 rq = i915_request_create(ce);
2098 if (IS_ERR(rq)) {
2099 err = PTR_ERR(rq);
2100 goto err;
2101 }
2102
2103 cs = intel_ring_begin(rq, 4);
2104 if (IS_ERR(cs)) {
2105 i915_request_add(rq);
2106 err = PTR_ERR(cs);
2107 goto err;
2108 }
2109
2110 cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32));
2111
2112 intel_ring_advance(rq, cs);
2113
2114 preempt_disable();
2115 local_bh_disable();
2116 elapsed[i] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP);
2117 i915_request_add(rq);
2118 local_bh_enable();
2119 preempt_enable();
2120 }
2121
2122 err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2);
2123 if (err)
2124 goto err;
2125
2126 for (i = 0; i < ARRAY_SIZE(elapsed); i++)
2127 elapsed[i] = sema[i] - elapsed[i];
2128
2129 cycles = trifilter(elapsed);
2130 pr_info("%s: idle dispatch latency %d cycles, %lluns\n",
2131 ce->engine->name, cycles >> TF_BIAS,
2132 cycles_to_ns(ce->engine, cycles));
2133
2134 return intel_gt_wait_for_idle(ce->engine->gt, HZ);
2135
2136err:
2137 intel_gt_set_wedged(ce->engine->gt);
2138 return err;
2139}
2140
2141static int measure_busy_dispatch(struct intel_context *ce)
2142{
2143 u32 *sema = hwsp_scratch(ce);
2144 const u32 offset = hwsp_offset(ce, sema);
2145 u32 elapsed[TF_COUNT + 1], cycles;
2146 u32 *cs;
2147 int err;
2148 int i;
2149
2150 /*
2151 * Measure how long it takes for us to submit a request while the
2152 * engine is busy, polling on a semaphore in our context. With
2153 * direct submission, this will include the cost of a lite restore.
2154 *
2155 * A: read CS_TIMESTAMP from CPU
2156 * submit request
2157 * B: read CS_TIMESTAMP on GPU
2158 *
2159 * Submission latency: B - A
2160 */
2161
2162 for (i = 1; i <= ARRAY_SIZE(elapsed); i++) {
2163 struct i915_request *rq;
2164
2165 rq = i915_request_create(ce);
2166 if (IS_ERR(rq)) {
2167 err = PTR_ERR(rq);
2168 goto err;
2169 }
2170
2171 cs = intel_ring_begin(rq, 12);
2172 if (IS_ERR(cs)) {
2173 i915_request_add(rq);
2174 err = PTR_ERR(cs);
2175 goto err;
2176 }
2177
2178 cs = emit_store_dw(cs, offset + i * sizeof(u32), -1);
2179 cs = emit_semaphore_poll_until(cs, offset, i);
2180 cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32));
2181
2182 intel_ring_advance(rq, cs);
2183
2184 if (i > 1 && wait_for(READ_ONCE(sema[i - 1]), 500)) {
2185 err = -EIO;
2186 goto err;
2187 }
2188
2189 preempt_disable();
2190 local_bh_disable();
2191 elapsed[i - 1] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP);
2192 i915_request_add(rq);
2193 local_bh_enable();
2194 semaphore_set(sema, i - 1);
2195 preempt_enable();
2196 }
2197
2198 wait_for(READ_ONCE(sema[i - 1]), 500);
2199 semaphore_set(sema, i - 1);
2200
2201 for (i = 1; i <= TF_COUNT; i++) {
2202 GEM_BUG_ON(sema[i] == -1);
2203 elapsed[i - 1] = sema[i] - elapsed[i];
2204 }
2205
2206 cycles = trifilter(elapsed);
2207 pr_info("%s: busy dispatch latency %d cycles, %lluns\n",
2208 ce->engine->name, cycles >> TF_BIAS,
2209 cycles_to_ns(ce->engine, cycles));
2210
2211 return intel_gt_wait_for_idle(ce->engine->gt, HZ);
2212
2213err:
2214 intel_gt_set_wedged(ce->engine->gt);
2215 return err;
2216}
2217
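/*
 * Block the engine by submitting a kernel-context request that busy-waits on
 * the given semaphore; release it by updating the semaphore, e.g. with
 * semaphore_set().
 */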
2218static int plug(struct intel_engine_cs *engine, u32 *sema, u32 mode, int value)
2219{
2220 const u32 offset =
2221 i915_ggtt_offset(engine->status_page.vma) +
2222 offset_in_page(sema);
2223 struct i915_request *rq;
2224 u32 *cs;
2225
2226 rq = i915_request_create(engine->kernel_context);
2227 if (IS_ERR(rq))
2228 return PTR_ERR(rq);
2229
2230 cs = intel_ring_begin(rq, 4);
2231 if (IS_ERR(cs)) {
2232 i915_request_add(rq);
2233 return PTR_ERR(cs);
2234 }
2235
2236 cs = emit_semaphore_poll(cs, mode, value, offset);
2237
2238 intel_ring_advance(rq, cs);
2239 i915_request_add(rq);
2240
2241 return 0;
2242}
2243
2244static int measure_inter_request(struct intel_context *ce)
2245{
2246 u32 *sema = hwsp_scratch(ce);
2247 const u32 offset = hwsp_offset(ce, sema);
2248 u32 elapsed[TF_COUNT + 1], cycles;
2249 struct i915_sw_fence *submit;
2250 int i, err;
2251
2252 /*
2253 * Measure how long it takes to advance from one request into the
2254 * next. Between each request we flush the GPU caches to memory,
2255 * update the breadcrumbs, and then invalidate those caches.
2256 * We queue up all the requests to be submitted in one batch so
2257 * it should be one set of contiguous measurements.
2258 *
2259 * A: read CS_TIMESTAMP on GPU
2260 * advance request
2261 * B: read CS_TIMESTAMP on GPU
2262 *
2263 * Request latency: B - A
2264 */
2265
2266 err = plug(ce->engine, sema, MI_SEMAPHORE_SAD_NEQ_SDD, 0);
2267 if (err)
2268 return err;
2269
2270 submit = heap_fence_create(GFP_KERNEL);
2271 if (!submit) {
2272 semaphore_set(sema, 1);
2273 return -ENOMEM;
2274 }
2275
2276 intel_engine_flush_submission(ce->engine);
2277 for (i = 1; i <= ARRAY_SIZE(elapsed); i++) {
2278 struct i915_request *rq;
2279 u32 *cs;
2280
2281 rq = i915_request_create(ce);
2282 if (IS_ERR(rq)) {
2283 err = PTR_ERR(rq);
2284 goto err_submit;
2285 }
2286
2287 err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
2288 submit,
2289 GFP_KERNEL);
2290 if (err < 0) {
2291 i915_request_add(rq);
2292 goto err_submit;
2293 }
2294
2295 cs = intel_ring_begin(rq, 4);
2296 if (IS_ERR(cs)) {
2297 i915_request_add(rq);
2298 err = PTR_ERR(cs);
2299 goto err_submit;
2300 }
2301
2302 cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32));
2303
2304 intel_ring_advance(rq, cs);
2305 i915_request_add(rq);
2306 }
2307 i915_sw_fence_commit(submit);
2308 intel_engine_flush_submission(ce->engine);
2309 heap_fence_put(submit);
2310
2311 semaphore_set(sema, 1);
2312 err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2);
2313 if (err)
2314 goto err;
2315
2316 for (i = 1; i <= TF_COUNT; i++)
2317 elapsed[i - 1] = sema[i + 1] - sema[i];
2318
2319 cycles = trifilter(elapsed);
2320 pr_info("%s: inter-request latency %d cycles, %lluns\n",
2321 ce->engine->name, cycles >> TF_BIAS,
2322 cycles_to_ns(ce->engine, cycles));
2323
2324 return intel_gt_wait_for_idle(ce->engine->gt, HZ);
2325
2326err_submit:
2327 i915_sw_fence_commit(submit);
2328 heap_fence_put(submit);
2329 semaphore_set(sema, 1);
2330err:
2331 intel_gt_set_wedged(ce->engine->gt);
2332 return err;
2333}
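
/*
 * A minimal sketch of the submit-fence batching used above: every request
 * is made to wait on a single i915_sw_fence, so nothing reaches the engine
 * until the fence is committed and the whole queue then drains
 * back-to-back. Illustrative only; error unwinding is trimmed and the
 * helper is not part of the selftest list.
 */
static int __maybe_unused
queue_batch_then_release(struct intel_context *ce, int count)
{
	struct i915_sw_fence *submit;
	int i, err = 0;

	submit = heap_fence_create(GFP_KERNEL);
	if (!submit)
		return -ENOMEM;

	for (i = 0; i < count; i++) {
		struct i915_request *rq;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		/* Hold back submission until the fence is committed */
		err = i915_sw_fence_await_sw_fence_gfp(&rq->submit, submit,
						       GFP_KERNEL);
		i915_request_add(rq);
		if (err < 0)
			break;
	}

	i915_sw_fence_commit(submit); /* release the entire batch at once */
	heap_fence_put(submit);

	return err < 0 ? err : 0;
}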
2334
2335static int measure_context_switch(struct intel_context *ce)
2336{
2337 u32 *sema = hwsp_scratch(ce);
2338 const u32 offset = hwsp_offset(ce, sema);
2339 struct i915_request *fence = NULL;
2340 u32 elapsed[TF_COUNT + 1], cycles;
2341 int i, j, err;
2342 u32 *cs;
2343
2344 /*
2345 * Measure how long it takes to advance from one request in one
2346 * context to a request in another context. This allows us to
2347 * measure how long the context save/restore takes, along with all
2348 * the inter-context setup we require.
2349 *
2350 * A: read CS_TIMESTAMP on GPU
2351 * switch context
2352 * B: read CS_TIMESTAMP on GPU
2353 *
2354 * Context switch latency: B - A
2355 */
2356
2357 err = plug(ce->engine, sema, MI_SEMAPHORE_SAD_NEQ_SDD, 0);
2358 if (err)
2359 return err;
2360
2361 for (i = 1; i <= ARRAY_SIZE(elapsed); i++) {
2362 struct intel_context *arr[] = {
2363 ce, ce->engine->kernel_context
2364 };
2365 u32 addr = offset + ARRAY_SIZE(arr) * i * sizeof(u32);
2366
2367 for (j = 0; j < ARRAY_SIZE(arr); j++) {
2368 struct i915_request *rq;
2369
2370 rq = i915_request_create(arr[j]);
2371 if (IS_ERR(rq)) {
2372 err = PTR_ERR(rq);
2373 goto err_fence;
2374 }
2375
2376 if (fence) {
2377 err = i915_request_await_dma_fence(rq,
2378 &fence->fence);
2379 if (err) {
2380 i915_request_add(rq);
2381 goto err_fence;
2382 }
2383 }
2384
2385 cs = intel_ring_begin(rq, 4);
2386 if (IS_ERR(cs)) {
2387 i915_request_add(rq);
2388 err = PTR_ERR(cs);
2389 goto err_fence;
2390 }
2391
2392 cs = emit_timestamp_store(cs, ce, addr);
2393 addr += sizeof(u32);
2394
2395 intel_ring_advance(rq, cs);
2396
2397 i915_request_put(fence);
2398 fence = i915_request_get(rq);
2399
2400 i915_request_add(rq);
2401 }
2402 }
2403 i915_request_put(fence);
2404 intel_engine_flush_submission(ce->engine);
2405
2406 semaphore_set(sema, 1);
2407 err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2);
2408 if (err)
2409 goto err;
2410
2411 for (i = 1; i <= TF_COUNT; i++)
2412 elapsed[i - 1] = sema[2 * i + 2] - sema[2 * i + 1];
2413
2414 cycles = trifilter(elapsed);
2415 pr_info("%s: context switch latency %d cycles, %lluns\n",
2416 ce->engine->name, cycles >> TF_BIAS,
2417 cycles_to_ns(ce->engine, cycles));
2418
2419 return intel_gt_wait_for_idle(ce->engine->gt, HZ);
2420
2421err_fence:
2422 i915_request_put(fence);
2423 semaphore_set(sema, 1);
2424err:
2425 intel_gt_set_wedged(ce->engine->gt);
2426 return err;
2427}
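
/*
 * A minimal sketch of the request chaining used above: each new request
 * waits on the previous request's dma-fence, forcing the ping-pong between
 * the two contexts to execute strictly in submission order. Illustrative
 * only; not part of the selftest list.
 */
static struct i915_request *__maybe_unused
chain_request(struct intel_context *ce, struct i915_request *prev)
{
	struct i915_request *rq;
	int err;

	rq = i915_request_create(ce);
	if (IS_ERR(rq))
		return rq;

	if (prev) {
		err = i915_request_await_dma_fence(rq, &prev->fence);
		if (err) {
			i915_request_add(rq);
			return ERR_PTR(err);
		}
	}

	i915_request_get(rq);
	i915_request_add(rq);

	return rq; /* caller holds a reference; drop with i915_request_put() */
}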
2428
2429static int measure_preemption(struct intel_context *ce)
2430{
2431 u32 *sema = hwsp_scratch(ce);
2432 const u32 offset = hwsp_offset(ce, sema);
2433 u32 elapsed[TF_COUNT], cycles;
2434 u32 *cs;
2435 int err;
2436 int i;
2437
2438 /*
2439 * We measure two latencies while triggering preemption. The first
2440 * latency is how long it takes for us to submit a preempting request.
2441 * The second latency is how long it takes for us to return from the
2442 * preemption back to the original context.
2443 *
2444 * A: read CS_TIMESTAMP from CPU
2445 * submit preemption
2446 * B: read CS_TIMESTAMP on GPU (in preempting context)
2447 * context switch
2448 * C: read CS_TIMESTAMP on GPU (in original context)
2449 *
2450 * Preemption dispatch latency: B - A
2451 * Preemption switch latency: C - B
2452 */
2453
2454 if (!intel_engine_has_preemption(ce->engine))
2455 return 0;
2456
2457 for (i = 1; i <= ARRAY_SIZE(elapsed); i++) {
2458 u32 addr = offset + 2 * i * sizeof(u32);
2459 struct i915_request *rq;
2460
2461 rq = i915_request_create(ce);
2462 if (IS_ERR(rq)) {
2463 err = PTR_ERR(rq);
2464 goto err;
2465 }
2466
2467 cs = intel_ring_begin(rq, 12);
2468 if (IS_ERR(cs)) {
2469 i915_request_add(rq);
2470 err = PTR_ERR(cs);
2471 goto err;
2472 }
2473
2474 cs = emit_store_dw(cs, addr, -1);
2475 cs = emit_semaphore_poll_until(cs, offset, i);
2476 cs = emit_timestamp_store(cs, ce, addr + sizeof(u32));
2477
2478 intel_ring_advance(rq, cs);
2479 i915_request_add(rq);
2480
2481 if (wait_for(READ_ONCE(sema[2 * i]) == -1, 500)) {
2482 err = -EIO;
2483 goto err;
2484 }
2485
2486 rq = i915_request_create(ce->engine->kernel_context);
2487 if (IS_ERR(rq)) {
2488 err = PTR_ERR(rq);
2489 goto err;
2490 }
2491
2492 cs = intel_ring_begin(rq, 8);
2493 if (IS_ERR(cs)) {
2494 i915_request_add(rq);
2495 err = PTR_ERR(cs);
2496 goto err;
2497 }
2498
2499 cs = emit_timestamp_store(cs, ce, addr);
2500 cs = emit_store_dw(cs, offset, i);
2501
2502 intel_ring_advance(rq, cs);
2503 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
2504
2505 elapsed[i - 1] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP);
2506 i915_request_add(rq);
2507 }
2508
2509 if (wait_for(READ_ONCE(sema[2 * i - 2]) != -1, 500)) {
2510 err = -EIO;
2511 goto err;
2512 }
2513
2514 for (i = 1; i <= TF_COUNT; i++)
2515 elapsed[i - 1] = sema[2 * i + 0] - elapsed[i - 1];
2516
2517 cycles = trifilter(elapsed);
2518 pr_info("%s: preemption dispatch latency %d cycles, %lluns\n",
2519 ce->engine->name, cycles >> TF_BIAS,
2520 cycles_to_ns(ce->engine, cycles));
2521
2522 for (i = 1; i <= TF_COUNT; i++)
2523 elapsed[i - 1] = sema[2 * i + 1] - sema[2 * i + 0];
2524
2525 cycles = trifilter(elapsed);
2526 pr_info("%s: preemption switch latency %d cycles, %lluns\n",
2527 ce->engine->name, cycles >> TF_BIAS,
2528 cycles_to_ns(ce->engine, cycles));
2529
2530 return intel_gt_wait_for_idle(ce->engine->gt, HZ);
2531
2532err:
2533 intel_gt_set_wedged(ce->engine->gt);
2534 return err;
2535}
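
/*
 * A minimal sketch of the preemption trigger used above: a request on the
 * engine's kernel context is raised to I915_PRIORITY_BARRIER before being
 * submitted, so it preempts whatever the engine is currently executing.
 * Illustrative only; not part of the selftest list.
 */
static int __maybe_unused submit_preempter(struct intel_engine_cs *engine)
{
	struct i915_request *rq;

	rq = i915_request_create(engine->kernel_context);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
	i915_request_add(rq);

	return 0;
}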
2536
2537struct signal_cb {
2538 struct dma_fence_cb base;
2539 bool seen;
2540};
2541
2542static void signal_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
2543{
2544 struct signal_cb *s = container_of(cb, typeof(*s), base);
2545
2546 smp_store_mb(s->seen, true); /* be safe, be strong */
2547}
2548
2549static int measure_completion(struct intel_context *ce)
2550{
2551 u32 *sema = hwsp_scratch(ce);
2552 const u32 offset = hwsp_offset(ce, sema);
2553 u32 elapsed[TF_COUNT], cycles;
2554 u32 *cs;
2555 int err;
2556 int i;
2557
2558 /*
2559 * Measure how long it takes for the signal (interrupt) to be
2560 * sent from the GPU and then processed by the CPU.
2561 *
2562 * A: read CS_TIMESTAMP on GPU
2563 * signal
2564 * B: read CS_TIMESTAMP from CPU
2565 *
2566 * Completion latency: B - A
2567 */
2568
2569 for (i = 1; i <= ARRAY_SIZE(elapsed); i++) {
2570 struct signal_cb cb = { .seen = false };
2571 struct i915_request *rq;
2572
2573 rq = i915_request_create(ce);
2574 if (IS_ERR(rq)) {
2575 err = PTR_ERR(rq);
2576 goto err;
2577 }
2578
2579 cs = intel_ring_begin(rq, 12);
2580 if (IS_ERR(cs)) {
2581 i915_request_add(rq);
2582 err = PTR_ERR(cs);
2583 goto err;
2584 }
2585
2586 cs = emit_store_dw(cs, offset + i * sizeof(u32), -1);
2587 cs = emit_semaphore_poll_until(cs, offset, i);
2588 cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32));
2589
2590 intel_ring_advance(rq, cs);
2591
2592 dma_fence_add_callback(&rq->fence, &cb.base, signal_cb);
2593 i915_request_add(rq);
2594
2595 intel_engine_flush_submission(ce->engine);
2596 if (wait_for(READ_ONCE(sema[i]) == -1, 50)) {
2597 err = -EIO;
2598 goto err;
2599 }
2600
2601 preempt_disable();
2602 semaphore_set(sema, i);
2603 while (!READ_ONCE(cb.seen))
2604 cpu_relax();
2605
2606 elapsed[i - 1] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP);
2607 preempt_enable();
2608 }
2609
2610 err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2);
2611 if (err)
2612 goto err;
2613
2614 for (i = 0; i < ARRAY_SIZE(elapsed); i++) {
2615 GEM_BUG_ON(sema[i + 1] == -1);
2616 elapsed[i] = elapsed[i] - sema[i + 1];
2617 }
2618
2619 cycles = trifilter(elapsed);
2620 pr_info("%s: completion latency %d cycles, %lluns\n",
2621 ce->engine->name, cycles >> TF_BIAS,
2622 cycles_to_ns(ce->engine, cycles));
2623
2624 return intel_gt_wait_for_idle(ce->engine->gt, HZ);
2625
2626err:
2627 intel_gt_set_wedged(ce->engine->gt);
2628 return err;
2629}
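
/*
 * A minimal sketch of the completion handshake used above: a dma-fence
 * callback flips a flag once the breadcrumb interrupt has been processed,
 * and the CPU spins on that flag so the subsequent CS_TIMESTAMP read (B)
 * happens as soon as the signal is seen. Illustrative only; not part of
 * the selftest list.
 */
static void __maybe_unused wait_for_signal_cb(struct i915_request *rq)
{
	struct signal_cb cb = { .seen = false };

	if (dma_fence_add_callback(&rq->fence, &cb.base, signal_cb))
		return; /* fence already signaled */

	while (!READ_ONCE(cb.seen))
		cpu_relax();
}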
2630
2631static void rps_pin(struct intel_gt *gt)
2632{
2633 /* Pin the frequency to max */
2634 atomic_inc(&gt->rps.num_waiters);
2635 intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
2636
2637 mutex_lock(&gt->rps.lock);
2638 intel_rps_set(&gt->rps, gt->rps.max_freq);
2639 mutex_unlock(&gt->rps.lock);
2640}
2641
2642static void rps_unpin(struct intel_gt *gt)
2643{
2644 intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
2645 atomic_dec(&gt->rps.num_waiters);
2646}
2647
2648static int perf_request_latency(void *arg)
2649{
2650 struct drm_i915_private *i915 = arg;
2651 struct intel_engine_cs *engine;
2652 struct pm_qos_request qos;
2653 int err = 0;
2654
2655 if (GRAPHICS_VER(i915) < 8) /* per-engine CS timestamp, semaphores */
2656 return 0;
2657
2658 cpu_latency_qos_add_request(&qos, 0); /* disable cstates */
2659
2660 for_each_uabi_engine(engine, i915) {
2661 struct intel_context *ce;
2662
2663 ce = intel_context_create(engine);
2664 if (IS_ERR(ce)) {
2665 err = PTR_ERR(ce);
2666 goto out;
2667 }
2668
2669 err = intel_context_pin(ce);
2670 if (err) {
2671 intel_context_put(ce);
2672 goto out;
2673 }
2674
2675 st_engine_heartbeat_disable(engine);
2676 rps_pin(engine->gt);
2677
2678 if (err == 0)
2679 err = measure_semaphore_response(ce);
2680 if (err == 0)
2681 err = measure_idle_dispatch(ce);
2682 if (err == 0)
2683 err = measure_busy_dispatch(ce);
2684 if (err == 0)
2685 err = measure_inter_request(ce);
2686 if (err == 0)
2687 err = measure_context_switch(ce);
2688 if (err == 0)
2689 err = measure_preemption(ce);
2690 if (err == 0)
2691 err = measure_completion(ce);
2692
2693 rps_unpin(engine->gt);
2694 st_engine_heartbeat_enable(engine);
2695
2696 intel_context_unpin(ce);
2697 intel_context_put(ce);
2698 if (err)
2699 goto out;
2700 }
2701
2702out:
2703 if (igt_flush_test(i915))
2704 err = -EIO;
2705
2706 cpu_latency_qos_remove_request(&qos);
2707 return err;
2708}
2709
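/*
 * The three series workloads below drive every context from a single
 * thread: s_sync0() submits a request and waits for it before moving to
 * the next engine, s_sync1() overlaps each submission with the wait for
 * the previous request, and s_many() submits without waiting at all,
 * measuring raw submission throughput.
 */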
2710static int s_sync0(void *arg)
2711{
2712 struct perf_series *ps = arg;
2713 IGT_TIMEOUT(end_time);
2714 unsigned int idx = 0;
2715 int err = 0;
2716
2717 GEM_BUG_ON(!ps->nengines);
2718 do {
2719 struct i915_request *rq;
2720
2721 rq = i915_request_create(ps->ce[idx]);
2722 if (IS_ERR(rq)) {
2723 err = PTR_ERR(rq);
2724 break;
2725 }
2726
2727 i915_request_get(rq);
2728 i915_request_add(rq);
2729
2730 if (i915_request_wait(rq, 0, HZ / 5) < 0)
2731 err = -ETIME;
2732 i915_request_put(rq);
2733 if (err)
2734 break;
2735
2736 if (++idx == ps->nengines)
2737 idx = 0;
2738 } while (!__igt_timeout(end_time, NULL));
2739
2740 return err;
2741}
2742
2743static int s_sync1(void *arg)
2744{
2745 struct perf_series *ps = arg;
2746 struct i915_request *prev = NULL;
2747 IGT_TIMEOUT(end_time);
2748 unsigned int idx = 0;
2749 int err = 0;
2750
2751 GEM_BUG_ON(!ps->nengines);
2752 do {
2753 struct i915_request *rq;
2754
2755 rq = i915_request_create(ps->ce[idx]);
2756 if (IS_ERR(rq)) {
2757 err = PTR_ERR(rq);
2758 break;
2759 }
2760
2761 i915_request_get(rq);
2762 i915_request_add(rq);
2763
2764 if (prev && i915_request_wait(prev, 0, HZ / 5) < 0)
2765 err = -ETIME;
2766 i915_request_put(prev);
2767 prev = rq;
2768 if (err)
2769 break;
2770
2771 if (++idx == ps->nengines)
2772 idx = 0;
2773 } while (!__igt_timeout(end_time, NULL));
2774 i915_request_put(prev);
2775
2776 return err;
2777}
2778
2779static int s_many(void *arg)
2780{
2781 struct perf_series *ps = arg;
2782 IGT_TIMEOUT(end_time);
2783 unsigned int idx = 0;
2784
2785 GEM_BUG_ON(!ps->nengines);
2786 do {
2787 struct i915_request *rq;
2788
2789 rq = i915_request_create(ps->ce[idx]);
2790 if (IS_ERR(rq))
2791 return PTR_ERR(rq);
2792
2793 i915_request_add(rq);
2794
2795 if (++idx == ps->nengines)
2796 idx = 0;
2797 } while (!__igt_timeout(end_time, NULL));
2798
2799 return 0;
2800}
2801
2802static int perf_series_engines(void *arg)
2803{
2804 struct drm_i915_private *i915 = arg;
2805 static int (* const func[])(void *arg) = {
2806 s_sync0,
2807 s_sync1,
2808 s_many,
2809 NULL,
2810 };
2811 const unsigned int nengines = num_uabi_engines(i915);
2812 struct intel_engine_cs *engine;
2813 int (* const *fn)(void *arg);
2814 struct pm_qos_request qos;
2815 struct perf_stats *stats;
2816 struct perf_series *ps;
2817 unsigned int idx;
2818 int err = 0;
2819
2820 stats = kcalloc(nengines, sizeof(*stats), GFP_KERNEL);
2821 if (!stats)
2822 return -ENOMEM;
2823
2824 ps = kzalloc(struct_size(ps, ce, nengines), GFP_KERNEL);
2825 if (!ps) {
2826 kfree(stats);
2827 return -ENOMEM;
2828 }
2829
2830 cpu_latency_qos_add_request(&qos, 0); /* disable cstates */
2831
2832 ps->i915 = i915;
2833 ps->nengines = nengines;
2834
2835 idx = 0;
2836 for_each_uabi_engine(engine, i915) {
2837 struct intel_context *ce;
2838
2839 ce = intel_context_create(engine);
2840 if (IS_ERR(ce)) {
2841 err = PTR_ERR(ce);
2842 goto out;
2843 }
2844
2845 err = intel_context_pin(ce);
2846 if (err) {
2847 intel_context_put(ce);
2848 goto out;
2849 }
2850
2851 ps->ce[idx++] = ce;
2852 }
2853 GEM_BUG_ON(idx != ps->nengines);
2854
2855 for (fn = func; *fn && !err; fn++) {
2856 char name[KSYM_NAME_LEN];
2857 struct igt_live_test t;
2858
2859 snprintf(name, sizeof(name), "%ps", *fn);
2860 err = igt_live_test_begin(&t, i915, __func__, name);
2861 if (err)
2862 break;
2863
2864 for (idx = 0; idx < nengines; idx++) {
2865 struct perf_stats *p =
2866 memset(&stats[idx], 0, sizeof(stats[idx]));
2867 struct intel_context *ce = ps->ce[idx];
2868
2869 p->engine = ps->ce[idx]->engine;
2870 intel_engine_pm_get(p->engine);
2871
2872 if (intel_engine_supports_stats(p->engine))
2873 p->busy = intel_engine_get_busy_time(p->engine,
2874 &p->time) + 1;
2875 else
2876 p->time = ktime_get();
2877 p->runtime = -intel_context_get_total_runtime_ns(ce);
2878 }
2879
2880 err = (*fn)(ps);
2881 if (igt_live_test_end(&t))
2882 err = -EIO;
2883
2884 for (idx = 0; idx < nengines; idx++) {
2885 struct perf_stats *p = &stats[idx];
2886 struct intel_context *ce = ps->ce[idx];
2887 int integer, decimal;
2888 u64 busy, dt, now;
2889
2890 if (p->busy)
2891 p->busy = ktime_sub(intel_engine_get_busy_time(p->engine,
2892 &now),
2893 p->busy - 1);
2894 else
2895 now = ktime_get();
2896 p->time = ktime_sub(now, p->time);
2897
2898 err = switch_to_kernel_sync(ce, err);
2899 p->runtime += intel_context_get_total_runtime_ns(ce);
2900 intel_engine_pm_put(p->engine);
2901
2902 busy = 100 * ktime_to_ns(p->busy);
2903 dt = ktime_to_ns(p->time);
2904 if (dt) {
2905 integer = div64_u64(busy, dt);
2906 busy -= integer * dt;
2907 decimal = div64_u64(100 * busy, dt);
2908 } else {
2909 integer = 0;
2910 decimal = 0;
2911 }
2912
2913 pr_info("%s %5s: { seqno:%d, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n",
2914 name, p->engine->name, ce->timeline->seqno,
2915 integer, decimal,
2916 div_u64(p->runtime, 1000 * 1000),
2917 div_u64(ktime_to_ns(p->time), 1000 * 1000));
2918 }
2919 }
2920
2921out:
2922 for (idx = 0; idx < nengines; idx++) {
2923 if (IS_ERR_OR_NULL(ps->ce[idx]))
2924 break;
2925
2926 intel_context_unpin(ps->ce[idx]);
2927 intel_context_put(ps->ce[idx]);
2928 }
2929 kfree(ps);
2930
2931 cpu_latency_qos_remove_request(&qos);
2932 kfree(stats);
2933 return err;
2934}
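
/*
 * A minimal sketch of the fixed-point busy% computation above. For
 * example, busy_ns = 1234567 and dt_ns = 2000000 gives integer = 61 and
 * decimal = 72, printed as "61.72%" (the exact ratio is 61.728%).
 * Illustrative only; not part of the selftest list.
 */
static void __maybe_unused
busy_percent(u64 busy_ns, u64 dt_ns, int *integer, int *decimal)
{
	u64 busy = 100 * busy_ns;

	if (!dt_ns) {
		*integer = 0;
		*decimal = 0;
		return;
	}

	*integer = div64_u64(busy, dt_ns);	 /* whole percent */
	busy -= (u64)*integer * dt_ns;
	*decimal = div64_u64(100 * busy, dt_ns); /* two decimal digits */
}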
2935
2936struct p_thread {
2937 struct perf_stats p;
2938 struct kthread_worker *worker;
2939 struct kthread_work work;
2940 struct intel_engine_cs *engine;
2941 int result;
2942};
2943
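/*
 * The parallel workloads below mirror s_sync0/s_sync1/s_many, but each
 * engine runs its own copy on a dedicated kthread_worker so that every
 * engine is loaded simultaneously rather than in series.
 */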
2944static void p_sync0(struct kthread_work *work)
2945{
2946 struct p_thread *thread = container_of(work, typeof(*thread), work);
2947 struct perf_stats *p = &thread->p;
2948 struct intel_engine_cs *engine = p->engine;
2949 struct intel_context *ce;
2950 IGT_TIMEOUT(end_time);
2951 unsigned long count;
2952 bool busy;
2953 int err = 0;
2954
2955 ce = intel_context_create(engine);
2956 if (IS_ERR(ce)) {
2957 thread->result = PTR_ERR(ce);
2958 return;
2959 }
2960
2961 err = intel_context_pin(ce);
2962 if (err) {
2963 intel_context_put(ce);
2964 thread->result = err;
2965 return;
2966 }
2967
2968 if (intel_engine_supports_stats(engine)) {
2969 p->busy = intel_engine_get_busy_time(engine, &p->time);
2970 busy = true;
2971 } else {
2972 p->time = ktime_get();
2973 busy = false;
2974 }
2975
2976 count = 0;
2977 do {
2978 struct i915_request *rq;
2979
2980 rq = i915_request_create(ce);
2981 if (IS_ERR(rq)) {
2982 err = PTR_ERR(rq);
2983 break;
2984 }
2985
2986 i915_request_get(rq);
2987 i915_request_add(rq);
2988
2989 err = 0;
2990 if (i915_request_wait(rq, 0, HZ) < 0)
2991 err = -ETIME;
2992 i915_request_put(rq);
2993 if (err)
2994 break;
2995
2996 count++;
2997 } while (!__igt_timeout(end_time, NULL));
2998
2999 if (busy) {
3000 ktime_t now;
3001
3002 p->busy = ktime_sub(intel_engine_get_busy_time(engine, &now),
3003 p->busy);
3004 p->time = ktime_sub(now, p->time);
3005 } else {
3006 p->time = ktime_sub(ktime_get(), p->time);
3007 }
3008
3009 err = switch_to_kernel_sync(ce, err);
3010 p->runtime = intel_context_get_total_runtime_ns(ce);
3011 p->count = count;
3012
3013 intel_context_unpin(ce);
3014 intel_context_put(ce);
3015 thread->result = err;
3016}
3017
3018static void p_sync1(struct kthread_work *work)
3019{
3020 struct p_thread *thread = container_of(work, typeof(*thread), work);
3021 struct perf_stats *p = &thread->p;
3022 struct intel_engine_cs *engine = p->engine;
3023 struct i915_request *prev = NULL;
3024 struct intel_context *ce;
3025 IGT_TIMEOUT(end_time);
3026 unsigned long count;
3027 bool busy;
3028 int err = 0;
3029
3030 ce = intel_context_create(engine);
3031 if (IS_ERR(ce)) {
3032 thread->result = PTR_ERR(ce);
3033 return;
3034 }
3035
3036 err = intel_context_pin(ce);
3037 if (err) {
3038 intel_context_put(ce);
3039 thread->result = err;
3040 return;
3041 }
3042
3043 if (intel_engine_supports_stats(engine)) {
3044 p->busy = intel_engine_get_busy_time(engine, &p->time);
3045 busy = true;
3046 } else {
3047 p->time = ktime_get();
3048 busy = false;
3049 }
3050
3051 count = 0;
3052 do {
3053 struct i915_request *rq;
3054
3055 rq = i915_request_create(ce);
3056 if (IS_ERR(rq)) {
3057 err = PTR_ERR(rq);
3058 break;
3059 }
3060
3061 i915_request_get(rq);
3062 i915_request_add(rq);
3063
3064 err = 0;
3065 if (prev && i915_request_wait(prev, 0, HZ) < 0)
3066 err = -ETIME;
3067 i915_request_put(prev);
3068 prev = rq;
3069 if (err)
3070 break;
3071
3072 count++;
3073 } while (!__igt_timeout(end_time, NULL));
3074 i915_request_put(prev);
3075
3076 if (busy) {
3077 ktime_t now;
3078
3079 p->busy = ktime_sub(intel_engine_get_busy_time(engine, &now),
3080 p->busy);
3081 p->time = ktime_sub(now, p->time);
3082 } else {
3083 p->time = ktime_sub(ktime_get(), p->time);
3084 }
3085
3086 err = switch_to_kernel_sync(ce, err);
3087 p->runtime = intel_context_get_total_runtime_ns(ce);
3088 p->count = count;
3089
3090 intel_context_unpin(ce);
3091 intel_context_put(ce);
3092 thread->result = err;
3093}
3094
3095static void p_many(struct kthread_work *work)
3096{
3097 struct p_thread *thread = container_of(work, typeof(*thread), work);
3098 struct perf_stats *p = &thread->p;
3099 struct intel_engine_cs *engine = p->engine;
3100 struct intel_context *ce;
3101 IGT_TIMEOUT(end_time);
3102 unsigned long count;
3103 int err = 0;
3104 bool busy;
3105
3106 ce = intel_context_create(engine);
3107 if (IS_ERR(ce)) {
3108 thread->result = PTR_ERR(ce);
3109 return;
3110 }
3111
3112 err = intel_context_pin(ce);
3113 if (err) {
3114 intel_context_put(ce);
3115 thread->result = err;
3116 return;
3117 }
3118
3119 if (intel_engine_supports_stats(engine)) {
3120 p->busy = intel_engine_get_busy_time(engine, &p->time);
3121 busy = true;
3122 } else {
3123 p->time = ktime_get();
3124 busy = false;
3125 }
3126
3127 count = 0;
3128 do {
3129 struct i915_request *rq;
3130
3131 rq = i915_request_create(ce);
3132 if (IS_ERR(rq)) {
3133 err = PTR_ERR(rq);
3134 break;
3135 }
3136
3137 i915_request_add(rq);
3138 count++;
3139 } while (!__igt_timeout(end_time, NULL));
3140
3141 if (busy) {
3142 ktime_t now;
3143
3144 p->busy = ktime_sub(intel_engine_get_busy_time(engine, &now),
3145 p->busy);
3146 p->time = ktime_sub(now, p->time);
3147 } else {
3148 p->time = ktime_sub(ktime_get(), p->time);
3149 }
3150
3151 err = switch_to_kernel_sync(ce, err);
3152 p->runtime = intel_context_get_total_runtime_ns(ce);
3153 p->count = count;
3154
3155 intel_context_unpin(ce);
3156 intel_context_put(ce);
3157 thread->result = err;
3158}
3159
3160static int perf_parallel_engines(void *arg)
3161{
3162 struct drm_i915_private *i915 = arg;
3163 static void (* const func[])(struct kthread_work *) = {
3164 p_sync0,
3165 p_sync1,
3166 p_many,
3167 NULL,
3168 };
3169 const unsigned int nengines = num_uabi_engines(i915);
3170 void (* const *fn)(struct kthread_work *);
3171 struct intel_engine_cs *engine;
3172 struct pm_qos_request qos;
3173 struct p_thread *engines;
3174 int err = 0;
3175
3176 engines = kcalloc(nengines, sizeof(*engines), GFP_KERNEL);
3177 if (!engines)
3178 return -ENOMEM;
3179
3180 cpu_latency_qos_add_request(&qos, 0);
3181
3182 for (fn = func; *fn; fn++) {
3183 char name[KSYM_NAME_LEN];
3184 struct igt_live_test t;
3185 unsigned int idx;
3186
3187 snprintf(name, sizeof(name), "%ps", *fn);
3188 err = igt_live_test_begin(&t, i915, __func__, name);
3189 if (err)
3190 break;
3191
3192 atomic_set(&i915->selftest.counter, nengines);
3193
3194 idx = 0;
3195 for_each_uabi_engine(engine, i915) {
3196 struct kthread_worker *worker;
3197
3198 intel_engine_pm_get(engine);
3199
3200 memset(&engines[idx].p, 0, sizeof(engines[idx].p));
3201
3202 worker = kthread_create_worker(0, "igt:%s",
3203 engine->name);
3204 if (IS_ERR(worker)) {
3205 err = PTR_ERR(worker);
3206 intel_engine_pm_put(engine);
3207 break;
3208 }
3209 engines[idx].worker = worker;
3210 engines[idx].result = 0;
3211 engines[idx].p.engine = engine;
3212 engines[idx].engine = engine;
3213
3214 kthread_init_work(&engines[idx].work, *fn);
3215 kthread_queue_work(worker, &engines[idx].work);
3216 idx++;
3217 }
3218
3219 idx = 0;
3220 for_each_uabi_engine(engine, i915) {
3221 int status;
3222
3223 if (!engines[idx].worker)
3224 break;
3225
3226 kthread_flush_work(&engines[idx].work);
3227 status = READ_ONCE(engines[idx].result);
3228 if (status && !err)
3229 err = status;
3230
3231 intel_engine_pm_put(engine);
3232
3233 kthread_destroy_worker(engines[idx].worker);
3234 idx++;
3235 }
3236
3237 if (igt_live_test_end(&t))
3238 err = -EIO;
3239 if (err)
3240 break;
3241
3242 idx = 0;
3243 for_each_uabi_engine(engine, i915) {
3244 struct perf_stats *p = &engines[idx].p;
3245 u64 busy = 100 * ktime_to_ns(p->busy);
3246 u64 dt = ktime_to_ns(p->time);
3247 int integer, decimal;
3248
3249 if (dt) {
3250 integer = div64_u64(busy, dt);
3251 busy -= integer * dt;
3252 decimal = div64_u64(100 * busy, dt);
3253 } else {
3254 integer = 0;
3255 decimal = 0;
3256 }
3257
3258 GEM_BUG_ON(engine != p->engine);
3259 pr_info("%s %5s: { count:%lu, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n",
3260 name, engine->name, p->count, integer, decimal,
3261 div_u64(p->runtime, 1000 * 1000),
3262 div_u64(ktime_to_ns(p->time), 1000 * 1000));
3263 idx++;
3264 }
3265 }
3266
3267 cpu_latency_qos_remove_request(&qos);
3268 kfree(engines);
3269 return err;
3270}
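
/*
 * A minimal sketch of the per-engine kthread_worker lifecycle used above:
 * create a worker, queue the (already initialised) work item on it, flush
 * the work to wait for completion, then destroy the worker. The helper
 * name is illustrative and not part of the selftest.
 */
static int __maybe_unused
run_work_on_worker(struct kthread_work *work, const char *name)
{
	struct kthread_worker *worker;

	worker = kthread_create_worker(0, "igt:%s", name);
	if (IS_ERR(worker))
		return PTR_ERR(worker);

	kthread_queue_work(worker, work);
	kthread_flush_work(work);	/* wait for the work to complete */
	kthread_destroy_worker(worker);

	return 0;
}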
3271
3272int i915_request_perf_selftests(struct drm_i915_private *i915)
3273{
3274 static const struct i915_subtest tests[] = {
3275 SUBTEST(perf_request_latency),
3276 SUBTEST(perf_series_engines),
3277 SUBTEST(perf_parallel_engines),
3278 };
3279
3280 if (intel_gt_is_wedged(to_gt(i915)))
3281 return 0;
3282
3283 return i915_subtests(tests, i915);
3284}