// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/pm_qos.h>
#include <linux/sort.h>

#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_gpu_commands.h"
#include "intel_gt_clock_utils.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
#include "selftest_engine_heartbeat.h"
#include "selftest_rps.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_spinner.h"
#include "selftests/librapl.h"

/* Try to isolate the impact of cstates from determining frequency response */
#define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */

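/*
 * Stub for rps->work.func: the selftests install this so that the normal
 * interrupt-driven RPS worker cannot reclock the GPU behind their backs
 * while they drive the frequency by hand.
 */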
static void dummy_rps_work(struct work_struct *wrk)
{
}

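/* sort() comparators for the sample arrays used by the filters below */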
static int cmp_u64(const void *A, const void *B)
{
	const u64 *a = A, *b = B;

	if (*a < *b)
		return -1;
	else if (*a > *b)
		return 1;
	else
		return 0;
}

static int cmp_u32(const void *A, const void *B)
{
	const u32 *a = A, *b = B;

	if (*a < *b)
		return -1;
	else if (*a > *b)
		return 1;
	else
		return 0;
}

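/*
 * Build a batch that spins forever, incrementing a CS GPR via MI_MATH on
 * every iteration so that the counter advances at a rate proportional to
 * the CS (GT) clock. If srm is set, each iteration also spills the counter
 * to memory with MI_STORE_REGISTER_MEM so the CPU can sample it. Writing
 * MI_BATCH_BUFFER_END over *cancel terminates the loop.
 */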
static struct i915_vma *
create_spin_counter(struct intel_engine_cs *engine,
		    struct i915_address_space *vm,
		    bool srm,
		    u32 **cancel,
		    u32 **counter)
{
	enum {
		COUNT,
		INC,
		__NGPR__,
	};
#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	unsigned long end;
	u32 *base, *cs;
	int loop, i;
	int err;

	obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	end = obj->base.size / sizeof(u32) - 1;

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return vma;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err) {
		i915_vma_put(vma);
		return ERR_PTR(err);
	}

	base = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(base)) {
		i915_gem_object_put(obj);
		return ERR_CAST(base);
	}
	cs = base;

	*cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
	for (i = 0; i < __NGPR__; i++) {
		*cs++ = i915_mmio_reg_offset(CS_GPR(i));
		*cs++ = 0;
		*cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
		*cs++ = 0;
	}

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(CS_GPR(INC));
	*cs++ = 1;

	loop = cs - base;

	/* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
	for (i = 0; i < 1024; i++) {
		*cs++ = MI_MATH(4);
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
		*cs++ = MI_MATH_ADD;
		*cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);

		if (srm) {
			*cs++ = MI_STORE_REGISTER_MEM_GEN8;
			*cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
			*cs++ = lower_32_bits(vma->node.start + end * sizeof(*cs));
			*cs++ = upper_32_bits(vma->node.start + end * sizeof(*cs));
		}
	}

	*cs++ = MI_BATCH_BUFFER_START_GEN8;
	*cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
	*cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
	GEM_BUG_ON(cs - base > end);

	i915_gem_object_flush_map(obj);

	*cancel = base + loop;
	*counter = srm ? memset32(base + end, 0, 1) : NULL;
	return vma;
}

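/*
 * Poll the actual frequency (CAGF) until it reaches the target, the
 * timeout expires, or the readback has been stable for the last 64
 * samples, i.e. the PCU has apparently settled elsewhere.
 */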
static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
{
	u8 history[64], i;
	unsigned long end;
	int sleep;

	i = 0;
	memset(history, freq, sizeof(history));
	sleep = 20;

	/* The PCU does not change instantly, but drifts towards the goal? */
	end = jiffies + msecs_to_jiffies(timeout_ms);
	do {
		u8 act;

		act = read_cagf(rps);
		if (time_after(jiffies, end))
			return act;

		/* Target acquired */
		if (act == freq)
			return act;

		/* Any change within the last N samples? */
		if (!memchr_inv(history, act, sizeof(history)))
			return act;

		history[i] = act;
		i = (i + 1) % ARRAY_SIZE(history);

		usleep_range(sleep, 2 * sleep);
		sleep *= 2;
		if (sleep > timeout_ms * 20)
			sleep = timeout_ms * 20;
	} while (1);
}

static u8 rps_set_check(struct intel_rps *rps, u8 freq)
{
	mutex_lock(&rps->lock);
	GEM_BUG_ON(!intel_rps_is_active(rps));
	intel_rps_set(rps, freq);
	GEM_BUG_ON(rps->last_freq != freq);
	mutex_unlock(&rps->lock);

	return wait_for_freq(rps, freq, 50);
}

static void show_pstate_limits(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);

	if (IS_BROXTON(i915)) {
		pr_info("P_STATE_CAP[%x]: 0x%08x\n",
			i915_mmio_reg_offset(BXT_RP_STATE_CAP),
			intel_uncore_read(rps_to_uncore(rps),
					  BXT_RP_STATE_CAP));
	} else if (IS_GEN(i915, 9)) {
		pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
			i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
			intel_uncore_read(rps_to_uncore(rps),
					  GEN9_RP_STATE_LIMITS));
	}
}

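/*
 * Check that the RPS evaluation interval counter ticks at the GT clock
 * rate we believe it does: while an engine spins, program an effectively
 * infinite EI and compare the elapsed C0 cycles against walltime via the
 * interval<->ns conversion helpers, expecting agreement within an 8:10
 * ratio in either direction.
 */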
int live_rps_clock_interval(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	intel_gt_pm_get(gt);
	intel_rps_disable(&gt->rps);

	intel_gt_check_clock_frequency(gt);

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		u32 cycles;
		u64 dt;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			st_engine_heartbeat_enable(engine);
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);

		/* Set the evaluation interval to infinity! */
		intel_uncore_write_fw(gt->uncore,
				      GEN6_RP_UP_EI, 0xffffffff);
		intel_uncore_write_fw(gt->uncore,
				      GEN6_RP_UP_THRESHOLD, 0xffffffff);

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
				      GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);

		if (wait_for(intel_uncore_read_fw(gt->uncore,
						  GEN6_RP_CUR_UP_EI),
			     10)) {
			/* Just skip the test; assume lack of HW support */
			pr_notice("%s: rps evaluation interval not ticking\n",
				  engine->name);
			err = -ENODEV;
		} else {
			ktime_t dt_[5];
			u32 cycles_[5];
			int i;

			for (i = 0; i < 5; i++) {
				preempt_disable();

				dt_[i] = ktime_get();
				cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);

				udelay(1000);

				dt_[i] = ktime_sub(ktime_get(), dt_[i]);
				cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);

				preempt_enable();
			}

			/* Use the median of both cycle/dt; close enough */
			sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
			cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
			sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
			dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
		}

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
		intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		if (err == 0) {
			u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
			u32 expected =
				intel_gt_ns_to_pm_interval(gt, dt);

			pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
				engine->name, cycles, time, dt, expected,
				gt->clock_frequency / 1000);

			if (10 * time < 8 * dt ||
			    8 * time > 10 * dt) {
				pr_err("%s: rps clock time does not match walltime!\n",
				       engine->name);
				err = -EINVAL;
			}

			if (10 * expected < 8 * cycles ||
			    8 * expected > 10 * cycles) {
				pr_err("%s: walltime does not match rps clock ticks!\n",
				       engine->name);
				err = -EINVAL;
			}
		}

		if (igt_flush_test(gt->i915))
			err = -EIO;

		break; /* once is enough */
	}

	intel_rps_enable(&gt->rps);
	intel_gt_pm_put(gt);

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (err == -ENODEV) /* skipped, don't report a fail */
		err = 0;

	return err;
}

int live_rps_control(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * Check that the actual frequency matches our requested frequency,
	 * to verify our control mechanism. We have to be careful that the
	 * PCU may throttle the GPU in which case the actual frequency used
	 * will be lower than requested.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	intel_gt_pm_get(gt);
	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		ktime_t min_dt, max_dt;
		int f, limit;
		int min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
			pr_err("%s: could not set minimum frequency [%x], only %x!\n",
			       engine->name, rps->min_freq, read_cagf(rps));
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			show_pstate_limits(rps);
			err = -EINVAL;
			break;
		}

		for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
			if (rps_set_check(rps, f) < f)
				break;
		}

		limit = rps_set_check(rps, f);

		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
			pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
			       engine->name, rps->min_freq, read_cagf(rps));
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			show_pstate_limits(rps);
			err = -EINVAL;
			break;
		}

		max_dt = ktime_get();
		max = rps_set_check(rps, limit);
		max_dt = ktime_sub(ktime_get(), max_dt);

		min_dt = ktime_get();
		min = rps_set_check(rps, rps->min_freq);
		min_dt = ktime_sub(ktime_get(), min_dt);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
			engine->name,
			rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
			rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
			limit, intel_gpu_freq(rps, limit),
			min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));

		if (limit == rps->min_freq) {
			pr_err("%s: GPU throttled to minimum!\n",
			       engine->name);
			show_pstate_limits(rps);
			err = -ENODEV;
			break;
		}

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}
	intel_gt_pm_put(gt);

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

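/*
 * Dump the pcode min-frequency table for LLC platforms: for each GPU
 * frequency bin, the effective CPU and ring frequencies reported by
 * pcode alongside it.
 */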
static void show_pcu_config(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	unsigned int max_gpu_freq, min_gpu_freq;
	intel_wakeref_t wakeref;
	int gpu_freq;

	if (!HAS_LLC(i915))
		return;

	min_gpu_freq = rps->min_freq;
	max_gpu_freq = rps->max_freq;
	if (INTEL_GEN(i915) >= 9) {
		/* Convert GT frequency to 50 MHz units */
		min_gpu_freq /= GEN9_FREQ_SCALER;
		max_gpu_freq /= GEN9_FREQ_SCALER;
	}

	wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);

	pr_info("%5s %5s %5s\n", "GPU", "eCPU", "eRing");
	for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
		int ia_freq = gpu_freq;

		sandybridge_pcode_read(i915,
				       GEN6_PCODE_READ_MIN_FREQ_TABLE,
				       &ia_freq, NULL);

		pr_info("%5d %5d %5d\n",
			gpu_freq * 50,
			((ia_freq >> 0) & 0xff) * 100,
			((ia_freq >> 8) & 0xff) * 100);
	}

	intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
}

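/*
 * Sample the memory-backed counter across a sleep and convert the delta
 * into a rate: with the counter in increments and the walltime in ns, the
 * 1000 * 1000 scaling yields kHz, matching the pr_info() consumers below.
 */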
static u64 __measure_frequency(u32 *cntr, int duration_ms)
{
	u64 dc, dt;

	dt = ktime_get();
	dc = READ_ONCE(*cntr);
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dc = READ_ONCE(*cntr) - dc;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dc, dt);
}

static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_frequency(cntr, 2);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
				  int duration_ms)
{
	u64 dc, dt;

	dt = ktime_get();
	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dc, dt);
}

static u64 measure_cs_frequency_at(struct intel_rps *rps,
				   struct intel_engine_cs *engine,
				   int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_cs_frequency(engine, 2);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

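/* Is the ratio x:y strictly within f_n:f_d and f_d:f_n? */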
static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
{
	return f_d * x > f_n * y && f_n * x < f_d * y;
}

int live_rps_frequency_cs(void *arg)
{
	void (*saved_work)(struct work_struct *wrk);
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * The premise is that the GPU does change frequency at our behest.
	 * Let's check there is a correspondence between the requested
	 * frequency, the actual frequency, and the observed clock rate.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
		return 0;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_add_request(&qos, CPU_LATENCY);

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct i915_vma *vma;
		u32 *cancel, *cntr;
		struct {
			u64 count;
			int freq;
		} min, max;

		st_engine_heartbeat_disable(engine);

		vma = create_spin_counter(engine,
					  engine->kernel_context->vm, false,
					  &cancel, &cntr);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			st_engine_heartbeat_enable(engine);
			break;
		}

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_vma;
		}

		i915_vma_lock(vma);
		err = i915_request_await_object(rq, vma->obj, false);
		if (!err)
			err = i915_vma_move_to_active(vma, rq, 0);
		if (!err)
			err = rq->engine->emit_bb_start(rq,
							vma->node.start,
							PAGE_SIZE, 0);
		i915_vma_unlock(vma);
		i915_request_add(rq);
		if (err)
			goto err_vma;

		if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
			     10)) {
			pr_err("%s: timed loop did not start\n",
			       engine->name);
			goto err_vma;
		}

		min.freq = rps->min_freq;
		min.count = measure_cs_frequency_at(rps, engine, &min.freq);

		max.freq = rps->max_freq;
		max.count = measure_cs_frequency_at(rps, engine, &max.freq);

		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
			engine->name,
			min.count, intel_gpu_freq(rps, min.freq),
			max.count, intel_gpu_freq(rps, max.freq),
			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
						     max.freq * min.count));

		if (!scaled_within(max.freq * min.count,
				   min.freq * max.count,
				   2, 3)) {
			int f;

			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
			       engine->name,
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				int act = f;
				u64 count;

				count = measure_cs_frequency_at(rps, engine, &act);
				if (act < f)
					break;

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					engine->name,
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
								     act * min.count));

				f = act; /* may skip ahead [pcu granularity] */
			}

			err = -EINVAL;
		}

err_vma:
		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_flush_map(vma->obj);
		i915_gem_object_unpin_map(vma->obj);
		i915_vma_unpin(vma);
		i915_vma_put(vma);

		st_engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);

	return err;
}

int live_rps_frequency_srm(void *arg)
{
	void (*saved_work)(struct work_struct *wrk);
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * The premise is that the GPU does change frequency at our behest.
	 * Let's check there is a correspondence between the requested
	 * frequency, the actual frequency, and the observed clock rate.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
		return 0;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_add_request(&qos, CPU_LATENCY);

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct i915_vma *vma;
		u32 *cancel, *cntr;
		struct {
			u64 count;
			int freq;
		} min, max;

		st_engine_heartbeat_disable(engine);

		vma = create_spin_counter(engine,
					  engine->kernel_context->vm, true,
					  &cancel, &cntr);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			st_engine_heartbeat_enable(engine);
			break;
		}

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_vma;
		}

		i915_vma_lock(vma);
		err = i915_request_await_object(rq, vma->obj, false);
		if (!err)
			err = i915_vma_move_to_active(vma, rq, 0);
		if (!err)
			err = rq->engine->emit_bb_start(rq,
							vma->node.start,
							PAGE_SIZE, 0);
		i915_vma_unlock(vma);
		i915_request_add(rq);
		if (err)
			goto err_vma;

		if (wait_for(READ_ONCE(*cntr), 10)) {
			pr_err("%s: timed loop did not start\n",
			       engine->name);
			goto err_vma;
		}

		min.freq = rps->min_freq;
		min.count = measure_frequency_at(rps, cntr, &min.freq);

		max.freq = rps->max_freq;
		max.count = measure_frequency_at(rps, cntr, &max.freq);

		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
			engine->name,
			min.count, intel_gpu_freq(rps, min.freq),
			max.count, intel_gpu_freq(rps, max.freq),
			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
						     max.freq * min.count));

		if (!scaled_within(max.freq * min.count,
				   min.freq * max.count,
				   1, 2)) {
			int f;

			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
			       engine->name,
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				int act = f;
				u64 count;

				count = measure_frequency_at(rps, cntr, &act);
				if (act < f)
					break;

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					engine->name,
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
								     act * min.count));

				f = act; /* may skip ahead [pcu granularity] */
			}

			err = -EINVAL;
		}

err_vma:
		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_flush_map(vma->obj);
		i915_gem_object_unpin_map(vma->obj);
		i915_vma_unpin(vma);
		i915_vma_put(vma);

		st_engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);

	return err;
}

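/*
 * Wait out one full evaluation interval with a clean slate: flush any EI
 * already in progress, reset the latched interrupt status, then sleep for
 * a whole interval so that any threshold event must be freshly generated.
 */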
static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
{
	/* Flush any previous EI */
	usleep_range(timeout_us, 2 * timeout_us);

	/* Reset the interrupt status */
	rps_disable_interrupts(rps);
	GEM_BUG_ON(rps->pm_iir);
	rps_enable_interrupts(rps);

	/* And then wait for the timeout, for real this time */
	usleep_range(2 * timeout_us, 3 * timeout_us);
}

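/*
 * Pin the GPU at its minimum frequency and saturate the engine with a
 * spinner, then sleep for an evaluation interval: with the real worker
 * stubbed out, the UP threshold interrupt should be latched in pm_iir
 * while the programmed frequency itself remains untouched.
 */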
static int __rps_up_interrupt(struct intel_rps *rps,
			      struct intel_engine_cs *engine,
			      struct igt_spinner *spin)
{
	struct intel_uncore *uncore = engine->uncore;
	struct i915_request *rq;
	u32 timeout;

	if (!intel_engine_can_store_dword(engine))
		return 0;

	rps_set_check(rps, rps->min_freq);

	rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	i915_request_get(rq);
	i915_request_add(rq);

	if (!igt_wait_for_spinner(spin, rq)) {
		pr_err("%s: RPS spinner did not start\n",
		       engine->name);
		i915_request_put(rq);
		intel_gt_set_wedged(engine->gt);
		return -EIO;
	}

	if (!intel_rps_is_active(rps)) {
		pr_err("%s: RPS not enabled on starting spinner\n",
		       engine->name);
		igt_spinner_end(spin);
		i915_request_put(rq);
		return -EINVAL;
	}

	if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
		pr_err("%s: RPS did not register UP interrupt\n",
		       engine->name);
		i915_request_put(rq);
		return -EINVAL;
	}

	if (rps->last_freq != rps->min_freq) {
		pr_err("%s: RPS did not program min frequency\n",
		       engine->name);
		i915_request_put(rq);
		return -EINVAL;
	}

	timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
	timeout = DIV_ROUND_UP(timeout, 1000);

	sleep_for_ei(rps, timeout);
	GEM_BUG_ON(i915_request_completed(rq));

	igt_spinner_end(spin);
	i915_request_put(rq);

	if (rps->cur_freq != rps->min_freq) {
		pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
		       engine->name, intel_rps_read_actual_frequency(rps));
		return -EINVAL;
	}

	if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
		pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
		       engine->name, rps->pm_iir,
		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
		return -EINVAL;
	}

	return 0;
}

static int __rps_down_interrupt(struct intel_rps *rps,
				struct intel_engine_cs *engine)
{
	struct intel_uncore *uncore = engine->uncore;
	u32 timeout;

	rps_set_check(rps, rps->max_freq);

	if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
		pr_err("%s: RPS did not register DOWN interrupt\n",
		       engine->name);
		return -EINVAL;
	}

	if (rps->last_freq != rps->max_freq) {
		pr_err("%s: RPS did not program max frequency\n",
		       engine->name);
		return -EINVAL;
	}

	timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
	timeout = DIV_ROUND_UP(timeout, 1000);

	sleep_for_ei(rps, timeout);

	if (rps->cur_freq != rps->max_freq) {
		pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
		       engine->name,
		       intel_rps_read_actual_frequency(rps));
		return -EINVAL;
	}

	if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
		pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
		       engine->name, rps->pm_iir,
		       intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
		       intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
		return -EINVAL;
	}

	return 0;
}

int live_rps_interrupt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	u32 pm_events;
	int err = 0;

	/*
	 * First, let's check whether or not we are receiving interrupts.
	 */

	if (!intel_rps_has_interrupts(rps))
		return 0;

	intel_gt_pm_get(gt);
	pm_events = rps->pm_events;
	intel_gt_pm_put(gt);
	if (!pm_events) {
		pr_err("No RPS PM events registered, but RPS is enabled?\n");
		return -ENODEV;
	}

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		/* Keep the engine busy with a spinner; expect an UP! */
		if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
			intel_gt_pm_wait_for_idle(engine->gt);
			GEM_BUG_ON(intel_rps_is_active(rps));

			st_engine_heartbeat_disable(engine);

			err = __rps_up_interrupt(rps, engine, &spin);

			st_engine_heartbeat_enable(engine);
			if (err)
				goto out;

			intel_gt_pm_wait_for_idle(engine->gt);
		}

		/* Keep the engine awake but idle and check for DOWN */
		if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
			st_engine_heartbeat_disable(engine);
			intel_rc6_disable(&gt->rc6);

			err = __rps_down_interrupt(rps, engine);

			intel_rc6_enable(&gt->rc6);
			st_engine_heartbeat_enable(engine);
			if (err)
				goto out;
		}
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

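/*
 * RAPL energy delta (uJ) over walltime (ns), scaled by 1000 * 1000:
 * the result is in mW, matching the pr_info() in live_rps_power().
 */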
static u64 __measure_power(int duration_ms)
{
	u64 dE, dt;

	dt = ktime_get();
	dE = librapl_energy_uJ();
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dE = librapl_energy_uJ() - dE;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dE, dt);
}

static u64 measure_power_at(struct intel_rps *rps, int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_power(5);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

int live_rps_power(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * Our fundamental assumption is that running at lower frequency
	 * actually saves power. Let's see if our RAPL measurement supports
	 * that theory.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (!librapl_energy_uJ())
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct {
			u64 power;
			int freq;
		} min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			st_engine_heartbeat_enable(engine);
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		max.freq = rps->max_freq;
		max.power = measure_power_at(rps, &max.freq);

		min.freq = rps->min_freq;
		min.power = measure_power_at(rps, &min.freq);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
			engine->name,
			min.power, intel_gpu_freq(rps, min.freq),
			max.power, intel_gpu_freq(rps, max.freq));

		if (10 * min.freq >= 9 * max.freq) {
			pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMHz]\n",
				  min.freq, intel_gpu_freq(rps, min.freq),
				  max.freq, intel_gpu_freq(rps, max.freq));
			continue;
		}

		if (11 * min.power > 10 * max.power) {
			pr_err("%s: did not conserve power when setting lower frequency!\n",
			       engine->name);
			err = -EINVAL;
			break;
		}

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

int live_rps_dynamic(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * We've looked at the basics, and have established that we
	 * can change the clock frequency and that the HW will generate
	 * interrupts based on load. Now we check how we integrate those
	 * moving parts into dynamic reclocking based on load.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	if (intel_rps_has_interrupts(rps))
		pr_info("RPS has interrupt support\n");
	if (intel_rps_uses_timer(rps))
		pr_info("RPS has timer support\n");

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct {
			ktime_t dt;
			u8 freq;
		} min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		intel_gt_pm_wait_for_idle(gt);
		GEM_BUG_ON(intel_rps_is_active(rps));
		rps->cur_freq = rps->min_freq;

		intel_engine_pm_get(engine);
		intel_rc6_disable(&gt->rc6);
		GEM_BUG_ON(rps->last_freq != rps->min_freq);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err;
		}

		i915_request_add(rq);

		max.dt = ktime_get();
		max.freq = wait_for_freq(rps, rps->max_freq, 500);
		max.dt = ktime_sub(ktime_get(), max.dt);

		igt_spinner_end(&spin);

		min.dt = ktime_get();
		min.freq = wait_for_freq(rps, rps->min_freq, 2000);
		min.dt = ktime_sub(ktime_get(), min.dt);

		pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n",
			engine->name,
			max.freq, intel_gpu_freq(rps, max.freq),
			ktime_to_ns(max.dt),
			min.freq, intel_gpu_freq(rps, min.freq),
			ktime_to_ns(min.dt));
		if (min.freq >= max.freq) {
			pr_err("%s: dynamic reclocking of spinner failed!\n",
			       engine->name);
			err = -EINVAL;
		}

err:
		intel_rc6_enable(&gt->rc6);
		intel_engine_pm_put(engine);

		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	igt_spinner_fini(&spin);

	return err;
}