// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/pm_qos.h>
#include <linux/sort.h>

#include "gem/i915_gem_internal.h"

#include "i915_reg.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt_clock_utils.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
#include "selftest_engine_heartbeat.h"
#include "selftest_rps.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_spinner.h"
#include "selftests/librapl.h"

/* Try to isolate the impact of cstates from determining frequency response */
#define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */

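/*
 * Each test swaps the real RPS worker for this no-op so that queued RPS
 * interrupt work cannot reprogram the frequency behind the test's back
 * while it is driving the frequency directly.
 */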
static void dummy_rps_work(struct work_struct *wrk)
{
}

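/*
 * sort() comparators for the triangle filters below: the five samples are
 * sorted and the middle three combined, discarding the two extreme outliers.
 */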
static int cmp_u64(const void *A, const void *B)
{
	const u64 *a = A, *b = B;

	if (*a < *b)
		return -1;
	else if (*a > *b)
		return 1;
	else
		return 0;
}

static int cmp_u32(const void *A, const void *B)
{
	const u32 *a = A, *b = B;

	if (*a < *b)
		return -1;
	else if (*a > *b)
		return 1;
	else
		return 0;
}

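/*
 * Build a self-looping batch that spins incrementing CS_GPR(COUNT) via
 * MI_MATH; with @srm it also stores the running count to the last dword
 * of the buffer so the CPU can sample it from memory. *@cancel points at
 * the loop entry: writing MI_BATCH_BUFFER_END there terminates the spinner
 * on its next pass.
 */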
static struct i915_vma *
create_spin_counter(struct intel_engine_cs *engine,
		    struct i915_address_space *vm,
		    bool srm,
		    u32 **cancel,
		    u32 **counter)
{
	enum {
		COUNT,
		INC,
		__NGPR__,
	};
#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	unsigned long end;
	u32 *base, *cs;
	int loop, i;
	int err;

	obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	end = obj->base.size / sizeof(u32) - 1;

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_put;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err_put;

	i915_vma_lock(vma);

	base = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(base)) {
		err = PTR_ERR(base);
		goto err_unpin;
	}
	cs = base;

	*cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
	for (i = 0; i < __NGPR__; i++) {
		*cs++ = i915_mmio_reg_offset(CS_GPR(i));
		*cs++ = 0;
		*cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
		*cs++ = 0;
	}

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(CS_GPR(INC));
	*cs++ = 1;

	loop = cs - base;

	/* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
	for (i = 0; i < 1024; i++) {
		*cs++ = MI_MATH(4);
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
		*cs++ = MI_MATH_ADD;
		*cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);

		if (srm) {
			*cs++ = MI_STORE_REGISTER_MEM_GEN8;
			*cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
			*cs++ = lower_32_bits(i915_vma_offset(vma) + end * sizeof(*cs));
			*cs++ = upper_32_bits(i915_vma_offset(vma) + end * sizeof(*cs));
		}
	}

	*cs++ = MI_BATCH_BUFFER_START_GEN8;
	*cs++ = lower_32_bits(i915_vma_offset(vma) + loop * sizeof(*cs));
	*cs++ = upper_32_bits(i915_vma_offset(vma) + loop * sizeof(*cs));
	GEM_BUG_ON(cs - base > end);

	i915_gem_object_flush_map(obj);

	*cancel = base + loop;
	*counter = srm ? memset32(base + end, 0, 1) : NULL;
	return vma;

err_unpin:
	i915_vma_unpin(vma);
	i915_vma_unlock(vma);
err_put:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

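/*
 * Poll the actual frequency (CAGF) with exponential backoff until it hits
 * @freq, stops changing (the last 64 samples are all identical), or
 * @timeout_ms expires; returns the last frequency observed.
 */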
static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
{
	u8 history[64], i;
	unsigned long end;
	int sleep;

	i = 0;
	memset(history, freq, sizeof(history));
	sleep = 20;

	/* The PCU does not change instantly, but drifts towards the goal? */
	end = jiffies + msecs_to_jiffies(timeout_ms);
	do {
		u8 act;

		act = read_cagf(rps);
		if (time_after(jiffies, end))
			return act;

		/* Target acquired */
		if (act == freq)
			return act;

		/* Any change within the last N samples? */
		if (!memchr_inv(history, act, sizeof(history)))
			return act;

		history[i] = act;
		i = (i + 1) % ARRAY_SIZE(history);

		usleep_range(sleep, 2 * sleep);
		sleep *= 2;
		if (sleep > timeout_ms * 20)
			sleep = timeout_ms * 20;
	} while (1);
}

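/*
 * Request @freq and verify both that the request was accepted
 * (rps->last_freq) and that the hardware actually reached it, returning
 * the measured frequency (or 0 if the request itself failed).
 */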
static u8 rps_set_check(struct intel_rps *rps, u8 freq)
{
	mutex_lock(&rps->lock);
	GEM_BUG_ON(!intel_rps_is_active(rps));
	if (wait_for(!intel_rps_set(rps, freq), 50)) {
		mutex_unlock(&rps->lock);
		return 0;
	}
	GEM_BUG_ON(rps->last_freq != freq);
	mutex_unlock(&rps->lock);

	return wait_for_freq(rps, freq, 50);
}

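/* On failure, dump the platform's pstate limit registers to aid debugging */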
static void show_pstate_limits(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);

	if (IS_BROXTON(i915)) {
		pr_info("P_STATE_CAP[%x]: 0x%08x\n",
			i915_mmio_reg_offset(BXT_RP_STATE_CAP),
			intel_uncore_read(rps_to_uncore(rps),
					  BXT_RP_STATE_CAP));
	} else if (GRAPHICS_VER(i915) == 9) {
		pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
			i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
			intel_uncore_read(rps_to_uncore(rps),
					  GEN9_RP_STATE_LIMITS));
	}
}

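/*
 * Sanity check the GT clock: count GEN6_RP_CUR_UP_EI ticks across five
 * ~1ms busy windows and compare against walltime using the PM
 * interval<->ns conversions. The filtered results must agree to within
 * the 80%/125% window, e.g. 10 * time < 8 * dt fails when the converted
 * time is less than 80% of the elapsed walltime.
 */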
int live_rps_clock_interval(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	intel_wakeref_t wakeref;
	int err = 0;

	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	wakeref = intel_gt_pm_get(gt);
	intel_rps_disable(&gt->rps);

	intel_gt_check_clock_frequency(gt);

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		u32 cycles;
		u64 dt;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			st_engine_heartbeat_enable(engine);
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);

		/* Set the evaluation interval to infinity! */
		intel_uncore_write_fw(gt->uncore,
				      GEN6_RP_UP_EI, 0xffffffff);
		intel_uncore_write_fw(gt->uncore,
				      GEN6_RP_UP_THRESHOLD, 0xffffffff);

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
				      GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);

		if (wait_for(intel_uncore_read_fw(gt->uncore,
						  GEN6_RP_CUR_UP_EI),
			     10)) {
			/* Just skip the test; assume lack of HW support */
			pr_notice("%s: rps evaluation interval not ticking\n",
				  engine->name);
			err = -ENODEV;
		} else {
			ktime_t dt_[5];
			u32 cycles_[5];
			int i;

			for (i = 0; i < 5; i++) {
				preempt_disable();

				cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
				dt_[i] = ktime_get();

				udelay(1000);

				cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
				dt_[i] = ktime_sub(ktime_get(), dt_[i]);

				preempt_enable();
			}

			/* Use the median of both cycle/dt; close enough */
			sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
			cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
			sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
			dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
		}

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
		intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		if (err == 0) {
			u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
			u32 expected =
				intel_gt_ns_to_pm_interval(gt, dt);

			pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
				engine->name, cycles, time, dt, expected,
				gt->clock_frequency / 1000);

			if (10 * time < 8 * dt ||
			    8 * time > 10 * dt) {
				pr_err("%s: rps clock time does not match walltime!\n",
				       engine->name);
				err = -EINVAL;
			}

			if (10 * expected < 8 * cycles ||
			    8 * expected > 10 * cycles) {
				pr_err("%s: walltime does not match rps clock ticks!\n",
				       engine->name);
				err = -EINVAL;
			}
		}

		if (igt_flush_test(gt->i915))
			err = -EIO;

		break; /* once is enough */
	}

	intel_rps_enable(&gt->rps);
	intel_gt_pm_put(gt, wakeref);

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (err == -ENODEV) /* skipped, don't report a fail */
		err = 0;

	return err;
}

int live_rps_control(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	intel_wakeref_t wakeref;
	int err = 0;

	/*
	 * Check that the actual frequency matches our requested frequency,
	 * to verify our control mechanism. We have to be careful that the
	 * PCU may throttle the GPU in which case the actual frequency used
	 * will be lower than requested.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	wakeref = intel_gt_pm_get(gt);
	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		ktime_t min_dt, max_dt;
		int f, limit;
		int min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
			pr_err("%s: could not set minimum frequency [%x], only %x!\n",
			       engine->name, rps->min_freq, read_cagf(rps));
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			show_pstate_limits(rps);
			err = -EINVAL;
			break;
		}

		for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
			if (rps_set_check(rps, f) < f)
				break;
		}

		limit = rps_set_check(rps, f);

		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
			pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
			       engine->name, rps->min_freq, read_cagf(rps));
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			show_pstate_limits(rps);
			err = -EINVAL;
			break;
		}

		max_dt = ktime_get();
		max = rps_set_check(rps, limit);
		max_dt = ktime_sub(ktime_get(), max_dt);

		min_dt = ktime_get();
		min = rps_set_check(rps, rps->min_freq);
		min_dt = ktime_sub(ktime_get(), min_dt);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
			engine->name,
			rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
			rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
			limit, intel_gpu_freq(rps, limit),
			min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));

		if (limit == rps->min_freq) {
			pr_err("%s: GPU throttled to minimum!\n",
			       engine->name);
			show_pstate_limits(rps);
			err = -ENODEV;
			break;
		}

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}
	intel_gt_pm_put(gt, wakeref);

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

static void show_pcu_config(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	unsigned int max_gpu_freq, min_gpu_freq;
	intel_wakeref_t wakeref;
	int gpu_freq;

	if (!HAS_LLC(i915))
		return;

	min_gpu_freq = rps->min_freq;
	max_gpu_freq = rps->max_freq;
	if (GRAPHICS_VER(i915) >= 9) {
		/* Convert GT frequency to 50 MHz units */
		min_gpu_freq /= GEN9_FREQ_SCALER;
		max_gpu_freq /= GEN9_FREQ_SCALER;
	}

	wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);

	pr_info("%5s  %5s  %5s\n", "GPU", "eCPU", "eRing");
	for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
		int ia_freq = gpu_freq;

		snb_pcode_read(rps_to_gt(rps)->uncore, GEN6_PCODE_READ_MIN_FREQ_TABLE,
			       &ia_freq, NULL);

		pr_info("%5d  %5d  %5d\n",
			gpu_freq * 50,
			((ia_freq >> 0) & 0xff) * 100,
			((ia_freq >> 8) & 0xff) * 100);
	}

	intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
}

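/*
 * Sample the memory-backed spinner counter over ~duration_ms and convert
 * the delta into a tick rate: dc counts over dt ns, scaled by 10^6, is
 * the increment rate in KHz, matching the "%lluKHz" reporting below.
 */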
static u64 __measure_frequency(u32 *cntr, int duration_ms)
{
	u64 dc, dt;

	dc = READ_ONCE(*cntr);
	dt = ktime_get();
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dc = READ_ONCE(*cntr) - dc;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dc, dt);
}

static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_frequency(cntr, 2);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
				  int duration_ms)
{
	u64 dc, dt;

	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
	dt = ktime_get();
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dc, dt);
}

static u64 measure_cs_frequency_at(struct intel_rps *rps,
				   struct intel_engine_cs *engine,
				   int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_cs_frequency(engine, 2);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

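/*
 * Check that x/y lies strictly within (f_n/f_d, f_d/f_n), e.g.
 * scaled_within(x, y, 2, 3) accepts ratios between 2/3 and 3/2.
 */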
static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
{
	return f_d * x > f_n * y && f_n * x < f_d * y;
}

int live_rps_frequency_cs(void *arg)
{
	void (*saved_work)(struct work_struct *wrk);
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * The premise is that the GPU does change frequency at our behest.
	 * Let's check there is a correspondence between the requested
	 * frequency, the actual frequency, and the observed clock rate.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
		return 0;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_add_request(&qos, CPU_LATENCY);

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct i915_vma *vma;
		u32 *cancel, *cntr;
		struct {
			u64 count;
			int freq;
		} min, max;

		st_engine_heartbeat_disable(engine);

		vma = create_spin_counter(engine,
					  engine->kernel_context->vm, false,
					  &cancel, &cntr);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			st_engine_heartbeat_enable(engine);
			break;
		}

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_vma;
		}

		err = i915_vma_move_to_active(vma, rq, 0);
		if (!err)
			err = rq->engine->emit_bb_start(rq,
							i915_vma_offset(vma),
							PAGE_SIZE, 0);
		i915_request_add(rq);
		if (err)
			goto err_vma;

		if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
			     10)) {
			pr_err("%s: timed loop did not start\n",
			       engine->name);
			goto err_vma;
		}

		min.freq = rps->min_freq;
		min.count = measure_cs_frequency_at(rps, engine, &min.freq);

		max.freq = rps->max_freq;
		max.count = measure_cs_frequency_at(rps, engine, &max.freq);

		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
			engine->name,
			min.count, intel_gpu_freq(rps, min.freq),
			max.count, intel_gpu_freq(rps, max.freq),
			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
						     max.freq * min.count));

		if (!scaled_within(max.freq * min.count,
				   min.freq * max.count,
				   2, 3)) {
			int f;

			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
			       engine->name,
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				int act = f;
				u64 count;

				count = measure_cs_frequency_at(rps, engine, &act);
				if (act < f)
					break;

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					engine->name,
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
								     act * min.count));

				f = act; /* may skip ahead [pcu granularity] */
			}

			err = -EINTR; /* ignore error, continue on with test */
		}

err_vma:
		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_flush_map(vma->obj);
		i915_gem_object_unpin_map(vma->obj);
		i915_vma_unpin(vma);
		i915_vma_unlock(vma);
		i915_vma_put(vma);

		st_engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);

	return err;
}

int live_rps_frequency_srm(void *arg)
{
	void (*saved_work)(struct work_struct *wrk);
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * The premise is that the GPU does change frequency at our behest.
	 * Let's check there is a correspondence between the requested
	 * frequency, the actual frequency, and the observed clock rate.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
		return 0;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_add_request(&qos, CPU_LATENCY);

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct i915_vma *vma;
		u32 *cancel, *cntr;
		struct {
			u64 count;
			int freq;
		} min, max;

		st_engine_heartbeat_disable(engine);

		vma = create_spin_counter(engine,
					  engine->kernel_context->vm, true,
					  &cancel, &cntr);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			st_engine_heartbeat_enable(engine);
			break;
		}

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_vma;
		}

		err = i915_vma_move_to_active(vma, rq, 0);
		if (!err)
			err = rq->engine->emit_bb_start(rq,
							i915_vma_offset(vma),
							PAGE_SIZE, 0);
		i915_request_add(rq);
		if (err)
			goto err_vma;

		if (wait_for(READ_ONCE(*cntr), 10)) {
			pr_err("%s: timed loop did not start\n",
			       engine->name);
			goto err_vma;
		}

		min.freq = rps->min_freq;
		min.count = measure_frequency_at(rps, cntr, &min.freq);

		max.freq = rps->max_freq;
		max.count = measure_frequency_at(rps, cntr, &max.freq);

		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
			engine->name,
			min.count, intel_gpu_freq(rps, min.freq),
			max.count, intel_gpu_freq(rps, max.freq),
			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
						     max.freq * min.count));

		if (!scaled_within(max.freq * min.count,
				   min.freq * max.count,
				   1, 2)) {
			int f;

			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
			       engine->name,
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				int act = f;
				u64 count;

				count = measure_frequency_at(rps, cntr, &act);
				if (act < f)
					break;

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					engine->name,
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
								     act * min.count));

				f = act; /* may skip ahead [pcu granularity] */
			}

			err = -EINTR; /* ignore error, continue on with test */
		}

err_vma:
		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_flush_map(vma->obj);
		i915_gem_object_unpin_map(vma->obj);
		i915_vma_unpin(vma);
		i915_vma_unlock(vma);
		i915_vma_put(vma);

		st_engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);

	return err;
}

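/*
 * Wait out one full evaluation interval with the interrupt state freshly
 * reset, so that any pm_iir bits observed afterwards were raised during
 * that interval.
 */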
static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
{
	/* Flush any previous EI */
	usleep_range(timeout_us, 2 * timeout_us);

	/* Reset the interrupt status */
	rps_disable_interrupts(rps);
	GEM_BUG_ON(rps->pm_iir);
	rps_enable_interrupts(rps);

	/* And then wait for the timeout, for real this time */
	usleep_range(2 * timeout_us, 3 * timeout_us);
}

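/*
 * Saturate the engine with a spinner at the minimum frequency and check
 * that an UP threshold interrupt is raised within one evaluation interval.
 */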
static int __rps_up_interrupt(struct intel_rps *rps,
			      struct intel_engine_cs *engine,
			      struct igt_spinner *spin)
{
	struct intel_uncore *uncore = engine->uncore;
	struct i915_request *rq;
	u32 timeout;

	if (!intel_engine_can_store_dword(engine))
		return 0;

	rps_set_check(rps, rps->min_freq);

	rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	i915_request_get(rq);
	i915_request_add(rq);

	if (!igt_wait_for_spinner(spin, rq)) {
		pr_err("%s: RPS spinner did not start\n",
		       engine->name);
		i915_request_put(rq);
		intel_gt_set_wedged(engine->gt);
		return -EIO;
	}

	if (!intel_rps_is_active(rps)) {
		pr_err("%s: RPS not enabled on starting spinner\n",
		       engine->name);
		igt_spinner_end(spin);
		i915_request_put(rq);
		return -EINVAL;
	}

	if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
		pr_err("%s: RPS did not register UP interrupt\n",
		       engine->name);
		i915_request_put(rq);
		return -EINVAL;
	}

	if (rps->last_freq != rps->min_freq) {
		pr_err("%s: RPS did not program min frequency\n",
		       engine->name);
		i915_request_put(rq);
		return -EINVAL;
	}

	timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
	timeout = DIV_ROUND_UP(timeout, 1000);

	sleep_for_ei(rps, timeout);
	GEM_BUG_ON(i915_request_completed(rq));

	igt_spinner_end(spin);
	i915_request_put(rq);

	if (rps->cur_freq != rps->min_freq) {
		pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
		       engine->name, intel_rps_read_actual_frequency(rps));
		return -EINVAL;
	}

	if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
		pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
		       engine->name, rps->pm_iir,
		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
		return -EINVAL;
	}

	return 0;
}

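/*
 * Conversely, idle at the maximum frequency and check that a DOWN
 * threshold (or timeout) interrupt is raised.
 */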
static int __rps_down_interrupt(struct intel_rps *rps,
				struct intel_engine_cs *engine)
{
	struct intel_uncore *uncore = engine->uncore;
	u32 timeout;

	rps_set_check(rps, rps->max_freq);

	if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
		pr_err("%s: RPS did not register DOWN interrupt\n",
		       engine->name);
		return -EINVAL;
	}

	if (rps->last_freq != rps->max_freq) {
		pr_err("%s: RPS did not program max frequency\n",
		       engine->name);
		return -EINVAL;
	}

	timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
	timeout = DIV_ROUND_UP(timeout, 1000);

	sleep_for_ei(rps, timeout);

	if (rps->cur_freq != rps->max_freq) {
		pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
		       engine->name,
		       intel_rps_read_actual_frequency(rps));
		return -EINVAL;
	}

	if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
		pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
		       engine->name, rps->pm_iir,
		       intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
		       intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
		return -EINVAL;
	}

	return 0;
}

int live_rps_interrupt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	intel_wakeref_t wakeref;
	u32 pm_events;
	int err = 0;

	/*
	 * First, let's check whether or not we are receiving interrupts.
	 */

	if (!intel_rps_has_interrupts(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	pm_events = 0;
	with_intel_gt_pm(gt, wakeref)
		pm_events = rps->pm_events;
	if (!pm_events) {
		pr_err("No RPS PM events registered, but RPS is enabled?\n");
		return -ENODEV;
	}

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		/* Keep the engine busy with a spinner; expect an UP! */
		if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
			intel_gt_pm_wait_for_idle(engine->gt);
			GEM_BUG_ON(intel_rps_is_active(rps));

			st_engine_heartbeat_disable(engine);

			err = __rps_up_interrupt(rps, engine, &spin);

			st_engine_heartbeat_enable(engine);
			if (err)
				goto out;

			intel_gt_pm_wait_for_idle(engine->gt);
		}

		/* Keep the engine awake but idle and check for DOWN */
		if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
			st_engine_heartbeat_disable(engine);
			intel_rc6_disable(&gt->rc6);

			err = __rps_down_interrupt(rps, engine);

			intel_rc6_enable(&gt->rc6);
			st_engine_heartbeat_enable(engine);
			if (err)
				goto out;
		}
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

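/*
 * RAPL reports energy in microjoules: dE uJ over dt ns, scaled by 10^6,
 * is the average power in mW (1uJ per 1ms is 1mW), matching the "%llumW"
 * reporting below.
 */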
static u64 __measure_power(int duration_ms)
{
	u64 dE, dt;

	dE = librapl_energy_uJ();
	dt = ktime_get();
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dE = librapl_energy_uJ() - dE;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dE, dt);
}

static u64 measure_power(struct intel_rps *rps, int *freq)
{
	u64 x[5];
	int i;

	for (i = 0; i < 5; i++)
		x[i] = __measure_power(5);

	*freq = (*freq + intel_rps_read_actual_frequency(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

static u64 measure_power_at(struct intel_rps *rps, int *freq)
{
	*freq = rps_set_check(rps, *freq);
	return measure_power(rps, freq);
}

int live_rps_power(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * Our fundamental assumption is that running at lower frequency
	 * actually saves power. Let's see if our RAPL measurement supports
	 * that theory.
	 */

	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	if (!librapl_supported(gt->i915))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct {
			u64 power;
			int freq;
		} min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			st_engine_heartbeat_enable(engine);
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		max.freq = rps->max_freq;
		max.power = measure_power_at(rps, &max.freq);

		min.freq = rps->min_freq;
		min.power = measure_power_at(rps, &min.freq);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
			engine->name,
			min.power, intel_gpu_freq(rps, min.freq),
			max.power, intel_gpu_freq(rps, max.freq));

		if (10 * min.freq >= 9 * max.freq) {
			pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMHz]\n",
				  min.freq, intel_gpu_freq(rps, min.freq),
				  max.freq, intel_gpu_freq(rps, max.freq));
			continue;
		}

		if (11 * min.power > 10 * max.power) {
			pr_err("%s: did not conserve power when setting lower frequency!\n",
			       engine->name);
			err = -EINVAL;
			break;
		}

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

int live_rps_dynamic(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * We've looked at the basics, and have established that we
	 * can change the clock frequency and that the HW will generate
	 * interrupts based on load. Now we check how we integrate those
	 * moving parts into dynamic reclocking based on load.
	 */

	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	if (intel_rps_has_interrupts(rps))
		pr_info("RPS has interrupt support\n");
	if (intel_rps_uses_timer(rps))
		pr_info("RPS has timer support\n");

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct {
			ktime_t dt;
			u8 freq;
		} min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		intel_gt_pm_wait_for_idle(gt);
		GEM_BUG_ON(intel_rps_is_active(rps));
		rps->cur_freq = rps->min_freq;

		intel_engine_pm_get(engine);
		intel_rc6_disable(&gt->rc6);
		GEM_BUG_ON(rps->last_freq != rps->min_freq);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err;
		}

		i915_request_add(rq);

		max.dt = ktime_get();
		max.freq = wait_for_freq(rps, rps->max_freq, 500);
		max.dt = ktime_sub(ktime_get(), max.dt);

		igt_spinner_end(&spin);

		min.dt = ktime_get();
		min.freq = wait_for_freq(rps, rps->min_freq, 2000);
		min.dt = ktime_sub(ktime_get(), min.dt);

		pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n",
			engine->name,
			max.freq, intel_gpu_freq(rps, max.freq),
			ktime_to_ns(max.dt),
			min.freq, intel_gpu_freq(rps, min.freq),
			ktime_to_ns(min.dt));
		if (min.freq >= max.freq) {
			pr_err("%s: dynamic reclocking of spinner failed!\n",
			       engine->name);
			err = -EINVAL;
		}

err:
		intel_rc6_enable(&gt->rc6);
		intel_engine_pm_put(engine);

		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	igt_spinner_fini(&spin);

	return err;
}