// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/pm_qos.h>
#include <linux/sort.h>

#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_gpu_commands.h"
#include "intel_gt_clock_utils.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
#include "selftest_engine_heartbeat.h"
#include "selftest_rps.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_spinner.h"
#include "selftests/librapl.h"

/* Try to isolate the impact of cstates from determining frequency response */
#define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */

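/*
 * The selftests park the real RPS worker and substitute this no-op so
 * that background reclocking cannot interfere with the measurements.
 */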
static void dummy_rps_work(struct work_struct *wrk)
{
}

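/* Comparators for sort(), used by the median/triangle filters below. */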
static int cmp_u64(const void *A, const void *B)
{
	const u64 *a = A, *b = B;

	if (*a < *b)
		return -1;
	else if (*a > *b)
		return 1;
	else
		return 0;
}

static int cmp_u32(const void *A, const void *B)
{
	const u32 *a = A, *b = B;

	if (*a < *b)
		return -1;
	else if (*a > *b)
		return 1;
	else
		return 0;
}

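/*
 * Build a batch that spins in place: each pass increments a CS_GPR
 * counter with MI_MATH and then loops back on itself via MI_BB_START.
 * With @srm, each pass also stores the count to the last dword of the
 * buffer (returned through @counter) so the CPU can sample it. Writing
 * MI_BATCH_BUFFER_END over @cancel terminates the loop.
 */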
static struct i915_vma *
create_spin_counter(struct intel_engine_cs *engine,
		    struct i915_address_space *vm,
		    bool srm,
		    u32 **cancel,
		    u32 **counter)
{
	enum {
		COUNT,
		INC,
		__NGPR__,
	};
#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	unsigned long end;
	u32 *base, *cs;
	int loop, i;
	int err;

	obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	end = obj->base.size / sizeof(u32) - 1;

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_put;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err_unlock;

	i915_vma_lock(vma);

	base = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(base)) {
		err = PTR_ERR(base);
		goto err_unpin;
	}
	cs = base;

	*cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
	for (i = 0; i < __NGPR__; i++) {
		*cs++ = i915_mmio_reg_offset(CS_GPR(i));
		*cs++ = 0;
		*cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
		*cs++ = 0;
	}

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(CS_GPR(INC));
	*cs++ = 1;

	loop = cs - base;

	/* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
	for (i = 0; i < 1024; i++) {
		*cs++ = MI_MATH(4);
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
		*cs++ = MI_MATH_ADD;
		*cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);

		if (srm) {
			*cs++ = MI_STORE_REGISTER_MEM_GEN8;
			*cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
			*cs++ = lower_32_bits(vma->node.start + end * sizeof(*cs));
			*cs++ = upper_32_bits(vma->node.start + end * sizeof(*cs));
		}
	}

	*cs++ = MI_BATCH_BUFFER_START_GEN8;
	*cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
	*cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
	GEM_BUG_ON(cs - base > end);

	i915_gem_object_flush_map(obj);

	*cancel = base + loop;
	*counter = srm ? memset32(base + end, 0, 1) : NULL;
	return vma;

err_unpin:
	i915_vma_unpin(vma);
err_unlock:
	i915_vma_unlock(vma);
err_put:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

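/*
 * Poll the actual GPU frequency (CAGF) until it reaches @freq, stops
 * changing (identical across the last 64 samples) or @timeout_ms
 * expires, backing off the poll interval exponentially.
 */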
static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
{
	u8 history[64], i;
	unsigned long end;
	int sleep;

	i = 0;
	memset(history, freq, sizeof(history));
	sleep = 20;

	/* The PCU does not change instantly, but drifts towards the goal? */
	end = jiffies + msecs_to_jiffies(timeout_ms);
	do {
		u8 act;

		act = read_cagf(rps);
		if (time_after(jiffies, end))
			return act;

		/* Target acquired */
		if (act == freq)
			return act;

		/* Any change within the last N samples? */
		if (!memchr_inv(history, act, sizeof(history)))
			return act;

		history[i] = act;
		i = (i + 1) % ARRAY_SIZE(history);

		usleep_range(sleep, 2 * sleep);
		sleep *= 2;
		if (sleep > timeout_ms * 20)
			sleep = timeout_ms * 20;
	} while (1);
}

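/*
 * Request @freq under the rps lock, then wait for the hardware to
 * settle and return the frequency actually achieved.
 */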
static u8 rps_set_check(struct intel_rps *rps, u8 freq)
{
	mutex_lock(&rps->lock);
	GEM_BUG_ON(!intel_rps_is_active(rps));
	if (wait_for(!intel_rps_set(rps, freq), 50)) {
		mutex_unlock(&rps->lock);
		return 0;
	}
	GEM_BUG_ON(rps->last_freq != freq);
	mutex_unlock(&rps->lock);

	return wait_for_freq(rps, freq, 50);
}

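/* Dump the platform pstate limit registers to aid debugging a failed test. */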
static void show_pstate_limits(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);

	if (IS_BROXTON(i915)) {
		pr_info("P_STATE_CAP[%x]: 0x%08x\n",
			i915_mmio_reg_offset(BXT_RP_STATE_CAP),
			intel_uncore_read(rps_to_uncore(rps),
					  BXT_RP_STATE_CAP));
	} else if (GRAPHICS_VER(i915) == 9) {
		pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
			i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
			intel_uncore_read(rps_to_uncore(rps),
					  GEN9_RP_STATE_LIMITS));
	}
}

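/*
 * Stretch the RPS evaluation interval to its maximum and compare the
 * number of EI cycles counted against walltime: the GT clock and the
 * CPU clock must agree to within a 4:5 ratio in either direction.
 */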
int live_rps_clock_interval(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	intel_gt_pm_get(gt);
	intel_rps_disable(&gt->rps);

	intel_gt_check_clock_frequency(gt);

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		u32 cycles;
		u64 dt;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			st_engine_heartbeat_enable(engine);
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);

		/* Set the evaluation interval to infinity! */
		intel_uncore_write_fw(gt->uncore,
				      GEN6_RP_UP_EI, 0xffffffff);
		intel_uncore_write_fw(gt->uncore,
				      GEN6_RP_UP_THRESHOLD, 0xffffffff);

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
				      GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);

		if (wait_for(intel_uncore_read_fw(gt->uncore,
						  GEN6_RP_CUR_UP_EI),
			     10)) {
			/* Just skip the test; assume lack of HW support */
			pr_notice("%s: rps evaluation interval not ticking\n",
				  engine->name);
			err = -ENODEV;
		} else {
			ktime_t dt_[5];
			u32 cycles_[5];
			int i;

			for (i = 0; i < 5; i++) {
				preempt_disable();

				dt_[i] = ktime_get();
				cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);

				udelay(1000);

				dt_[i] = ktime_sub(ktime_get(), dt_[i]);
				cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);

				preempt_enable();
			}

			/* Use the median of both cycle/dt; close enough */
			sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
			cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
			sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
			dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
		}

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
		intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		if (err == 0) {
			u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
			u32 expected =
				intel_gt_ns_to_pm_interval(gt, dt);

			pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
				engine->name, cycles, time, dt, expected,
				gt->clock_frequency / 1000);

			if (10 * time < 8 * dt ||
			    8 * time > 10 * dt) {
				pr_err("%s: rps clock time does not match walltime!\n",
				       engine->name);
				err = -EINVAL;
			}

			if (10 * expected < 8 * cycles ||
			    8 * expected > 10 * cycles) {
				pr_err("%s: walltime does not match rps clock ticks!\n",
				       engine->name);
				err = -EINVAL;
			}
		}

		if (igt_flush_test(gt->i915))
			err = -EIO;

		break; /* once is enough */
	}

	intel_rps_enable(&gt->rps);
	intel_gt_pm_put(gt);

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (err == -ENODEV) /* skipped, don't report a fail */
		err = 0;

	return err;
}

int live_rps_control(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * Check that the actual frequency matches our requested frequency,
	 * to verify our control mechanism. We have to be careful that the
	 * PCU may throttle the GPU, in which case the actual frequency used
	 * will be lower than requested.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	intel_gt_pm_get(gt);
	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		ktime_t min_dt, max_dt;
		int f, limit;
		int min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
			pr_err("%s: could not set minimum frequency [%x], only %x!\n",
			       engine->name, rps->min_freq, read_cagf(rps));
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			show_pstate_limits(rps);
			err = -EINVAL;
			break;
		}

		for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
			if (rps_set_check(rps, f) < f)
				break;
		}

		limit = rps_set_check(rps, f);

		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
			pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
			       engine->name, rps->min_freq, read_cagf(rps));
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			show_pstate_limits(rps);
			err = -EINVAL;
			break;
		}

		max_dt = ktime_get();
		max = rps_set_check(rps, limit);
		max_dt = ktime_sub(ktime_get(), max_dt);

		min_dt = ktime_get();
		min = rps_set_check(rps, rps->min_freq);
		min_dt = ktime_sub(ktime_get(), min_dt);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
			engine->name,
			rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
			rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
			limit, intel_gpu_freq(rps, limit),
			min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));

		if (limit == rps->min_freq) {
			pr_err("%s: GPU throttled to minimum!\n",
			       engine->name);
			show_pstate_limits(rps);
			err = -ENODEV;
			break;
		}

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}
	intel_gt_pm_put(gt);

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

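/*
 * Print the pcode frequency table (GPU frequency vs the effective CPU
 * and ring frequencies) for debugging; only meaningful on LLC platforms.
 */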
static void show_pcu_config(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	unsigned int max_gpu_freq, min_gpu_freq;
	intel_wakeref_t wakeref;
	int gpu_freq;

	if (!HAS_LLC(i915))
		return;

	min_gpu_freq = rps->min_freq;
	max_gpu_freq = rps->max_freq;
	if (GRAPHICS_VER(i915) >= 9) {
		/* Convert GT frequency to 50 MHz units */
		min_gpu_freq /= GEN9_FREQ_SCALER;
		max_gpu_freq /= GEN9_FREQ_SCALER;
	}

	wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);

	pr_info("%5s  %5s  %5s\n", "GPU", "eCPU", "eRing");
	for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
		int ia_freq = gpu_freq;

		sandybridge_pcode_read(i915,
				       GEN6_PCODE_READ_MIN_FREQ_TABLE,
				       &ia_freq, NULL);

		pr_info("%5d  %5d  %5d\n",
			gpu_freq * 50,
			((ia_freq >> 0) & 0xff) * 100,
			((ia_freq >> 8) & 0xff) * 100);
	}

	intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
}

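/*
 * Sample the spinner's memory counter across a sleep window and return
 * its rate in increments per millisecond (printed by callers as kHz).
 */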
static u64 __measure_frequency(u32 *cntr, int duration_ms)
{
	u64 dc, dt;

	dt = ktime_get();
	dc = READ_ONCE(*cntr);
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dc = READ_ONCE(*cntr) - dc;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dc, dt);
}

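/*
 * Pin the GPU at *freq, take five samples and combine them with a
 * triangle filter; *freq is updated to the mean of the requested and
 * observed frequencies.
 */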
static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_frequency(cntr, 2);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

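/* As above, but sampling the CS_GPR counter directly over mmio. */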
static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
				  int duration_ms)
{
	u64 dc, dt;

	dt = ktime_get();
	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dc, dt);
}

static u64 measure_cs_frequency_at(struct intel_rps *rps,
				   struct intel_engine_cs *engine,
				   int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_cs_frequency(engine, 2);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

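/* True if x/y lies strictly within the ratio band (f_n/f_d, f_d/f_n). */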
static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
{
	return f_d * x > f_n * y && f_n * x < f_d * y;
}

int live_rps_frequency_cs(void *arg)
{
	void (*saved_work)(struct work_struct *wrk);
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * The premise is that the GPU does change frequency at our behest.
	 * Let's check there is a correspondence between the requested
	 * frequency, the actual frequency, and the observed clock rate.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
		return 0;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_add_request(&qos, CPU_LATENCY);

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct i915_vma *vma;
		u32 *cancel, *cntr;
		struct {
			u64 count;
			int freq;
		} min, max;

		st_engine_heartbeat_disable(engine);

		vma = create_spin_counter(engine,
					  engine->kernel_context->vm, false,
					  &cancel, &cntr);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			st_engine_heartbeat_enable(engine);
			break;
		}

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_vma;
		}

		err = i915_request_await_object(rq, vma->obj, false);
		if (!err)
			err = i915_vma_move_to_active(vma, rq, 0);
		if (!err)
			err = rq->engine->emit_bb_start(rq,
							vma->node.start,
							PAGE_SIZE, 0);
		i915_request_add(rq);
		if (err)
			goto err_vma;

		if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
			     10)) {
			pr_err("%s: timed loop did not start\n",
			       engine->name);
			goto err_vma;
		}

		min.freq = rps->min_freq;
		min.count = measure_cs_frequency_at(rps, engine, &min.freq);

		max.freq = rps->max_freq;
		max.count = measure_cs_frequency_at(rps, engine, &max.freq);

		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
			engine->name,
			min.count, intel_gpu_freq(rps, min.freq),
			max.count, intel_gpu_freq(rps, max.freq),
			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
						     max.freq * min.count));

		if (!scaled_within(max.freq * min.count,
				   min.freq * max.count,
				   2, 3)) {
			int f;

			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
			       engine->name,
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				int act = f;
				u64 count;

				count = measure_cs_frequency_at(rps, engine, &act);
				if (act < f)
					break;

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					engine->name,
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
								     act * min.count));

				f = act; /* may skip ahead [pcu granularity] */
			}

			err = -EINTR; /* ignore error, continue on with test */
		}

err_vma:
		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_flush_map(vma->obj);
		i915_gem_object_unpin_map(vma->obj);
		i915_vma_unpin(vma);
		i915_vma_unlock(vma);
		i915_vma_put(vma);

		st_engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);

	return err;
}

int live_rps_frequency_srm(void *arg)
{
	void (*saved_work)(struct work_struct *wrk);
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * The premise is that the GPU does change frequency at our behest.
	 * Let's check there is a correspondence between the requested
	 * frequency, the actual frequency, and the observed clock rate.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
		return 0;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_add_request(&qos, CPU_LATENCY);

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct i915_vma *vma;
		u32 *cancel, *cntr;
		struct {
			u64 count;
			int freq;
		} min, max;

		st_engine_heartbeat_disable(engine);

		vma = create_spin_counter(engine,
					  engine->kernel_context->vm, true,
					  &cancel, &cntr);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			st_engine_heartbeat_enable(engine);
			break;
		}

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_vma;
		}

		err = i915_request_await_object(rq, vma->obj, false);
		if (!err)
			err = i915_vma_move_to_active(vma, rq, 0);
		if (!err)
			err = rq->engine->emit_bb_start(rq,
							vma->node.start,
							PAGE_SIZE, 0);
		i915_request_add(rq);
		if (err)
			goto err_vma;

		if (wait_for(READ_ONCE(*cntr), 10)) {
			pr_err("%s: timed loop did not start\n",
			       engine->name);
			goto err_vma;
		}

		min.freq = rps->min_freq;
		min.count = measure_frequency_at(rps, cntr, &min.freq);

		max.freq = rps->max_freq;
		max.count = measure_frequency_at(rps, cntr, &max.freq);

		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
			engine->name,
			min.count, intel_gpu_freq(rps, min.freq),
			max.count, intel_gpu_freq(rps, max.freq),
			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
						     max.freq * min.count));

		if (!scaled_within(max.freq * min.count,
				   min.freq * max.count,
				   1, 2)) {
			int f;

			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
			       engine->name,
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				int act = f;
				u64 count;

				count = measure_frequency_at(rps, cntr, &act);
				if (act < f)
					break;

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					engine->name,
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
								     act * min.count));

				f = act; /* may skip ahead [pcu granularity] */
			}

			err = -EINTR; /* ignore error, continue on with test */
		}

err_vma:
		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_flush_map(vma->obj);
		i915_gem_object_unpin_map(vma->obj);
		i915_vma_unpin(vma);
		i915_vma_unlock(vma);
		i915_vma_put(vma);

		st_engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);

	return err;
}

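/*
 * Let a full evaluation interval elapse with a clean interrupt state so
 * that any threshold interrupt we then observe belongs to the current
 * load.
 */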
static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
{
	/* Flush any previous EI */
	usleep_range(timeout_us, 2 * timeout_us);

	/* Reset the interrupt status */
	rps_disable_interrupts(rps);
	GEM_BUG_ON(rps->pm_iir);
	rps_enable_interrupts(rps);

	/* And then wait for the timeout, for real this time */
	usleep_range(2 * timeout_us, 3 * timeout_us);
}

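/*
 * Saturate the engine with a spinner at the minimum frequency, wait out
 * an evaluation interval, and check that the hardware raised an UP
 * threshold interrupt without the frequency changing beneath us.
 */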
static int __rps_up_interrupt(struct intel_rps *rps,
			      struct intel_engine_cs *engine,
			      struct igt_spinner *spin)
{
	struct intel_uncore *uncore = engine->uncore;
	struct i915_request *rq;
	u32 timeout;

	if (!intel_engine_can_store_dword(engine))
		return 0;

	rps_set_check(rps, rps->min_freq);

	rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	i915_request_get(rq);
	i915_request_add(rq);

	if (!igt_wait_for_spinner(spin, rq)) {
		pr_err("%s: RPS spinner did not start\n",
		       engine->name);
		i915_request_put(rq);
		intel_gt_set_wedged(engine->gt);
		return -EIO;
	}

	if (!intel_rps_is_active(rps)) {
		pr_err("%s: RPS not enabled on starting spinner\n",
		       engine->name);
		igt_spinner_end(spin);
		i915_request_put(rq);
		return -EINVAL;
	}

	if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
		pr_err("%s: RPS did not register UP interrupt\n",
		       engine->name);
		i915_request_put(rq);
		return -EINVAL;
	}

	if (rps->last_freq != rps->min_freq) {
		pr_err("%s: RPS did not program min frequency\n",
		       engine->name);
		i915_request_put(rq);
		return -EINVAL;
	}

	timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
	timeout = DIV_ROUND_UP(timeout, 1000);

	sleep_for_ei(rps, timeout);
	GEM_BUG_ON(i915_request_completed(rq));

	igt_spinner_end(spin);
	i915_request_put(rq);

	if (rps->cur_freq != rps->min_freq) {
		pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
		       engine->name, intel_rps_read_actual_frequency(rps));
		return -EINVAL;
	}

	if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
		pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
		       engine->name, rps->pm_iir,
		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
		return -EINVAL;
	}

	return 0;
}

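/*
 * The idle counterpart: sit at the maximum frequency with nothing
 * running and expect a DOWN threshold (or timeout) interrupt.
 */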
static int __rps_down_interrupt(struct intel_rps *rps,
				struct intel_engine_cs *engine)
{
	struct intel_uncore *uncore = engine->uncore;
	u32 timeout;

	rps_set_check(rps, rps->max_freq);

	if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
		pr_err("%s: RPS did not register DOWN interrupt\n",
		       engine->name);
		return -EINVAL;
	}

	if (rps->last_freq != rps->max_freq) {
		pr_err("%s: RPS did not program max frequency\n",
		       engine->name);
		return -EINVAL;
	}

	timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
	timeout = DIV_ROUND_UP(timeout, 1000);

	sleep_for_ei(rps, timeout);

	if (rps->cur_freq != rps->max_freq) {
		pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
		       engine->name,
		       intel_rps_read_actual_frequency(rps));
		return -EINVAL;
	}

	if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
		pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
		       engine->name, rps->pm_iir,
		       intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
		       intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
		return -EINVAL;
	}

	return 0;
}

int live_rps_interrupt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	u32 pm_events;
	int err = 0;

	/*
	 * First, let's check whether or not we are receiving interrupts.
	 */

	if (!intel_rps_has_interrupts(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	intel_gt_pm_get(gt);
	pm_events = rps->pm_events;
	intel_gt_pm_put(gt);
	if (!pm_events) {
		pr_err("No RPS PM events registered, but RPS is enabled?\n");
		return -ENODEV;
	}

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		/* Keep the engine busy with a spinner; expect an UP! */
		if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
			intel_gt_pm_wait_for_idle(engine->gt);
			GEM_BUG_ON(intel_rps_is_active(rps));

			st_engine_heartbeat_disable(engine);

			err = __rps_up_interrupt(rps, engine, &spin);

			st_engine_heartbeat_enable(engine);
			if (err)
				goto out;

			intel_gt_pm_wait_for_idle(engine->gt);
		}

		/* Keep the engine awake but idle and check for DOWN */
		if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
			st_engine_heartbeat_disable(engine);
			intel_rc6_disable(&gt->rc6);

			err = __rps_down_interrupt(rps, engine);

			intel_rc6_enable(&gt->rc6);
			st_engine_heartbeat_enable(engine);
			if (err)
				goto out;
		}
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

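/*
 * Measure mean package power over a sleep window using RAPL; dE is in
 * microjoules and dt in nanoseconds, so the result is in milliwatts.
 */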
static u64 __measure_power(int duration_ms)
{
	u64 dE, dt;

	dt = ktime_get();
	dE = librapl_energy_uJ();
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dE = librapl_energy_uJ() - dE;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dE, dt);
}

static u64 measure_power_at(struct intel_rps *rps, int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_power(5);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

int live_rps_power(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * Our fundamental assumption is that running at a lower frequency
	 * actually saves power. Let's see if our RAPL measurements support
	 * that theory.
	 */

	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	if (!librapl_supported(gt->i915))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct {
			u64 power;
			int freq;
		} min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			st_engine_heartbeat_enable(engine);
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		max.freq = rps->max_freq;
		max.power = measure_power_at(rps, &max.freq);

		min.freq = rps->min_freq;
		min.power = measure_power_at(rps, &min.freq);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
			engine->name,
			min.power, intel_gpu_freq(rps, min.freq),
			max.power, intel_gpu_freq(rps, max.freq));

		if (10 * min.freq >= 9 * max.freq) {
			pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMHz]\n",
				  min.freq, intel_gpu_freq(rps, min.freq),
				  max.freq, intel_gpu_freq(rps, max.freq));
			continue;
		}

		if (11 * min.power > 10 * max.power) {
			pr_err("%s: did not conserve power when setting lower frequency!\n",
			       engine->name);
			err = -EINVAL;
			break;
		}

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

int live_rps_dynamic(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * We've looked at the basics, and have established that we
	 * can change the clock frequency and that the HW will generate
	 * interrupts based on load. Now we check how we integrate those
	 * moving parts into dynamic reclocking based on load.
	 */

	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	if (intel_rps_has_interrupts(rps))
		pr_info("RPS has interrupt support\n");
	if (intel_rps_uses_timer(rps))
		pr_info("RPS has timer support\n");

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct {
			ktime_t dt;
			u8 freq;
		} min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		intel_gt_pm_wait_for_idle(gt);
		GEM_BUG_ON(intel_rps_is_active(rps));
		rps->cur_freq = rps->min_freq;

		intel_engine_pm_get(engine);
		intel_rc6_disable(&gt->rc6);
		GEM_BUG_ON(rps->last_freq != rps->min_freq);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err;
		}

		i915_request_add(rq);

		max.dt = ktime_get();
		max.freq = wait_for_freq(rps, rps->max_freq, 500);
		max.dt = ktime_sub(ktime_get(), max.dt);

		igt_spinner_end(&spin);

		min.dt = ktime_get();
		min.freq = wait_for_freq(rps, rps->min_freq, 2000);
		min.dt = ktime_sub(ktime_get(), min.dt);

		pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n",
			engine->name,
			max.freq, intel_gpu_freq(rps, max.freq),
			ktime_to_ns(max.dt),
			min.freq, intel_gpu_freq(rps, min.freq),
			ktime_to_ns(min.dt));
		if (min.freq >= max.freq) {
			pr_err("%s: dynamic reclocking of spinner failed!\n",
			       engine->name);
			err = -EINVAL;
		}

err:
		intel_rc6_enable(&gt->rc6);
		intel_engine_pm_put(engine);

		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	igt_spinner_fini(&spin);

	return err;
}