// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/pm_qos.h>
#include <linux/sort.h>

#include "gem/i915_gem_internal.h"

#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt_clock_utils.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
#include "selftest_engine_heartbeat.h"
#include "selftest_rps.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_spinner.h"
#include "selftests/librapl.h"

/* Try to isolate the impact of cstates from determining frequency response */
#define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */

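/*
 * Substitute a no-op for the RPS worker while the selftests run, so that
 * background reclocking cannot adjust the frequency behind our backs.
 */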
static void dummy_rps_work(struct work_struct *wrk)
{
}

static int cmp_u64(const void *A, const void *B)
{
	const u64 *a = A, *b = B;

	if (*a < *b)
		return -1;
	else if (*a > *b)
		return 1;
	else
		return 0;
}

static int cmp_u32(const void *A, const void *B)
{
	const u32 *a = A, *b = B;

	if (*a < *b)
		return -1;
	else if (*a > *b)
		return 1;
	else
		return 0;
}

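/*
 * Build a batch that spins forever on the engine, bumping a CS_GPR
 * counter with MI_MATH on every pass. The increment sequence is unrolled
 * 1024 times so that the cost of the looping MI_BATCH_BUFFER_START at
 * the end is negligible. If @srm is set, each pass also writes the
 * counter to the last dword of the buffer for the CPU to sample.
 * @cancel points at the head of the loop; writing MI_BATCH_BUFFER_END
 * there terminates the spinner.
 */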
static struct i915_vma *
create_spin_counter(struct intel_engine_cs *engine,
		    struct i915_address_space *vm,
		    bool srm,
		    u32 **cancel,
		    u32 **counter)
{
	enum {
		COUNT,
		INC,
		__NGPR__,
	};
#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	unsigned long end;
	u32 *base, *cs;
	int loop, i;
	int err;

	obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	end = obj->base.size / sizeof(u32) - 1;

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_put;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err_put;

	i915_vma_lock(vma);

	base = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(base)) {
		err = PTR_ERR(base);
		goto err_unpin;
	}
	cs = base;

	*cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
	for (i = 0; i < __NGPR__; i++) {
		*cs++ = i915_mmio_reg_offset(CS_GPR(i));
		*cs++ = 0;
		*cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
		*cs++ = 0;
	}

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(CS_GPR(INC));
	*cs++ = 1;

	loop = cs - base;

	/* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
	for (i = 0; i < 1024; i++) {
		*cs++ = MI_MATH(4);
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
		*cs++ = MI_MATH_ADD;
		*cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);

		if (srm) {
			*cs++ = MI_STORE_REGISTER_MEM_GEN8;
			*cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
			*cs++ = lower_32_bits(vma->node.start + end * sizeof(*cs));
			*cs++ = upper_32_bits(vma->node.start + end * sizeof(*cs));
		}
	}

	*cs++ = MI_BATCH_BUFFER_START_GEN8;
	*cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
	*cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
	GEM_BUG_ON(cs - base > end);

	i915_gem_object_flush_map(obj);

	*cancel = base + loop;
	*counter = srm ? memset32(base + end, 0, 1) : NULL;
	return vma;

err_unpin:
	i915_vma_unpin(vma);
	i915_vma_unlock(vma);
err_put:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

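/*
 * Poll the actual frequency (CAGF) until we reach @freq, the value stops
 * changing (no new reading in the last 64 samples), or @timeout_ms
 * expires, backing off the poll interval exponentially from 20us.
 * Returns the last frequency observed.
 */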
static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
{
	u8 history[64], i;
	unsigned long end;
	int sleep;

	i = 0;
	memset(history, freq, sizeof(history));
	sleep = 20;

	/* The PCU does not change instantly, but drifts towards the goal? */
	end = jiffies + msecs_to_jiffies(timeout_ms);
	do {
		u8 act;

		act = read_cagf(rps);
		if (time_after(jiffies, end))
			return act;

		/* Target acquired */
		if (act == freq)
			return act;

		/* Any change within the last N samples? */
		if (!memchr_inv(history, act, sizeof(history)))
			return act;

		history[i] = act;
		i = (i + 1) % ARRAY_SIZE(history);

		usleep_range(sleep, 2 * sleep);
		sleep *= 2;
		if (sleep > timeout_ms * 20)
			sleep = timeout_ms * 20;
	} while (1);
}

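/*
 * Request @freq and wait for both the software tracking and the hardware
 * to settle, returning the frequency actually achieved.
 */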
static u8 rps_set_check(struct intel_rps *rps, u8 freq)
{
	mutex_lock(&rps->lock);
	GEM_BUG_ON(!intel_rps_is_active(rps));
	if (wait_for(!intel_rps_set(rps, freq), 50)) {
		mutex_unlock(&rps->lock);
		return 0;
	}
	GEM_BUG_ON(rps->last_freq != freq);
	mutex_unlock(&rps->lock);

	return wait_for_freq(rps, freq, 50);
}

static void show_pstate_limits(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);

	if (IS_BROXTON(i915)) {
		pr_info("P_STATE_CAP[%x]: 0x%08x\n",
			i915_mmio_reg_offset(BXT_RP_STATE_CAP),
			intel_uncore_read(rps_to_uncore(rps),
					  BXT_RP_STATE_CAP));
	} else if (GRAPHICS_VER(i915) == 9) {
		pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
			i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
			intel_uncore_read(rps_to_uncore(rps),
					  GEN9_RP_STATE_LIMITS));
	}
}

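/*
 * Check our conversions between the RPS evaluation-interval clock and
 * walltime. With RPS itself disabled and the up evaluation interval
 * programmed to its maximum, count how fast GEN6_RP_CUR_UP_EI ticks over
 * a known delay and compare against intel_gt_pm_interval_to_ns() and
 * intel_gt_ns_to_pm_interval(); both directions must agree within a
 * 0.8x-1.25x band, or later frequency measurements cannot be trusted.
 */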
int live_rps_clock_interval(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	intel_gt_pm_get(gt);
	intel_rps_disable(&gt->rps);

	intel_gt_check_clock_frequency(gt);

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		u32 cycles;
		u64 dt;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			st_engine_heartbeat_enable(engine);
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);

		/* Set the evaluation interval to infinity! */
		intel_uncore_write_fw(gt->uncore,
				      GEN6_RP_UP_EI, 0xffffffff);
		intel_uncore_write_fw(gt->uncore,
				      GEN6_RP_UP_THRESHOLD, 0xffffffff);

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
				      GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);

		if (wait_for(intel_uncore_read_fw(gt->uncore,
						  GEN6_RP_CUR_UP_EI),
			     10)) {
			/* Just skip the test; assume lack of HW support */
			pr_notice("%s: rps evaluation interval not ticking\n",
				  engine->name);
			err = -ENODEV;
		} else {
			ktime_t dt_[5];
			u32 cycles_[5];
			int i;

			for (i = 0; i < 5; i++) {
				preempt_disable();

				dt_[i] = ktime_get();
				cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);

				udelay(1000);

				dt_[i] = ktime_sub(ktime_get(), dt_[i]);
				cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);

				preempt_enable();
			}

			/* Use the median of both cycle/dt; close enough */
			sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
			cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
			sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
			dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
		}

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
		intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		if (err == 0) {
			u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
			u32 expected =
				intel_gt_ns_to_pm_interval(gt, dt);

			pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
				engine->name, cycles, time, dt, expected,
				gt->clock_frequency / 1000);

			if (10 * time < 8 * dt ||
			    8 * time > 10 * dt) {
				pr_err("%s: rps clock time does not match walltime!\n",
				       engine->name);
				err = -EINVAL;
			}

			if (10 * expected < 8 * cycles ||
			    8 * expected > 10 * cycles) {
				pr_err("%s: walltime does not match rps clock ticks!\n",
				       engine->name);
				err = -EINVAL;
			}
		}

		if (igt_flush_test(gt->i915))
			err = -EIO;

		break; /* once is enough */
	}

	intel_rps_enable(&gt->rps);
	intel_gt_pm_put(gt);

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (err == -ENODEV) /* skipped, don't report a fail */
		err = 0;

	return err;
}

int live_rps_control(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * Check that the actual frequency matches our requested frequency,
	 * to verify our control mechanism. We have to be careful that the
	 * PCU may throttle the GPU in which case the actual frequency used
	 * will be lower than requested.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	intel_gt_pm_get(gt);
	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		ktime_t min_dt, max_dt;
		int f, limit;
		int min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
			pr_err("%s: could not set minimum frequency [%x], only %x!\n",
			       engine->name, rps->min_freq, read_cagf(rps));
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			show_pstate_limits(rps);
			err = -EINVAL;
			break;
		}

		for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
			if (rps_set_check(rps, f) < f)
				break;
		}

		limit = rps_set_check(rps, f);

		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
			pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
			       engine->name, rps->min_freq, read_cagf(rps));
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			show_pstate_limits(rps);
			err = -EINVAL;
			break;
		}

		max_dt = ktime_get();
		max = rps_set_check(rps, limit);
		max_dt = ktime_sub(ktime_get(), max_dt);

		min_dt = ktime_get();
		min = rps_set_check(rps, rps->min_freq);
		min_dt = ktime_sub(ktime_get(), min_dt);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
			engine->name,
			rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
			rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
			limit, intel_gpu_freq(rps, limit),
			min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));

		if (limit == rps->min_freq) {
			pr_err("%s: GPU throttled to minimum!\n",
			       engine->name);
			show_pstate_limits(rps);
			err = -ENODEV;
			break;
		}

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}
	intel_gt_pm_put(gt);

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

static void show_pcu_config(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	unsigned int max_gpu_freq, min_gpu_freq;
	intel_wakeref_t wakeref;
	int gpu_freq;

	if (!HAS_LLC(i915))
		return;

	min_gpu_freq = rps->min_freq;
	max_gpu_freq = rps->max_freq;
	if (GRAPHICS_VER(i915) >= 9) {
		/* Convert GT frequency to 50 MHz units */
		min_gpu_freq /= GEN9_FREQ_SCALER;
		max_gpu_freq /= GEN9_FREQ_SCALER;
	}

	wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);

	pr_info("%5s  %5s  %5s\n", "GPU", "eCPU", "eRing");
	for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
		int ia_freq = gpu_freq;

		snb_pcode_read(rps_to_gt(rps)->uncore, GEN6_PCODE_READ_MIN_FREQ_TABLE,
			       &ia_freq, NULL);

		pr_info("%5d  %5d  %5d\n",
			gpu_freq * 50,
			((ia_freq >> 0) & 0xff) * 100,
			((ia_freq >> 8) & 0xff) * 100);
	}

	intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
}

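/*
 * Sample the spinner's memory-backed counter across a fixed sleep and
 * convert the delta to a rate: the 10^6 factor turns increments per
 * nanosecond into increments per millisecond, which is the "KHz" figure
 * reported by the tests below. Only the relative scaling with frequency
 * matters, not the absolute value.
 */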
static u64 __measure_frequency(u32 *cntr, int duration_ms)
{
	u64 dc, dt;

	dt = ktime_get();
	dc = READ_ONCE(*cntr);
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dc = READ_ONCE(*cntr) - dc;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dc, dt);
}

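/*
 * Each rate below is measured five times and reduced with a triangle
 * filter: sort the samples and take (x[1] + 2 * x[2] + x[3]) / 4, a
 * weighted median that discards the two extremes. For example, samples
 * of {90, 98, 100, 102, 300} reduce to (98 + 200 + 102) / 4 = 100,
 * suppressing the outlier at 300.
 */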
static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_frequency(cntr, 2);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
				  int duration_ms)
{
	u64 dc, dt;

	dt = ktime_get();
	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dc, dt);
}

static u64 measure_cs_frequency_at(struct intel_rps *rps,
				   struct intel_engine_cs *engine,
				   int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_cs_frequency(engine, 2);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

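/*
 * scaled_within(x, y, f_n, f_d) checks that the ratio x/y lies strictly
 * between f_n/f_d and f_d/f_n; e.g. scaled_within(x, y, 2, 3) accepts
 * 2/3 < x/y < 3/2, i.e. x and y agree to within a factor of 1.5.
 */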
static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
{
	return f_d * x > f_n * y && f_n * x < f_d * y;
}

int live_rps_frequency_cs(void *arg)
{
	void (*saved_work)(struct work_struct *wrk);
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * The premise is that the GPU does change frequency at our behest.
	 * Let's check there is a correspondence between the requested
	 * frequency, the actual frequency, and the observed clock rate.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
		return 0;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_add_request(&qos, CPU_LATENCY);

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct i915_vma *vma;
		u32 *cancel, *cntr;
		struct {
			u64 count;
			int freq;
		} min, max;

		st_engine_heartbeat_disable(engine);

		vma = create_spin_counter(engine,
					  engine->kernel_context->vm, false,
					  &cancel, &cntr);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			st_engine_heartbeat_enable(engine);
			break;
		}

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_vma;
		}

		err = i915_vma_move_to_active(vma, rq, 0);
		if (!err)
			err = rq->engine->emit_bb_start(rq,
							vma->node.start,
							PAGE_SIZE, 0);
		i915_request_add(rq);
		if (err)
			goto err_vma;

		if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
			     10)) {
			pr_err("%s: timed loop did not start\n",
			       engine->name);
			err = -ETIME;
			goto err_vma;
		}

		min.freq = rps->min_freq;
		min.count = measure_cs_frequency_at(rps, engine, &min.freq);

		max.freq = rps->max_freq;
		max.count = measure_cs_frequency_at(rps, engine, &max.freq);

		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
			engine->name,
			min.count, intel_gpu_freq(rps, min.freq),
			max.count, intel_gpu_freq(rps, max.freq),
			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
						     max.freq * min.count));

		if (!scaled_within(max.freq * min.count,
				   min.freq * max.count,
				   2, 3)) {
			int f;

			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
			       engine->name,
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				int act = f;
				u64 count;

				count = measure_cs_frequency_at(rps, engine, &act);
				if (act < f)
					break;

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					engine->name,
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
								     act * min.count));

				f = act; /* may skip ahead [pcu granularity] */
			}

			err = -EINTR; /* ignore error, continue on with test */
		}

err_vma:
		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_flush_map(vma->obj);
		i915_gem_object_unpin_map(vma->obj);
		i915_vma_unpin(vma);
		i915_vma_unlock(vma);
		i915_vma_put(vma);

		st_engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);

	return err;
}

int live_rps_frequency_srm(void *arg)
{
	void (*saved_work)(struct work_struct *wrk);
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * The premise is that the GPU does change frequency at our behest.
	 * Let's check there is a correspondence between the requested
	 * frequency, the actual frequency, and the observed clock rate.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
		return 0;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_add_request(&qos, CPU_LATENCY);

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct i915_vma *vma;
		u32 *cancel, *cntr;
		struct {
			u64 count;
			int freq;
		} min, max;

		st_engine_heartbeat_disable(engine);

		vma = create_spin_counter(engine,
					  engine->kernel_context->vm, true,
					  &cancel, &cntr);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			st_engine_heartbeat_enable(engine);
			break;
		}

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_vma;
		}

		err = i915_vma_move_to_active(vma, rq, 0);
		if (!err)
			err = rq->engine->emit_bb_start(rq,
							vma->node.start,
							PAGE_SIZE, 0);
		i915_request_add(rq);
		if (err)
			goto err_vma;

		if (wait_for(READ_ONCE(*cntr), 10)) {
			pr_err("%s: timed loop did not start\n",
			       engine->name);
			err = -ETIME;
			goto err_vma;
		}

		min.freq = rps->min_freq;
		min.count = measure_frequency_at(rps, cntr, &min.freq);

		max.freq = rps->max_freq;
		max.count = measure_frequency_at(rps, cntr, &max.freq);

		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
			engine->name,
			min.count, intel_gpu_freq(rps, min.freq),
			max.count, intel_gpu_freq(rps, max.freq),
			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
						     max.freq * min.count));

		if (!scaled_within(max.freq * min.count,
				   min.freq * max.count,
				   1, 2)) {
			int f;

			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
			       engine->name,
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				int act = f;
				u64 count;

				count = measure_frequency_at(rps, cntr, &act);
				if (act < f)
					break;

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					engine->name,
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
								     act * min.count));

				f = act; /* may skip ahead [pcu granularity] */
			}

			err = -EINTR; /* ignore error, continue on with test */
		}

err_vma:
		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_flush_map(vma->obj);
		i915_gem_object_unpin_map(vma->obj);
		i915_vma_unpin(vma);
		i915_vma_unlock(vma);
		i915_vma_put(vma);

		st_engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);

	return err;
}

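/*
 * Wait out a whole evaluation interval: let any interval already in
 * flight expire, clear the stale interrupt status by cycling the RPS
 * interrupts, then sleep through a fresh interval so that a new up/down
 * event has the chance to fire.
 */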
static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
{
	/* Flush any previous EI */
	usleep_range(timeout_us, 2 * timeout_us);

	/* Reset the interrupt status */
	rps_disable_interrupts(rps);
	GEM_BUG_ON(rps->pm_iir);
	rps_enable_interrupts(rps);

	/* And then wait for the timeout, for real this time */
	usleep_range(2 * timeout_us, 3 * timeout_us);
}

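/*
 * Pin the GPU at its minimum frequency and saturate the engine with a
 * spinner for a full up evaluation interval. As our dummy worker never
 * services the event, the frequency must remain at the minimum while
 * GEN6_PM_RP_UP_THRESHOLD becomes pending in pm_iir.
 */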
static int __rps_up_interrupt(struct intel_rps *rps,
			      struct intel_engine_cs *engine,
			      struct igt_spinner *spin)
{
	struct intel_uncore *uncore = engine->uncore;
	struct i915_request *rq;
	u32 timeout;

	if (!intel_engine_can_store_dword(engine))
		return 0;

	rps_set_check(rps, rps->min_freq);

	rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	i915_request_get(rq);
	i915_request_add(rq);

	if (!igt_wait_for_spinner(spin, rq)) {
		pr_err("%s: RPS spinner did not start\n",
		       engine->name);
		i915_request_put(rq);
		intel_gt_set_wedged(engine->gt);
		return -EIO;
	}

	if (!intel_rps_is_active(rps)) {
		pr_err("%s: RPS not enabled on starting spinner\n",
		       engine->name);
		igt_spinner_end(spin);
		i915_request_put(rq);
		return -EINVAL;
	}

	if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
		pr_err("%s: RPS did not register UP interrupt\n",
		       engine->name);
		i915_request_put(rq);
		return -EINVAL;
	}

	if (rps->last_freq != rps->min_freq) {
		pr_err("%s: RPS did not program min frequency\n",
		       engine->name);
		i915_request_put(rq);
		return -EINVAL;
	}

	timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
	timeout = DIV_ROUND_UP(timeout, 1000);

	sleep_for_ei(rps, timeout);
	GEM_BUG_ON(i915_request_completed(rq));

	igt_spinner_end(spin);
	i915_request_put(rq);

	if (rps->cur_freq != rps->min_freq) {
		pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
		       engine->name, intel_rps_read_actual_frequency(rps));
		return -EINVAL;
	}

	if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
		pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
		       engine->name, rps->pm_iir,
		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
		return -EINVAL;
	}

	return 0;
}

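/*
 * The converse: park at the maximum frequency with the engine awake but
 * idle (the caller holds off rc6), wait out an evaluation interval, and
 * expect a DOWN threshold/timeout interrupt with the frequency itself
 * still unchanged.
 */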
static int __rps_down_interrupt(struct intel_rps *rps,
				struct intel_engine_cs *engine)
{
	struct intel_uncore *uncore = engine->uncore;
	u32 timeout;

	rps_set_check(rps, rps->max_freq);

	if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
		pr_err("%s: RPS did not register DOWN interrupt\n",
		       engine->name);
		return -EINVAL;
	}

	if (rps->last_freq != rps->max_freq) {
		pr_err("%s: RPS did not program max frequency\n",
		       engine->name);
		return -EINVAL;
	}

	timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
	timeout = DIV_ROUND_UP(timeout, 1000);

	sleep_for_ei(rps, timeout);

	if (rps->cur_freq != rps->max_freq) {
		pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
		       engine->name,
		       intel_rps_read_actual_frequency(rps));
		return -EINVAL;
	}

	if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
		pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
		       engine->name, rps->pm_iir,
		       intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
		       intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
		return -EINVAL;
	}

	return 0;
}

int live_rps_interrupt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	u32 pm_events;
	int err = 0;

	/*
	 * First, let's check whether or not we are receiving interrupts.
	 */

	if (!intel_rps_has_interrupts(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	intel_gt_pm_get(gt);
	pm_events = rps->pm_events;
	intel_gt_pm_put(gt);
	if (!pm_events) {
		pr_err("No RPS PM events registered, but RPS is enabled?\n");
		return -ENODEV;
	}

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		/* Keep the engine busy with a spinner; expect an UP! */
		if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
			intel_gt_pm_wait_for_idle(engine->gt);
			GEM_BUG_ON(intel_rps_is_active(rps));

			st_engine_heartbeat_disable(engine);

			err = __rps_up_interrupt(rps, engine, &spin);

			st_engine_heartbeat_enable(engine);
			if (err)
				goto out;

			intel_gt_pm_wait_for_idle(engine->gt);
		}

		/* Keep the engine awake but idle and check for DOWN */
		if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
			st_engine_heartbeat_disable(engine);
			intel_rc6_disable(&gt->rc6);

			err = __rps_down_interrupt(rps, engine);

			intel_rc6_enable(&gt->rc6);
			st_engine_heartbeat_enable(engine);
			if (err)
				goto out;
		}
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

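/*
 * Estimate power from the RAPL energy counters exposed by librapl:
 * sample the energy in uJ over a fixed sleep and divide by the elapsed
 * time in ns; the 10^6 scale factor makes the quotient come out in the
 * mW reported below.
 */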
static u64 __measure_power(int duration_ms)
{
	u64 dE, dt;

	dt = ktime_get();
	dE = librapl_energy_uJ();
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dE = librapl_energy_uJ() - dE;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dE, dt);
}

static u64 measure_power(struct intel_rps *rps, int *freq)
{
	u64 x[5];
	int i;

	for (i = 0; i < 5; i++)
		x[i] = __measure_power(5);

	*freq = (*freq + intel_rps_read_actual_frequency(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

static u64 measure_power_at(struct intel_rps *rps, int *freq)
{
	*freq = rps_set_check(rps, *freq);
	return measure_power(rps, freq);
}

int live_rps_power(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * Our fundamental assumption is that running at lower frequency
	 * actually saves power. Let's see if our RAPL measurements support
	 * that theory.
	 */

	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	if (!librapl_supported(gt->i915))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct {
			u64 power;
			int freq;
		} min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			st_engine_heartbeat_enable(engine);
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		max.freq = rps->max_freq;
		max.power = measure_power_at(rps, &max.freq);

		min.freq = rps->min_freq;
		min.power = measure_power_at(rps, &min.freq);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
			engine->name,
			min.power, intel_gpu_freq(rps, min.freq),
			max.power, intel_gpu_freq(rps, max.freq));

		if (10 * min.freq >= 9 * max.freq) {
			pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMHz]\n",
				  min.freq, intel_gpu_freq(rps, min.freq),
				  max.freq, intel_gpu_freq(rps, max.freq));
			continue;
		}

		if (11 * min.power > 10 * max.power) {
			pr_err("%s: did not conserve power when setting lower frequency!\n",
			       engine->name);
			err = -EINVAL;
			break;
		}

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

int live_rps_dynamic(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * We've looked at the basics, and have established that we
	 * can change the clock frequency and that the HW will generate
	 * interrupts based on load. Now we check how we integrate those
	 * moving parts into dynamic reclocking based on load.
	 */

	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	if (intel_rps_has_interrupts(rps))
		pr_info("RPS has interrupt support\n");
	if (intel_rps_uses_timer(rps))
		pr_info("RPS has timer support\n");

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct {
			ktime_t dt;
			u8 freq;
		} min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		intel_gt_pm_wait_for_idle(gt);
		GEM_BUG_ON(intel_rps_is_active(rps));
		rps->cur_freq = rps->min_freq;

		intel_engine_pm_get(engine);
		intel_rc6_disable(&gt->rc6);
		GEM_BUG_ON(rps->last_freq != rps->min_freq);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err;
		}

		i915_request_add(rq);

		max.dt = ktime_get();
		max.freq = wait_for_freq(rps, rps->max_freq, 500);
		max.dt = ktime_sub(ktime_get(), max.dt);

		igt_spinner_end(&spin);

		min.dt = ktime_get();
		min.freq = wait_for_freq(rps, rps->min_freq, 2000);
		min.dt = ktime_sub(ktime_get(), min.dt);

		pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n",
			engine->name,
			max.freq, intel_gpu_freq(rps, max.freq),
			ktime_to_ns(max.dt),
			min.freq, intel_gpu_freq(rps, min.freq),
			ktime_to_ns(min.dt));
		if (min.freq >= max.freq) {
			pr_err("%s: dynamic reclocking of spinner failed!\n",
			       engine->name);
			err = -EINVAL;
		}

err:
		intel_rc6_enable(&gt->rc6);
		intel_engine_pm_put(engine);

		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	igt_spinner_fini(&spin);

	return err;
}