   1// SPDX-License-Identifier: MIT
   2/*
   3 * Copyright © 2019 Intel Corporation
   4 */
   5
   6#include <linux/string_helpers.h>
   7
   8#include <drm/i915_drm.h>
   9
  10#include "i915_drv.h"
  11#include "i915_irq.h"
  12#include "intel_breadcrumbs.h"
  13#include "intel_gt.h"
  14#include "intel_gt_clock_utils.h"
  15#include "intel_gt_irq.h"
  16#include "intel_gt_pm_irq.h"
  17#include "intel_gt_regs.h"
  18#include "intel_mchbar_regs.h"
  19#include "intel_pcode.h"
  20#include "intel_rps.h"
  21#include "vlv_sideband.h"
  22#include "../../../platform/x86/intel_ips.h"
  23
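/*
 * Upper bound on the software RPS evaluation interval: rps_timer() starts
 * at a 1ms interval (see rps_start_timer()) and doubles it on each expiry,
 * capped at BUSY_MAX_EI.
 */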
  24#define BUSY_MAX_EI	20u /* ms */
  25
  26/*
  27 * Lock protecting IPS related data structures
  28 */
  29static DEFINE_SPINLOCK(mchdev_lock);
  30
  31static struct intel_gt *rps_to_gt(struct intel_rps *rps)
  32{
  33	return container_of(rps, struct intel_gt, rps);
  34}
  35
  36static struct drm_i915_private *rps_to_i915(struct intel_rps *rps)
  37{
  38	return rps_to_gt(rps)->i915;
  39}
  40
  41static struct intel_uncore *rps_to_uncore(struct intel_rps *rps)
  42{
  43	return rps_to_gt(rps)->uncore;
  44}
  45
  46static struct intel_guc_slpc *rps_to_slpc(struct intel_rps *rps)
  47{
  48	struct intel_gt *gt = rps_to_gt(rps);
  49
  50	return &gt->uc.guc.slpc;
  51}
  52
  53static bool rps_uses_slpc(struct intel_rps *rps)
  54{
  55	struct intel_gt *gt = rps_to_gt(rps);
  56
  57	return intel_uc_uses_guc_slpc(&gt->uc);
  58}
  59
  60static u32 rps_pm_sanitize_mask(struct intel_rps *rps, u32 mask)
  61{
  62	return mask & ~rps->pm_intrmsk_mbz;
  63}
  64
  65static void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
  66{
  67	intel_uncore_write_fw(uncore, reg, val);
  68}
  69
  70static void rps_timer(struct timer_list *t)
  71{
  72	struct intel_rps *rps = from_timer(rps, t, timer);
  73	struct intel_engine_cs *engine;
  74	ktime_t dt, last, timestamp;
  75	enum intel_engine_id id;
  76	s64 max_busy[3] = {};
  77
  78	timestamp = 0;
  79	for_each_engine(engine, rps_to_gt(rps), id) {
  80		s64 busy;
  81		int i;
  82
  83		dt = intel_engine_get_busy_time(engine, &timestamp);
  84		last = engine->stats.rps;
  85		engine->stats.rps = dt;
  86
  87		busy = ktime_to_ns(ktime_sub(dt, last));
  88		for (i = 0; i < ARRAY_SIZE(max_busy); i++) {
  89			if (busy > max_busy[i])
  90				swap(busy, max_busy[i]);
  91		}
  92	}
  93	last = rps->pm_timestamp;
  94	rps->pm_timestamp = timestamp;
  95
  96	if (intel_rps_is_active(rps)) {
  97		s64 busy;
  98		int i;
  99
 100		dt = ktime_sub(timestamp, last);
 101
 102		/*
 103		 * Our goal is to evaluate each engine independently, so we run
 104		 * at the lowest clocks required to sustain the heaviest
 105		 * workload. However, a task may be split into sequential
 106		 * dependent operations across a set of engines, such that
 107		 * the independent contributions do not account for high load,
 108		 * but overall the task is GPU bound. For example, consider
 109		 * video decode on vcs followed by colour post-processing
 110		 * on vecs, followed by general post-processing on rcs.
  111		 * Since multiple engines being active does not necessarily imply
  112		 * a single continuous workload across all engines, we hedge our
 113		 * bets by only contributing a factor of the distributed
 114		 * load into our busyness calculation.
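		 * For example, with max_busy = { 8ms, 4ms, 2ms } the hedged
		 * busyness computed below is 8 + 4/2 + 2/4 = 10.5ms.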
 115		 */
 116		busy = max_busy[0];
 117		for (i = 1; i < ARRAY_SIZE(max_busy); i++) {
 118			if (!max_busy[i])
 119				break;
 120
 121			busy += div_u64(max_busy[i], 1 << i);
 122		}
 123		GT_TRACE(rps_to_gt(rps),
 124			 "busy:%lld [%d%%], max:[%lld, %lld, %lld], interval:%d\n",
 125			 busy, (int)div64_u64(100 * busy, dt),
 126			 max_busy[0], max_busy[1], max_busy[2],
 127			 rps->pm_interval);
 128
 129		if (100 * busy > rps->power.up_threshold * dt &&
 130		    rps->cur_freq < rps->max_freq_softlimit) {
 131			rps->pm_iir |= GEN6_PM_RP_UP_THRESHOLD;
 132			rps->pm_interval = 1;
 133			schedule_work(&rps->work);
 134		} else if (100 * busy < rps->power.down_threshold * dt &&
 135			   rps->cur_freq > rps->min_freq_softlimit) {
 136			rps->pm_iir |= GEN6_PM_RP_DOWN_THRESHOLD;
 137			rps->pm_interval = 1;
 138			schedule_work(&rps->work);
 139		} else {
 140			rps->last_adj = 0;
 141		}
 142
 143		mod_timer(&rps->timer,
 144			  jiffies + msecs_to_jiffies(rps->pm_interval));
 145		rps->pm_interval = min(rps->pm_interval * 2, BUSY_MAX_EI);
 146	}
 147}
 148
 149static void rps_start_timer(struct intel_rps *rps)
 150{
 151	rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp);
 152	rps->pm_interval = 1;
 153	mod_timer(&rps->timer, jiffies + 1);
 154}
 155
 156static void rps_stop_timer(struct intel_rps *rps)
 157{
 158	del_timer_sync(&rps->timer);
 159	rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp);
 160	cancel_work_sync(&rps->work);
 161}
 162
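/*
 * Build the GEN6_PMINTRMSK value for frequency @val: bits set in the
 * written register mask off the corresponding PM interrupts, so only the
 * up/down events that can still move the frequency are left unmasked.
 */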
 163static u32 rps_pm_mask(struct intel_rps *rps, u8 val)
 164{
 165	u32 mask = 0;
 166
 167	/* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */
 168	if (val > rps->min_freq_softlimit)
 169		mask |= (GEN6_PM_RP_UP_EI_EXPIRED |
 170			 GEN6_PM_RP_DOWN_THRESHOLD |
 171			 GEN6_PM_RP_DOWN_TIMEOUT);
 172
 173	if (val < rps->max_freq_softlimit)
 174		mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
 175
 176	mask &= rps->pm_events;
 177
 178	return rps_pm_sanitize_mask(rps, ~mask);
 179}
 180
 181static void rps_reset_ei(struct intel_rps *rps)
 182{
 183	memset(&rps->ei, 0, sizeof(rps->ei));
 184}
 185
 186static void rps_enable_interrupts(struct intel_rps *rps)
 187{
 188	struct intel_gt *gt = rps_to_gt(rps);
 189
 190	GEM_BUG_ON(rps_uses_slpc(rps));
 191
 192	GT_TRACE(gt, "interrupts:on rps->pm_events: %x, rps_pm_mask:%x\n",
 193		 rps->pm_events, rps_pm_mask(rps, rps->last_freq));
 194
 195	rps_reset_ei(rps);
 196
 197	spin_lock_irq(gt->irq_lock);
 198	gen6_gt_pm_enable_irq(gt, rps->pm_events);
 199	spin_unlock_irq(gt->irq_lock);
 200
 201	intel_uncore_write(gt->uncore,
 202			   GEN6_PMINTRMSK, rps_pm_mask(rps, rps->last_freq));
 203}
 204
 205static void gen6_rps_reset_interrupts(struct intel_rps *rps)
 206{
 207	gen6_gt_pm_reset_iir(rps_to_gt(rps), GEN6_PM_RPS_EVENTS);
 208}
 209
 210static void gen11_rps_reset_interrupts(struct intel_rps *rps)
 211{
 212	while (gen11_gt_reset_one_iir(rps_to_gt(rps), 0, GEN11_GTPM))
 213		;
 214}
 215
 216static void rps_reset_interrupts(struct intel_rps *rps)
 217{
 218	struct intel_gt *gt = rps_to_gt(rps);
 219
 220	spin_lock_irq(gt->irq_lock);
 221	if (GRAPHICS_VER(gt->i915) >= 11)
 222		gen11_rps_reset_interrupts(rps);
 223	else
 224		gen6_rps_reset_interrupts(rps);
 225
 226	rps->pm_iir = 0;
 227	spin_unlock_irq(gt->irq_lock);
 228}
 229
 230static void rps_disable_interrupts(struct intel_rps *rps)
 231{
 232	struct intel_gt *gt = rps_to_gt(rps);
 233
 234	intel_uncore_write(gt->uncore,
 235			   GEN6_PMINTRMSK, rps_pm_sanitize_mask(rps, ~0u));
 236
 237	spin_lock_irq(gt->irq_lock);
 238	gen6_gt_pm_disable_irq(gt, GEN6_PM_RPS_EVENTS);
 239	spin_unlock_irq(gt->irq_lock);
 240
 241	intel_synchronize_irq(gt->i915);
 242
 243	/*
 244	 * Now that we will not be generating any more work, flush any
 245	 * outstanding tasks. As we are called on the RPS idle path,
 246	 * we will reset the GPU to minimum frequencies, so the current
 247	 * state of the worker can be discarded.
 248	 */
 249	cancel_work_sync(&rps->work);
 250
 251	rps_reset_interrupts(rps);
 252	GT_TRACE(gt, "interrupts:off\n");
 253}
 254
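/*
 * IPS chipset power coefficients: @i selects the FSB-frequency bucket
 * computed in gen5_rps_init(), @t matches the memory frequency in MHz, and
 * @m/@c are the slope/intercept used by __ips_chipset_val().
 */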
 255static const struct cparams {
 256	u16 i;
 257	u16 t;
 258	u16 m;
 259	u16 c;
 260} cparams[] = {
 261	{ 1, 1333, 301, 28664 },
 262	{ 1, 1066, 294, 24460 },
 263	{ 1, 800, 294, 25192 },
 264	{ 0, 1333, 276, 27605 },
 265	{ 0, 1066, 276, 27605 },
 266	{ 0, 800, 231, 23784 },
 267};
 268
 269static void gen5_rps_init(struct intel_rps *rps)
 270{
 271	struct drm_i915_private *i915 = rps_to_i915(rps);
 272	struct intel_uncore *uncore = rps_to_uncore(rps);
 273	u8 fmax, fmin, fstart;
 274	u32 rgvmodectl;
 275	int c_m, i;
 276
 277	if (i915->fsb_freq <= 3200)
 278		c_m = 0;
 279	else if (i915->fsb_freq <= 4800)
 280		c_m = 1;
 281	else
 282		c_m = 2;
 283
 284	for (i = 0; i < ARRAY_SIZE(cparams); i++) {
 285		if (cparams[i].i == c_m && cparams[i].t == i915->mem_freq) {
 286			rps->ips.m = cparams[i].m;
 287			rps->ips.c = cparams[i].c;
 288			break;
 289		}
 290	}
 291
 292	rgvmodectl = intel_uncore_read(uncore, MEMMODECTL);
 293
 294	/* Set up min, max, and cur for interrupt handling */
 295	fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
 296	fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
 297	fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
 298		MEMMODE_FSTART_SHIFT;
 299	drm_dbg(&i915->drm, "fmax: %d, fmin: %d, fstart: %d\n",
 300		fmax, fmin, fstart);
 301
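	/*
	 * Note the ilk bins are inverted with respect to the usual RPS
	 * convention (larger value == higher frequency): fmax becomes
	 * min_freq and fmin becomes max_freq, and gen5_invert_freq()
	 * converts back into an ips delay when programming the hardware.
	 */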
 302	rps->min_freq = fmax;
 303	rps->efficient_freq = fstart;
 304	rps->max_freq = fmin;
 305}
 306
 307static unsigned long
 308__ips_chipset_val(struct intel_ips *ips)
 309{
 310	struct intel_uncore *uncore =
 311		rps_to_uncore(container_of(ips, struct intel_rps, ips));
 312	unsigned long now = jiffies_to_msecs(jiffies), dt;
 313	unsigned long result;
 314	u64 total, delta;
 315
 316	lockdep_assert_held(&mchdev_lock);
 317
 318	/*
 319	 * Prevent division-by-zero if we are asking too fast.
 320	 * Also, we don't get interesting results if we are polling
 321	 * faster than once in 10ms, so just return the saved value
 322	 * in such cases.
 323	 */
 324	dt = now - ips->last_time1;
 325	if (dt <= 10)
 326		return ips->chipset_power;
 327
 328	/* FIXME: handle per-counter overflow */
 329	total = intel_uncore_read(uncore, DMIEC);
 330	total += intel_uncore_read(uncore, DDREC);
 331	total += intel_uncore_read(uncore, CSIEC);
 332
 333	delta = total - ips->last_count1;
 334
 335	result = div_u64(div_u64(ips->m * delta, dt) + ips->c, 10);
 336
 337	ips->last_count1 = total;
 338	ips->last_time1 = now;
 339
 340	ips->chipset_power = result;
 341
 342	return result;
 343}
 344
 345static unsigned long ips_mch_val(struct intel_uncore *uncore)
 346{
 347	unsigned int m, x, b;
 348	u32 tsfs;
 349
 350	tsfs = intel_uncore_read(uncore, TSFS);
 351	x = intel_uncore_read8(uncore, TR1);
 352
 353	b = tsfs & TSFS_INTR_MASK;
 354	m = (tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT;
 355
 356	return m * x / 127 - b;
 357}
 358
 359static int _pxvid_to_vd(u8 pxvid)
 360{
 361	if (pxvid == 0)
 362		return 0;
 363
 364	if (pxvid >= 8 && pxvid < 31)
 365		pxvid = 31;
 366
 367	return (pxvid + 2) * 125;
 368}
 369
 370static u32 pvid_to_extvid(struct drm_i915_private *i915, u8 pxvid)
 371{
 372	const int vd = _pxvid_to_vd(pxvid);
 373
 374	if (INTEL_INFO(i915)->is_mobile)
 375		return max(vd - 1125, 0);
 376
 377	return vd;
 378}
 379
 380static void __gen5_ips_update(struct intel_ips *ips)
 381{
 382	struct intel_uncore *uncore =
 383		rps_to_uncore(container_of(ips, struct intel_rps, ips));
 384	u64 now, delta, dt;
 385	u32 count;
 386
 387	lockdep_assert_held(&mchdev_lock);
 388
 389	now = ktime_get_raw_ns();
 390	dt = now - ips->last_time2;
 391	do_div(dt, NSEC_PER_MSEC);
 392
 393	/* Don't divide by 0 */
 394	if (dt <= 10)
 395		return;
 396
 397	count = intel_uncore_read(uncore, GFXEC);
 398	delta = count - ips->last_count2;
 399
 400	ips->last_count2 = count;
 401	ips->last_time2 = now;
 402
 403	/* More magic constants... */
 404	ips->gfx_power = div_u64(delta * 1181, dt * 10);
 405}
 406
 407static void gen5_rps_update(struct intel_rps *rps)
 408{
 409	spin_lock_irq(&mchdev_lock);
 410	__gen5_ips_update(&rps->ips);
 411	spin_unlock_irq(&mchdev_lock);
 412}
 413
 414static unsigned int gen5_invert_freq(struct intel_rps *rps,
 415				     unsigned int val)
 416{
 417	/* Invert the frequency bin into an ips delay */
 418	val = rps->max_freq - val;
 419	val = rps->min_freq + val;
 420
 421	return val;
 422}
 423
 424static int __gen5_rps_set(struct intel_rps *rps, u8 val)
 425{
 426	struct intel_uncore *uncore = rps_to_uncore(rps);
 427	u16 rgvswctl;
 428
 429	lockdep_assert_held(&mchdev_lock);
 430
 431	rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
 432	if (rgvswctl & MEMCTL_CMD_STS) {
 433		drm_dbg(&rps_to_i915(rps)->drm,
 434			"gpu busy, RCS change rejected\n");
 435		return -EBUSY; /* still busy with another command */
 436	}
 437
 438	/* Invert the frequency bin into an ips delay */
 439	val = gen5_invert_freq(rps, val);
 440
 441	rgvswctl =
 442		(MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
 443		(val << MEMCTL_FREQ_SHIFT) |
 444		MEMCTL_SFCAVM;
 445	intel_uncore_write16(uncore, MEMSWCTL, rgvswctl);
 446	intel_uncore_posting_read16(uncore, MEMSWCTL);
 447
 448	rgvswctl |= MEMCTL_CMD_STS;
 449	intel_uncore_write16(uncore, MEMSWCTL, rgvswctl);
 450
 451	return 0;
 452}
 453
 454static int gen5_rps_set(struct intel_rps *rps, u8 val)
 455{
 456	int err;
 457
 458	spin_lock_irq(&mchdev_lock);
 459	err = __gen5_rps_set(rps, val);
 460	spin_unlock_irq(&mchdev_lock);
 461
 462	return err;
 463}
 464
 465static unsigned long intel_pxfreq(u32 vidfreq)
 466{
 467	int div = (vidfreq & 0x3f0000) >> 16;
 468	int post = (vidfreq & 0x3000) >> 12;
 469	int pre = (vidfreq & 0x7);
 470
 471	if (!pre)
 472		return 0;
 473
 474	return div * 133333 / (pre << post);
 475}
 476
 477static unsigned int init_emon(struct intel_uncore *uncore)
 478{
 479	u8 pxw[16];
 480	int i;
 481
 482	/* Disable to program */
 483	intel_uncore_write(uncore, ECR, 0);
 484	intel_uncore_posting_read(uncore, ECR);
 485
 486	/* Program energy weights for various events */
 487	intel_uncore_write(uncore, SDEW, 0x15040d00);
 488	intel_uncore_write(uncore, CSIEW0, 0x007f0000);
 489	intel_uncore_write(uncore, CSIEW1, 0x1e220004);
 490	intel_uncore_write(uncore, CSIEW2, 0x04000004);
 491
 492	for (i = 0; i < 5; i++)
 493		intel_uncore_write(uncore, PEW(i), 0);
 494	for (i = 0; i < 3; i++)
 495		intel_uncore_write(uncore, DEW(i), 0);
 496
 497	/* Program P-state weights to account for frequency power adjustment */
 498	for (i = 0; i < 16; i++) {
 499		u32 pxvidfreq = intel_uncore_read(uncore, PXVFREQ(i));
 500		unsigned int freq = intel_pxfreq(pxvidfreq);
 501		unsigned int vid =
 502			(pxvidfreq & PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT;
 503		unsigned int val;
 504
 505		val = vid * vid * freq / 1000 * 255;
 506		val /= 127 * 127 * 900;
 507
 508		pxw[i] = val;
 509	}
 510	/* Render standby states get 0 weight */
 511	pxw[14] = 0;
 512	pxw[15] = 0;
 513
 514	for (i = 0; i < 4; i++) {
 515		intel_uncore_write(uncore, PXW(i),
 516				   pxw[i * 4 + 0] << 24 |
 517				   pxw[i * 4 + 1] << 16 |
 518				   pxw[i * 4 + 2] <<  8 |
 519				   pxw[i * 4 + 3] <<  0);
 520	}
 521
 522	/* Adjust magic regs to magic values (more experimental results) */
 523	intel_uncore_write(uncore, OGW0, 0);
 524	intel_uncore_write(uncore, OGW1, 0);
 525	intel_uncore_write(uncore, EG0, 0x00007f00);
 526	intel_uncore_write(uncore, EG1, 0x0000000e);
 527	intel_uncore_write(uncore, EG2, 0x000e0000);
 528	intel_uncore_write(uncore, EG3, 0x68000300);
 529	intel_uncore_write(uncore, EG4, 0x42000000);
 530	intel_uncore_write(uncore, EG5, 0x00140031);
 531	intel_uncore_write(uncore, EG6, 0);
 532	intel_uncore_write(uncore, EG7, 0);
 533
 534	for (i = 0; i < 8; i++)
 535		intel_uncore_write(uncore, PXWL(i), 0);
 536
 537	/* Enable PMON + select events */
 538	intel_uncore_write(uncore, ECR, 0x80000019);
 539
 540	return intel_uncore_read(uncore, LCFUSE02) & LCFUSE_HIV_MASK;
 541}
 542
 543static bool gen5_rps_enable(struct intel_rps *rps)
 544{
 545	struct drm_i915_private *i915 = rps_to_i915(rps);
 546	struct intel_uncore *uncore = rps_to_uncore(rps);
 547	u8 fstart, vstart;
 548	u32 rgvmodectl;
 549
 550	spin_lock_irq(&mchdev_lock);
 551
 552	rgvmodectl = intel_uncore_read(uncore, MEMMODECTL);
 553
 554	/* Enable temp reporting */
 555	intel_uncore_write16(uncore, PMMISC,
 556			     intel_uncore_read16(uncore, PMMISC) | MCPPCE_EN);
 557	intel_uncore_write16(uncore, TSC1,
 558			     intel_uncore_read16(uncore, TSC1) | TSE);
 559
 560	/* 100ms RC evaluation intervals */
 561	intel_uncore_write(uncore, RCUPEI, 100000);
 562	intel_uncore_write(uncore, RCDNEI, 100000);
 563
 564	/* Set max/min thresholds to 90ms and 80ms respectively */
 565	intel_uncore_write(uncore, RCBMAXAVG, 90000);
 566	intel_uncore_write(uncore, RCBMINAVG, 80000);
 567
 568	intel_uncore_write(uncore, MEMIHYST, 1);
 569
 570	/* Set up min, max, and cur for interrupt handling */
 571	fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
 572		MEMMODE_FSTART_SHIFT;
 573
 574	vstart = (intel_uncore_read(uncore, PXVFREQ(fstart)) &
 575		  PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT;
 576
 577	intel_uncore_write(uncore,
 578			   MEMINTREN,
 579			   MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
 580
 581	intel_uncore_write(uncore, VIDSTART, vstart);
 582	intel_uncore_posting_read(uncore, VIDSTART);
 583
 584	rgvmodectl |= MEMMODE_SWMODE_EN;
 585	intel_uncore_write(uncore, MEMMODECTL, rgvmodectl);
 586
 587	if (wait_for_atomic((intel_uncore_read(uncore, MEMSWCTL) &
 588			     MEMCTL_CMD_STS) == 0, 10))
 589		drm_err(&uncore->i915->drm,
 590			"stuck trying to change perf mode\n");
 591	mdelay(1);
 592
 593	__gen5_rps_set(rps, rps->cur_freq);
 594
 595	rps->ips.last_count1 = intel_uncore_read(uncore, DMIEC);
 596	rps->ips.last_count1 += intel_uncore_read(uncore, DDREC);
 597	rps->ips.last_count1 += intel_uncore_read(uncore, CSIEC);
 598	rps->ips.last_time1 = jiffies_to_msecs(jiffies);
 599
 600	rps->ips.last_count2 = intel_uncore_read(uncore, GFXEC);
 601	rps->ips.last_time2 = ktime_get_raw_ns();
 602
 603	spin_lock(&i915->irq_lock);
 604	ilk_enable_display_irq(i915, DE_PCU_EVENT);
 605	spin_unlock(&i915->irq_lock);
 606
 607	spin_unlock_irq(&mchdev_lock);
 608
 609	rps->ips.corr = init_emon(uncore);
 610
 611	return true;
 612}
 613
 614static void gen5_rps_disable(struct intel_rps *rps)
 615{
 616	struct drm_i915_private *i915 = rps_to_i915(rps);
 617	struct intel_uncore *uncore = rps_to_uncore(rps);
 618	u16 rgvswctl;
 619
 620	spin_lock_irq(&mchdev_lock);
 621
 622	spin_lock(&i915->irq_lock);
 623	ilk_disable_display_irq(i915, DE_PCU_EVENT);
 624	spin_unlock(&i915->irq_lock);
 625
 626	rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
 627
 628	/* Ack interrupts, disable EFC interrupt */
 629	intel_uncore_rmw(uncore, MEMINTREN, MEMINT_EVAL_CHG_EN, 0);
 630	intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
 631
 632	/* Go back to the starting frequency */
 633	__gen5_rps_set(rps, rps->idle_freq);
 634	mdelay(1);
 635	rgvswctl |= MEMCTL_CMD_STS;
 636	intel_uncore_write(uncore, MEMSWCTL, rgvswctl);
 637	mdelay(1);
 638
 639	spin_unlock_irq(&mchdev_lock);
 640}
 641
 642static u32 rps_limits(struct intel_rps *rps, u8 val)
 643{
 644	u32 limits;
 645
 646	/*
 647	 * Only set the down limit when we've reached the lowest level to avoid
 648	 * getting more interrupts, otherwise leave this clear. This prevents a
 649	 * race in the hw when coming out of rc6: There's a tiny window where
 650	 * the hw runs at the minimal clock before selecting the desired
 651	 * frequency, if the down threshold expires in that window we will not
 652	 * receive a down interrupt.
 653	 */
 654	if (GRAPHICS_VER(rps_to_i915(rps)) >= 9) {
 655		limits = rps->max_freq_softlimit << 23;
 656		if (val <= rps->min_freq_softlimit)
 657			limits |= rps->min_freq_softlimit << 14;
 658	} else {
 659		limits = rps->max_freq_softlimit << 24;
 660		if (val <= rps->min_freq_softlimit)
 661			limits |= rps->min_freq_softlimit << 16;
 662	}
 663
 664	return limits;
 665}
 666
 667static void rps_set_power(struct intel_rps *rps, int new_power)
 668{
 669	struct intel_gt *gt = rps_to_gt(rps);
 670	struct intel_uncore *uncore = gt->uncore;
 671	u32 threshold_up = 0, threshold_down = 0; /* in % */
 672	u32 ei_up = 0, ei_down = 0;
 673
 674	lockdep_assert_held(&rps->power.mutex);
 675
 676	if (new_power == rps->power.mode)
 677		return;
 678
 679	threshold_up = 95;
 680	threshold_down = 85;
 681
 682	/* Note the units here are not exactly 1us, but 1280ns. */
 683	switch (new_power) {
 684	case LOW_POWER:
 685		ei_up = 16000;
 686		ei_down = 32000;
 687		break;
 688
 689	case BETWEEN:
 690		ei_up = 13000;
 691		ei_down = 32000;
 692		break;
 693
 694	case HIGH_POWER:
 695		ei_up = 10000;
 696		ei_down = 32000;
 697		break;
 698	}
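	/*
	 * e.g. HIGH_POWER upclocks when the GT was more than 95% busy over a
	 * ~10ms evaluation window, while every mode downclocks when it was
	 * less than 85% busy over a ~32ms window.
	 */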
 699
  700	/* Once byt can survive dynamic sw freq adjustments without
  701	 * hanging the system, this restriction can be lifted.
  702	 */
 703	if (IS_VALLEYVIEW(gt->i915))
 704		goto skip_hw_write;
 705
 706	GT_TRACE(gt,
 707		 "changing power mode [%d], up %d%% @ %dus, down %d%% @ %dus\n",
 708		 new_power, threshold_up, ei_up, threshold_down, ei_down);
 709
 710	set(uncore, GEN6_RP_UP_EI,
 711	    intel_gt_ns_to_pm_interval(gt, ei_up * 1000));
 712	set(uncore, GEN6_RP_UP_THRESHOLD,
 713	    intel_gt_ns_to_pm_interval(gt, ei_up * threshold_up * 10));
 714
 715	set(uncore, GEN6_RP_DOWN_EI,
 716	    intel_gt_ns_to_pm_interval(gt, ei_down * 1000));
 717	set(uncore, GEN6_RP_DOWN_THRESHOLD,
 718	    intel_gt_ns_to_pm_interval(gt, ei_down * threshold_down * 10));
 719
 720	set(uncore, GEN6_RP_CONTROL,
 721	    (GRAPHICS_VER(gt->i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) |
 722	    GEN6_RP_MEDIA_HW_NORMAL_MODE |
 723	    GEN6_RP_MEDIA_IS_GFX |
 724	    GEN6_RP_ENABLE |
 725	    GEN6_RP_UP_BUSY_AVG |
 726	    GEN6_RP_DOWN_IDLE_AVG);
 727
 728skip_hw_write:
 729	rps->power.mode = new_power;
 730	rps->power.up_threshold = threshold_up;
 731	rps->power.down_threshold = threshold_down;
 732}
 733
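/*
 * Choose the autotuning mode for the requested frequency: roughly LOW_POWER
 * at or below RPe, HIGH_POWER at or above RP0 (with some hysteresis around
 * the RP1/RP0 midpoint when dropping), BETWEEN in between; the softlimits
 * and the interactive flag override the result.
 */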
 734static void gen6_rps_set_thresholds(struct intel_rps *rps, u8 val)
 735{
 736	int new_power;
 737
 738	new_power = rps->power.mode;
 739	switch (rps->power.mode) {
 740	case LOW_POWER:
 741		if (val > rps->efficient_freq + 1 &&
 742		    val > rps->cur_freq)
 743			new_power = BETWEEN;
 744		break;
 745
 746	case BETWEEN:
 747		if (val <= rps->efficient_freq &&
 748		    val < rps->cur_freq)
 749			new_power = LOW_POWER;
 750		else if (val >= rps->rp0_freq &&
 751			 val > rps->cur_freq)
 752			new_power = HIGH_POWER;
 753		break;
 754
 755	case HIGH_POWER:
 756		if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
 757		    val < rps->cur_freq)
 758			new_power = BETWEEN;
 759		break;
 760	}
 761	/* Max/min bins are special */
 762	if (val <= rps->min_freq_softlimit)
 763		new_power = LOW_POWER;
 764	if (val >= rps->max_freq_softlimit)
 765		new_power = HIGH_POWER;
 766
 767	mutex_lock(&rps->power.mutex);
 768	if (rps->power.interactive)
 769		new_power = HIGH_POWER;
 770	rps_set_power(rps, new_power);
 771	mutex_unlock(&rps->power.mutex);
 772}
 773
 774void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive)
 775{
 776	GT_TRACE(rps_to_gt(rps), "mark interactive: %s\n",
 777		 str_yes_no(interactive));
 778
 779	mutex_lock(&rps->power.mutex);
 780	if (interactive) {
 781		if (!rps->power.interactive++ && intel_rps_is_active(rps))
 782			rps_set_power(rps, HIGH_POWER);
 783	} else {
 784		GEM_BUG_ON(!rps->power.interactive);
 785		rps->power.interactive--;
 786	}
 787	mutex_unlock(&rps->power.mutex);
 788}
 789
 790static int gen6_rps_set(struct intel_rps *rps, u8 val)
 791{
 792	struct intel_uncore *uncore = rps_to_uncore(rps);
 793	struct drm_i915_private *i915 = rps_to_i915(rps);
 794	u32 swreq;
 795
 796	GEM_BUG_ON(rps_uses_slpc(rps));
 797
 798	if (GRAPHICS_VER(i915) >= 9)
 799		swreq = GEN9_FREQUENCY(val);
 800	else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
 801		swreq = HSW_FREQUENCY(val);
 802	else
 803		swreq = (GEN6_FREQUENCY(val) |
 804			 GEN6_OFFSET(0) |
 805			 GEN6_AGGRESSIVE_TURBO);
 806	set(uncore, GEN6_RPNSWREQ, swreq);
 807
 808	GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d, swreq:%x\n",
 809		 val, intel_gpu_freq(rps, val), swreq);
 810
 811	return 0;
 812}
 813
 814static int vlv_rps_set(struct intel_rps *rps, u8 val)
 815{
 816	struct drm_i915_private *i915 = rps_to_i915(rps);
 817	int err;
 818
 819	vlv_punit_get(i915);
 820	err = vlv_punit_write(i915, PUNIT_REG_GPU_FREQ_REQ, val);
 821	vlv_punit_put(i915);
 822
 823	GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d\n",
 824		 val, intel_gpu_freq(rps, val));
 825
 826	return err;
 827}
 828
 829static int rps_set(struct intel_rps *rps, u8 val, bool update)
 830{
 831	struct drm_i915_private *i915 = rps_to_i915(rps);
 832	int err;
 833
 834	if (val == rps->last_freq)
 835		return 0;
 836
 837	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
 838		err = vlv_rps_set(rps, val);
 839	else if (GRAPHICS_VER(i915) >= 6)
 840		err = gen6_rps_set(rps, val);
 841	else
 842		err = gen5_rps_set(rps, val);
 843	if (err)
 844		return err;
 845
 846	if (update && GRAPHICS_VER(i915) >= 6)
 847		gen6_rps_set_thresholds(rps, val);
 848	rps->last_freq = val;
 849
 850	return 0;
 851}
 852
 853void intel_rps_unpark(struct intel_rps *rps)
 854{
 855	if (!intel_rps_is_enabled(rps))
 856		return;
 857
 858	GT_TRACE(rps_to_gt(rps), "unpark:%x\n", rps->cur_freq);
 859
 860	/*
 861	 * Use the user's desired frequency as a guide, but for better
 862	 * performance, jump directly to RPe as our starting frequency.
 863	 */
 864	mutex_lock(&rps->lock);
 865
 866	intel_rps_set_active(rps);
 867	intel_rps_set(rps,
 868		      clamp(rps->cur_freq,
 869			    rps->min_freq_softlimit,
 870			    rps->max_freq_softlimit));
 871
 872	mutex_unlock(&rps->lock);
 873
 874	rps->pm_iir = 0;
 875	if (intel_rps_has_interrupts(rps))
 876		rps_enable_interrupts(rps);
 877	if (intel_rps_uses_timer(rps))
 878		rps_start_timer(rps);
 879
 880	if (GRAPHICS_VER(rps_to_i915(rps)) == 5)
 881		gen5_rps_update(rps);
 882}
 883
 884void intel_rps_park(struct intel_rps *rps)
 885{
 886	int adj;
 887
 888	if (!intel_rps_is_enabled(rps))
 889		return;
 890
 891	if (!intel_rps_clear_active(rps))
 892		return;
 893
 894	if (intel_rps_uses_timer(rps))
 895		rps_stop_timer(rps);
 896	if (intel_rps_has_interrupts(rps))
 897		rps_disable_interrupts(rps);
 898
 899	if (rps->last_freq <= rps->idle_freq)
 900		return;
 901
 902	/*
 903	 * The punit delays the write of the frequency and voltage until it
 904	 * determines the GPU is awake. During normal usage we don't want to
 905	 * waste power changing the frequency if the GPU is sleeping (rc6).
  906	 * However, the GPU and driver are now idle and we do not want to delay
 907	 * switching to minimum voltage (reducing power whilst idle) as we do
 908	 * not expect to be woken in the near future and so must flush the
 909	 * change by waking the device.
 910	 *
 911	 * We choose to take the media powerwell (either would do to trick the
 912	 * punit into committing the voltage change) as that takes a lot less
 913	 * power than the render powerwell.
 914	 */
 915	intel_uncore_forcewake_get(rps_to_uncore(rps), FORCEWAKE_MEDIA);
 916	rps_set(rps, rps->idle_freq, false);
 917	intel_uncore_forcewake_put(rps_to_uncore(rps), FORCEWAKE_MEDIA);
 918
 919	/*
 920	 * Since we will try and restart from the previously requested
 921	 * frequency on unparking, treat this idle point as a downclock
 922	 * interrupt and reduce the frequency for resume. If we park/unpark
 923	 * more frequently than the rps worker can run, we will not respond
 924	 * to any EI and never see a change in frequency.
 925	 *
 926	 * (Note we accommodate Cherryview's limitation of only using an
 927	 * even bin by applying it to all.)
 928	 */
 929	adj = rps->last_adj;
 930	if (adj < 0)
 931		adj *= 2;
 932	else /* CHV needs even encode values */
 933		adj = -2;
 934	rps->last_adj = adj;
 935	rps->cur_freq = max_t(int, rps->cur_freq + adj, rps->min_freq);
 936	if (rps->cur_freq < rps->efficient_freq) {
 937		rps->cur_freq = rps->efficient_freq;
 938		rps->last_adj = 0;
 939	}
 940
 941	GT_TRACE(rps_to_gt(rps), "park:%x\n", rps->cur_freq);
 942}
 943
 944u32 intel_rps_get_boost_frequency(struct intel_rps *rps)
 945{
 946	struct intel_guc_slpc *slpc;
 947
 948	if (rps_uses_slpc(rps)) {
 949		slpc = rps_to_slpc(rps);
 950
 951		return slpc->boost_freq;
 952	} else {
 953		return intel_gpu_freq(rps, rps->boost_freq);
 954	}
 955}
 956
 957static int rps_set_boost_freq(struct intel_rps *rps, u32 val)
 958{
 959	bool boost = false;
 960
 961	/* Validate against (static) hardware limits */
 962	val = intel_freq_opcode(rps, val);
 963	if (val < rps->min_freq || val > rps->max_freq)
 964		return -EINVAL;
 965
 966	mutex_lock(&rps->lock);
 967	if (val != rps->boost_freq) {
 968		rps->boost_freq = val;
 969		boost = atomic_read(&rps->num_waiters);
 970	}
 971	mutex_unlock(&rps->lock);
 972	if (boost)
 973		schedule_work(&rps->work);
 974
 975	return 0;
 976}
 977
 978int intel_rps_set_boost_frequency(struct intel_rps *rps, u32 freq)
 979{
 980	struct intel_guc_slpc *slpc;
 981
 982	if (rps_uses_slpc(rps)) {
 983		slpc = rps_to_slpc(rps);
 984
 985		return intel_guc_slpc_set_boost_freq(slpc, freq);
 986	} else {
 987		return rps_set_boost_freq(rps, freq);
 988	}
 989}
 990
 991void intel_rps_dec_waiters(struct intel_rps *rps)
 992{
 993	struct intel_guc_slpc *slpc;
 994
 995	if (rps_uses_slpc(rps)) {
 996		slpc = rps_to_slpc(rps);
 997
 998		intel_guc_slpc_dec_waiters(slpc);
 999	} else {
1000		atomic_dec(&rps->num_waiters);
1001	}
1002}
1003
1004void intel_rps_boost(struct i915_request *rq)
1005{
1006	struct intel_guc_slpc *slpc;
1007
1008	if (i915_request_signaled(rq) || i915_request_has_waitboost(rq))
1009		return;
1010
1011	/* Serializes with i915_request_retire() */
1012	if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) {
1013		struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps;
1014
1015		if (rps_uses_slpc(rps)) {
1016			slpc = rps_to_slpc(rps);
1017
1018			if (slpc->min_freq_softlimit >= slpc->boost_freq)
1019				return;
1020
 1021			/* Only kick the boost work for the first waiter (old count == 0) */
1022			if (!atomic_fetch_inc(&slpc->num_waiters)) {
1023				GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n",
1024					 rq->fence.context, rq->fence.seqno);
1025				schedule_work(&slpc->boost_work);
1026			}
1027
1028			return;
1029		}
1030
1031		if (atomic_fetch_inc(&rps->num_waiters))
1032			return;
1033
1034		if (!intel_rps_is_active(rps))
1035			return;
1036
1037		GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n",
1038			 rq->fence.context, rq->fence.seqno);
1039
1040		if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
1041			schedule_work(&rps->work);
1042
1043		WRITE_ONCE(rps->boosts, rps->boosts + 1); /* debug only */
1044	}
1045}
1046
1047int intel_rps_set(struct intel_rps *rps, u8 val)
1048{
1049	int err;
1050
1051	lockdep_assert_held(&rps->lock);
1052	GEM_BUG_ON(val > rps->max_freq);
1053	GEM_BUG_ON(val < rps->min_freq);
1054
1055	if (intel_rps_is_active(rps)) {
1056		err = rps_set(rps, val, true);
1057		if (err)
1058			return err;
1059
1060		/*
1061		 * Make sure we continue to get interrupts
1062		 * until we hit the minimum or maximum frequencies.
1063		 */
1064		if (intel_rps_has_interrupts(rps)) {
1065			struct intel_uncore *uncore = rps_to_uncore(rps);
1066
1067			set(uncore,
1068			    GEN6_RP_INTERRUPT_LIMITS, rps_limits(rps, val));
1069
1070			set(uncore, GEN6_PMINTRMSK, rps_pm_mask(rps, val));
1071		}
1072	}
1073
1074	rps->cur_freq = val;
1075	return 0;
1076}
1077
1078static u32 intel_rps_read_state_cap(struct intel_rps *rps)
1079{
1080	struct drm_i915_private *i915 = rps_to_i915(rps);
1081	struct intel_uncore *uncore = rps_to_uncore(rps);
1082
1083	if (IS_PONTEVECCHIO(i915))
1084		return intel_uncore_read(uncore, PVC_RP_STATE_CAP);
1085	else if (IS_XEHPSDV(i915))
1086		return intel_uncore_read(uncore, XEHPSDV_RP_STATE_CAP);
1087	else if (IS_GEN9_LP(i915))
1088		return intel_uncore_read(uncore, BXT_RP_STATE_CAP);
1089	else
1090		return intel_uncore_read(uncore, GEN6_RP_STATE_CAP);
1091}
1092
1093static void
1094mtl_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps)
1095{
1096	struct intel_uncore *uncore = rps_to_uncore(rps);
1097	u32 rp_state_cap = rps_to_gt(rps)->type == GT_MEDIA ?
1098				intel_uncore_read(uncore, MTL_MEDIAP_STATE_CAP) :
1099				intel_uncore_read(uncore, MTL_RP_STATE_CAP);
1100	u32 rpe = rps_to_gt(rps)->type == GT_MEDIA ?
1101			intel_uncore_read(uncore, MTL_MPE_FREQUENCY) :
1102			intel_uncore_read(uncore, MTL_GT_RPE_FREQUENCY);
1103
1104	/* MTL values are in units of 16.67 MHz */
1105	caps->rp0_freq = REG_FIELD_GET(MTL_RP0_CAP_MASK, rp_state_cap);
1106	caps->min_freq = REG_FIELD_GET(MTL_RPN_CAP_MASK, rp_state_cap);
1107	caps->rp1_freq = REG_FIELD_GET(MTL_RPE_MASK, rpe);
1108}
1109
1110static void
1111__gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps)
1112{
1113	struct drm_i915_private *i915 = rps_to_i915(rps);
1114	u32 rp_state_cap;
1115
1116	rp_state_cap = intel_rps_read_state_cap(rps);
1117
1118	/* static values from HW: RP0 > RP1 > RPn (min_freq) */
1119	if (IS_GEN9_LP(i915)) {
1120		caps->rp0_freq = (rp_state_cap >> 16) & 0xff;
1121		caps->rp1_freq = (rp_state_cap >>  8) & 0xff;
1122		caps->min_freq = (rp_state_cap >>  0) & 0xff;
1123	} else {
1124		caps->rp0_freq = (rp_state_cap >>  0) & 0xff;
1125		if (GRAPHICS_VER(i915) >= 10)
1126			caps->rp1_freq = REG_FIELD_GET(RPE_MASK,
1127						       intel_uncore_read(to_gt(i915)->uncore,
1128						       GEN10_FREQ_INFO_REC));
1129		else
1130			caps->rp1_freq = (rp_state_cap >>  8) & 0xff;
1131		caps->min_freq = (rp_state_cap >> 16) & 0xff;
1132	}
1133
1134	if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) {
1135		/*
 1136		 * In this case the rp_state_cap register reports frequencies in
1137		 * units of 50 MHz. Convert these to the actual "hw unit", i.e.
1138		 * units of 16.67 MHz
1139		 */
1140		caps->rp0_freq *= GEN9_FREQ_SCALER;
1141		caps->rp1_freq *= GEN9_FREQ_SCALER;
1142		caps->min_freq *= GEN9_FREQ_SCALER;
1143	}
1144}
1145
1146/**
1147 * gen6_rps_get_freq_caps - Get freq caps exposed by HW
1148 * @rps: the intel_rps structure
1149 * @caps: returned freq caps
1150 *
1151 * Returned "caps" frequencies should be converted to MHz using
1152 * intel_gpu_freq()
1153 */
1154void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps)
1155{
1156	struct drm_i915_private *i915 = rps_to_i915(rps);
1157
1158	if (IS_METEORLAKE(i915))
1159		return mtl_get_freq_caps(rps, caps);
1160	else
1161		return __gen6_rps_get_freq_caps(rps, caps);
1162}
1163
1164static void gen6_rps_init(struct intel_rps *rps)
1165{
1166	struct drm_i915_private *i915 = rps_to_i915(rps);
1167	struct intel_rps_freq_caps caps;
1168
1169	gen6_rps_get_freq_caps(rps, &caps);
1170	rps->rp0_freq = caps.rp0_freq;
1171	rps->rp1_freq = caps.rp1_freq;
1172	rps->min_freq = caps.min_freq;
1173
1174	/* hw_max = RP0 until we check for overclocking */
1175	rps->max_freq = rps->rp0_freq;
1176
1177	rps->efficient_freq = rps->rp1_freq;
1178	if (IS_HASWELL(i915) || IS_BROADWELL(i915) ||
1179	    IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) {
1180		u32 ddcc_status = 0;
1181		u32 mult = 1;
1182
1183		if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11)
1184			mult = GEN9_FREQ_SCALER;
1185		if (snb_pcode_read(rps_to_gt(rps)->uncore,
1186				   HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
1187				   &ddcc_status, NULL) == 0)
1188			rps->efficient_freq =
1189				clamp_t(u32,
1190					((ddcc_status >> 8) & 0xff) * mult,
1191					rps->min_freq,
1192					rps->max_freq);
1193	}
1194}
1195
1196static bool rps_reset(struct intel_rps *rps)
1197{
1198	struct drm_i915_private *i915 = rps_to_i915(rps);
1199
1200	/* force a reset */
1201	rps->power.mode = -1;
1202	rps->last_freq = -1;
1203
1204	if (rps_set(rps, rps->min_freq, true)) {
1205		drm_err(&i915->drm, "Failed to reset RPS to initial values\n");
1206		return false;
1207	}
1208
1209	rps->cur_freq = rps->min_freq;
1210	return true;
1211}
1212
1213/* See the Gen9_GT_PM_Programming_Guide doc for the below */
1214static bool gen9_rps_enable(struct intel_rps *rps)
1215{
1216	struct intel_gt *gt = rps_to_gt(rps);
1217	struct intel_uncore *uncore = gt->uncore;
1218
1219	/* Program defaults and thresholds for RPS */
1220	if (GRAPHICS_VER(gt->i915) == 9)
1221		intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ,
1222				      GEN9_FREQUENCY(rps->rp1_freq));
1223
1224	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 0xa);
1225
1226	rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD;
1227
1228	return rps_reset(rps);
1229}
1230
1231static bool gen8_rps_enable(struct intel_rps *rps)
1232{
1233	struct intel_uncore *uncore = rps_to_uncore(rps);
1234
1235	intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ,
1236			      HSW_FREQUENCY(rps->rp1_freq));
1237
1238	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
1239
1240	rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD;
1241
1242	return rps_reset(rps);
1243}
1244
1245static bool gen6_rps_enable(struct intel_rps *rps)
1246{
1247	struct intel_uncore *uncore = rps_to_uncore(rps);
1248
1249	/* Power down if completely idle for over 50ms */
1250	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 50000);
1251	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
1252
1253	rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD |
1254			  GEN6_PM_RP_DOWN_THRESHOLD |
1255			  GEN6_PM_RP_DOWN_TIMEOUT);
1256
1257	return rps_reset(rps);
1258}
1259
1260static int chv_rps_max_freq(struct intel_rps *rps)
1261{
1262	struct drm_i915_private *i915 = rps_to_i915(rps);
1263	struct intel_gt *gt = rps_to_gt(rps);
1264	u32 val;
1265
1266	val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE);
1267
1268	switch (gt->info.sseu.eu_total) {
1269	case 8:
1270		/* (2 * 4) config */
1271		val >>= FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT;
1272		break;
1273	case 12:
1274		/* (2 * 6) config */
1275		val >>= FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT;
1276		break;
1277	case 16:
1278		/* (2 * 8) config */
1279	default:
1280		/* Setting (2 * 8) Min RP0 for any other combination */
1281		val >>= FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT;
1282		break;
1283	}
1284
1285	return val & FB_GFX_FREQ_FUSE_MASK;
1286}
1287
1288static int chv_rps_rpe_freq(struct intel_rps *rps)
1289{
1290	struct drm_i915_private *i915 = rps_to_i915(rps);
1291	u32 val;
1292
1293	val = vlv_punit_read(i915, PUNIT_GPU_DUTYCYCLE_REG);
1294	val >>= PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT;
1295
1296	return val & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
1297}
1298
1299static int chv_rps_guar_freq(struct intel_rps *rps)
1300{
1301	struct drm_i915_private *i915 = rps_to_i915(rps);
1302	u32 val;
1303
1304	val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE);
1305
1306	return val & FB_GFX_FREQ_FUSE_MASK;
1307}
1308
1309static u32 chv_rps_min_freq(struct intel_rps *rps)
1310{
1311	struct drm_i915_private *i915 = rps_to_i915(rps);
1312	u32 val;
1313
1314	val = vlv_punit_read(i915, FB_GFX_FMIN_AT_VMIN_FUSE);
1315	val >>= FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT;
1316
1317	return val & FB_GFX_FREQ_FUSE_MASK;
1318}
1319
1320static bool chv_rps_enable(struct intel_rps *rps)
1321{
1322	struct intel_uncore *uncore = rps_to_uncore(rps);
1323	struct drm_i915_private *i915 = rps_to_i915(rps);
1324	u32 val;
1325
1326	/* 1: Program defaults and thresholds for RPS*/
1327	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000);
1328	intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400);
1329	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000);
1330	intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000);
1331	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000);
1332
1333	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
1334
1335	/* 2: Enable RPS */
1336	intel_uncore_write_fw(uncore, GEN6_RP_CONTROL,
1337			      GEN6_RP_MEDIA_HW_NORMAL_MODE |
1338			      GEN6_RP_MEDIA_IS_GFX |
1339			      GEN6_RP_ENABLE |
1340			      GEN6_RP_UP_BUSY_AVG |
1341			      GEN6_RP_DOWN_IDLE_AVG);
1342
1343	rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD |
1344			  GEN6_PM_RP_DOWN_THRESHOLD |
1345			  GEN6_PM_RP_DOWN_TIMEOUT);
1346
1347	/* Setting Fixed Bias */
1348	vlv_punit_get(i915);
1349
1350	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50;
1351	vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val);
1352
1353	val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
1354
1355	vlv_punit_put(i915);
1356
1357	/* RPS code assumes GPLL is used */
1358	drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0,
1359		      "GPLL not enabled\n");
1360
1361	drm_dbg(&i915->drm, "GPLL enabled? %s\n",
1362		str_yes_no(val & GPLLENABLE));
1363	drm_dbg(&i915->drm, "GPU status: 0x%08x\n", val);
1364
1365	return rps_reset(rps);
1366}
1367
1368static int vlv_rps_guar_freq(struct intel_rps *rps)
1369{
1370	struct drm_i915_private *i915 = rps_to_i915(rps);
1371	u32 val, rp1;
1372
1373	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE);
1374
1375	rp1 = val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK;
1376	rp1 >>= FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
1377
1378	return rp1;
1379}
1380
1381static int vlv_rps_max_freq(struct intel_rps *rps)
1382{
1383	struct drm_i915_private *i915 = rps_to_i915(rps);
1384	u32 val, rp0;
1385
1386	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE);
1387
1388	rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
1389	/* Clamp to max */
1390	rp0 = min_t(u32, rp0, 0xea);
1391
1392	return rp0;
1393}
1394
1395static int vlv_rps_rpe_freq(struct intel_rps *rps)
1396{
1397	struct drm_i915_private *i915 = rps_to_i915(rps);
1398	u32 val, rpe;
1399
1400	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
1401	rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
1402	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
1403	rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
1404
1405	return rpe;
1406}
1407
1408static int vlv_rps_min_freq(struct intel_rps *rps)
1409{
1410	struct drm_i915_private *i915 = rps_to_i915(rps);
1411	u32 val;
1412
1413	val = vlv_punit_read(i915, PUNIT_REG_GPU_LFM) & 0xff;
1414	/*
1415	 * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
1416	 * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
1417	 * a BYT-M B0 the above register contains 0xbf. Moreover when setting
1418	 * a frequency Punit will not allow values below 0xc0. Clamp it 0xc0
1419	 * to make sure it matches what Punit accepts.
1420	 */
1421	return max_t(u32, val, 0xc0);
1422}
1423
1424static bool vlv_rps_enable(struct intel_rps *rps)
1425{
1426	struct intel_uncore *uncore = rps_to_uncore(rps);
1427	struct drm_i915_private *i915 = rps_to_i915(rps);
1428	u32 val;
1429
1430	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000);
1431	intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400);
1432	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000);
1433	intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000);
1434	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000);
1435
1436	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
1437
1438	intel_uncore_write_fw(uncore, GEN6_RP_CONTROL,
1439			      GEN6_RP_MEDIA_TURBO |
1440			      GEN6_RP_MEDIA_HW_NORMAL_MODE |
1441			      GEN6_RP_MEDIA_IS_GFX |
1442			      GEN6_RP_ENABLE |
1443			      GEN6_RP_UP_BUSY_AVG |
1444			      GEN6_RP_DOWN_IDLE_CONT);
1445
1446	/* WaGsvRC0ResidencyMethod:vlv */
1447	rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED;
1448
1449	vlv_punit_get(i915);
1450
1451	/* Setting Fixed Bias */
1452	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875;
1453	vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val);
1454
1455	val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
1456
1457	vlv_punit_put(i915);
1458
1459	/* RPS code assumes GPLL is used */
1460	drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0,
1461		      "GPLL not enabled\n");
1462
1463	drm_dbg(&i915->drm, "GPLL enabled? %s\n",
1464		str_yes_no(val & GPLLENABLE));
1465	drm_dbg(&i915->drm, "GPU status: 0x%08x\n", val);
1466
1467	return rps_reset(rps);
1468}
1469
1470static unsigned long __ips_gfx_val(struct intel_ips *ips)
1471{
1472	struct intel_rps *rps = container_of(ips, typeof(*rps), ips);
1473	struct intel_uncore *uncore = rps_to_uncore(rps);
1474	unsigned int t, state1, state2;
1475	u32 pxvid, ext_v;
1476	u64 corr, corr2;
1477
1478	lockdep_assert_held(&mchdev_lock);
1479
1480	pxvid = intel_uncore_read(uncore, PXVFREQ(rps->cur_freq));
1481	pxvid = (pxvid >> 24) & 0x7f;
1482	ext_v = pvid_to_extvid(rps_to_i915(rps), pxvid);
1483
1484	state1 = ext_v;
1485
1486	/* Revel in the empirically derived constants */
1487
1488	/* Correction factor in 1/100000 units */
1489	t = ips_mch_val(uncore);
1490	if (t > 80)
1491		corr = t * 2349 + 135940;
1492	else if (t >= 50)
1493		corr = t * 964 + 29317;
1494	else /* < 50 */
1495		corr = t * 301 + 1004;
1496
1497	corr = div_u64(corr * 150142 * state1, 10000) - 78642;
1498	corr2 = div_u64(corr, 100000) * ips->corr;
1499
1500	state2 = div_u64(corr2 * state1, 10000);
1501	state2 /= 100; /* convert to mW */
1502
1503	__gen5_ips_update(ips);
1504
1505	return ips->gfx_power + state2;
1506}
1507
1508static bool has_busy_stats(struct intel_rps *rps)
1509{
1510	struct intel_engine_cs *engine;
1511	enum intel_engine_id id;
1512
1513	for_each_engine(engine, rps_to_gt(rps), id) {
1514		if (!intel_engine_supports_stats(engine))
1515			return false;
1516	}
1517
1518	return true;
1519}
1520
1521void intel_rps_enable(struct intel_rps *rps)
1522{
1523	struct drm_i915_private *i915 = rps_to_i915(rps);
1524	struct intel_uncore *uncore = rps_to_uncore(rps);
1525	bool enabled = false;
1526
1527	if (!HAS_RPS(i915))
1528		return;
1529
1530	if (rps_uses_slpc(rps))
1531		return;
1532
1533	intel_gt_check_clock_frequency(rps_to_gt(rps));
1534
1535	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
1536	if (rps->max_freq <= rps->min_freq)
1537		/* leave disabled, no room for dynamic reclocking */;
1538	else if (IS_CHERRYVIEW(i915))
1539		enabled = chv_rps_enable(rps);
1540	else if (IS_VALLEYVIEW(i915))
1541		enabled = vlv_rps_enable(rps);
1542	else if (GRAPHICS_VER(i915) >= 9)
1543		enabled = gen9_rps_enable(rps);
1544	else if (GRAPHICS_VER(i915) >= 8)
1545		enabled = gen8_rps_enable(rps);
1546	else if (GRAPHICS_VER(i915) >= 6)
1547		enabled = gen6_rps_enable(rps);
1548	else if (IS_IRONLAKE_M(i915))
1549		enabled = gen5_rps_enable(rps);
1550	else
1551		MISSING_CASE(GRAPHICS_VER(i915));
1552	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
1553	if (!enabled)
1554		return;
1555
1556	GT_TRACE(rps_to_gt(rps),
1557		 "min:%x, max:%x, freq:[%d, %d]\n",
1558		 rps->min_freq, rps->max_freq,
1559		 intel_gpu_freq(rps, rps->min_freq),
1560		 intel_gpu_freq(rps, rps->max_freq));
1561
1562	GEM_BUG_ON(rps->max_freq < rps->min_freq);
1563	GEM_BUG_ON(rps->idle_freq > rps->max_freq);
1564
1565	GEM_BUG_ON(rps->efficient_freq < rps->min_freq);
1566	GEM_BUG_ON(rps->efficient_freq > rps->max_freq);
1567
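	/*
	 * Prefer the software busyness timer when every engine exposes busy
	 * stats; otherwise fall back to the hardware RPS interrupts on
	 * gen6-11 (Ironlake is driven by intel_ips.ko instead).
	 */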
1568	if (has_busy_stats(rps))
1569		intel_rps_set_timer(rps);
1570	else if (GRAPHICS_VER(i915) >= 6 && GRAPHICS_VER(i915) <= 11)
1571		intel_rps_set_interrupts(rps);
1572	else
1573		/* Ironlake currently uses intel_ips.ko */ {}
1574
1575	intel_rps_set_enabled(rps);
1576}
1577
1578static void gen6_rps_disable(struct intel_rps *rps)
1579{
1580	set(rps_to_uncore(rps), GEN6_RP_CONTROL, 0);
1581}
1582
1583void intel_rps_disable(struct intel_rps *rps)
1584{
1585	struct drm_i915_private *i915 = rps_to_i915(rps);
1586
1587	if (!intel_rps_is_enabled(rps))
1588		return;
1589
1590	intel_rps_clear_enabled(rps);
1591	intel_rps_clear_interrupts(rps);
1592	intel_rps_clear_timer(rps);
1593
1594	if (GRAPHICS_VER(i915) >= 6)
1595		gen6_rps_disable(rps);
1596	else if (IS_IRONLAKE_M(i915))
1597		gen5_rps_disable(rps);
1598}
1599
1600static int byt_gpu_freq(struct intel_rps *rps, int val)
1601{
1602	/*
1603	 * N = val - 0xb7
1604	 * Slow = Fast = GPLL ref * N
1605	 */
1606	return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000);
1607}
1608
1609static int byt_freq_opcode(struct intel_rps *rps, int val)
1610{
1611	return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7;
1612}
1613
1614static int chv_gpu_freq(struct intel_rps *rps, int val)
1615{
1616	/*
1617	 * N = val / 2
1618	 * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
1619	 */
1620	return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000);
1621}
1622
1623static int chv_freq_opcode(struct intel_rps *rps, int val)
1624{
1625	/* CHV needs even values */
1626	return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2;
1627}
1628
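/*
 * Convert a hardware frequency opcode to MHz: gen9+ opcodes are in
 * 16.67 MHz units (hence the GEN9_FREQ_SCALER divisor), gen6-8 use
 * GT_FREQUENCY_MULTIPLIER (50 MHz) units, and VLV/CHV derive the value
 * from the GPLL reference clock.
 */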
1629int intel_gpu_freq(struct intel_rps *rps, int val)
1630{
1631	struct drm_i915_private *i915 = rps_to_i915(rps);
1632
1633	if (GRAPHICS_VER(i915) >= 9)
1634		return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
1635					 GEN9_FREQ_SCALER);
1636	else if (IS_CHERRYVIEW(i915))
1637		return chv_gpu_freq(rps, val);
1638	else if (IS_VALLEYVIEW(i915))
1639		return byt_gpu_freq(rps, val);
1640	else if (GRAPHICS_VER(i915) >= 6)
1641		return val * GT_FREQUENCY_MULTIPLIER;
1642	else
1643		return val;
1644}
1645
1646int intel_freq_opcode(struct intel_rps *rps, int val)
1647{
1648	struct drm_i915_private *i915 = rps_to_i915(rps);
1649
1650	if (GRAPHICS_VER(i915) >= 9)
1651		return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
1652					 GT_FREQUENCY_MULTIPLIER);
1653	else if (IS_CHERRYVIEW(i915))
1654		return chv_freq_opcode(rps, val);
1655	else if (IS_VALLEYVIEW(i915))
1656		return byt_freq_opcode(rps, val);
1657	else if (GRAPHICS_VER(i915) >= 6)
1658		return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
1659	else
1660		return val;
1661}
1662
1663static void vlv_init_gpll_ref_freq(struct intel_rps *rps)
1664{
1665	struct drm_i915_private *i915 = rps_to_i915(rps);
1666
1667	rps->gpll_ref_freq =
1668		vlv_get_cck_clock(i915, "GPLL ref",
1669				  CCK_GPLL_CLOCK_CONTROL,
1670				  i915->czclk_freq);
1671
1672	drm_dbg(&i915->drm, "GPLL reference freq: %d kHz\n",
1673		rps->gpll_ref_freq);
1674}
1675
1676static void vlv_rps_init(struct intel_rps *rps)
1677{
1678	struct drm_i915_private *i915 = rps_to_i915(rps);
1679	u32 val;
1680
1681	vlv_iosf_sb_get(i915,
1682			BIT(VLV_IOSF_SB_PUNIT) |
1683			BIT(VLV_IOSF_SB_NC) |
1684			BIT(VLV_IOSF_SB_CCK));
1685
1686	vlv_init_gpll_ref_freq(rps);
1687
1688	val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
1689	switch ((val >> 6) & 3) {
1690	case 0:
1691	case 1:
1692		i915->mem_freq = 800;
1693		break;
1694	case 2:
1695		i915->mem_freq = 1066;
1696		break;
1697	case 3:
1698		i915->mem_freq = 1333;
1699		break;
1700	}
1701	drm_dbg(&i915->drm, "DDR speed: %d MHz\n", i915->mem_freq);
1702
1703	rps->max_freq = vlv_rps_max_freq(rps);
1704	rps->rp0_freq = rps->max_freq;
1705	drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n",
1706		intel_gpu_freq(rps, rps->max_freq), rps->max_freq);
1707
1708	rps->efficient_freq = vlv_rps_rpe_freq(rps);
1709	drm_dbg(&i915->drm, "RPe GPU freq: %d MHz (%u)\n",
1710		intel_gpu_freq(rps, rps->efficient_freq), rps->efficient_freq);
1711
1712	rps->rp1_freq = vlv_rps_guar_freq(rps);
1713	drm_dbg(&i915->drm, "RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
1714		intel_gpu_freq(rps, rps->rp1_freq), rps->rp1_freq);
1715
1716	rps->min_freq = vlv_rps_min_freq(rps);
1717	drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n",
1718		intel_gpu_freq(rps, rps->min_freq), rps->min_freq);
1719
1720	vlv_iosf_sb_put(i915,
1721			BIT(VLV_IOSF_SB_PUNIT) |
1722			BIT(VLV_IOSF_SB_NC) |
1723			BIT(VLV_IOSF_SB_CCK));
1724}
1725
1726static void chv_rps_init(struct intel_rps *rps)
1727{
1728	struct drm_i915_private *i915 = rps_to_i915(rps);
1729	u32 val;
1730
1731	vlv_iosf_sb_get(i915,
1732			BIT(VLV_IOSF_SB_PUNIT) |
1733			BIT(VLV_IOSF_SB_NC) |
1734			BIT(VLV_IOSF_SB_CCK));
1735
1736	vlv_init_gpll_ref_freq(rps);
1737
1738	val = vlv_cck_read(i915, CCK_FUSE_REG);
1739
1740	switch ((val >> 2) & 0x7) {
1741	case 3:
1742		i915->mem_freq = 2000;
1743		break;
1744	default:
1745		i915->mem_freq = 1600;
1746		break;
1747	}
1748	drm_dbg(&i915->drm, "DDR speed: %d MHz\n", i915->mem_freq);
1749
1750	rps->max_freq = chv_rps_max_freq(rps);
1751	rps->rp0_freq = rps->max_freq;
1752	drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n",
1753		intel_gpu_freq(rps, rps->max_freq), rps->max_freq);
1754
1755	rps->efficient_freq = chv_rps_rpe_freq(rps);
1756	drm_dbg(&i915->drm, "RPe GPU freq: %d MHz (%u)\n",
1757		intel_gpu_freq(rps, rps->efficient_freq), rps->efficient_freq);
1758
1759	rps->rp1_freq = chv_rps_guar_freq(rps);
1760	drm_dbg(&i915->drm, "RP1(Guar) GPU freq: %d MHz (%u)\n",
1761		intel_gpu_freq(rps, rps->rp1_freq), rps->rp1_freq);
1762
1763	rps->min_freq = chv_rps_min_freq(rps);
1764	drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n",
1765		intel_gpu_freq(rps, rps->min_freq), rps->min_freq);
1766
1767	vlv_iosf_sb_put(i915,
1768			BIT(VLV_IOSF_SB_PUNIT) |
1769			BIT(VLV_IOSF_SB_NC) |
1770			BIT(VLV_IOSF_SB_CCK));
1771
1772	drm_WARN_ONCE(&i915->drm, (rps->max_freq | rps->efficient_freq |
1773				   rps->rp1_freq | rps->min_freq) & 1,
1774		      "Odd GPU freq values\n");
1775}
1776
1777static void vlv_c0_read(struct intel_uncore *uncore, struct intel_rps_ei *ei)
1778{
1779	ei->ktime = ktime_get_raw();
1780	ei->render_c0 = intel_uncore_read(uncore, VLV_RENDER_C0_COUNT);
1781	ei->media_c0 = intel_uncore_read(uncore, VLV_MEDIA_C0_COUNT);
1782}
1783
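/*
 * WaGsvRC0ResidencyMethod: VLV only signals GEN6_PM_RP_UP_EI_EXPIRED (see
 * vlv_rps_enable()), so the up/down threshold events are derived here in
 * software from the render/media C0 residency counters.
 */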
1784static u32 vlv_wa_c0_ei(struct intel_rps *rps, u32 pm_iir)
1785{
1786	struct intel_uncore *uncore = rps_to_uncore(rps);
1787	const struct intel_rps_ei *prev = &rps->ei;
1788	struct intel_rps_ei now;
1789	u32 events = 0;
1790
1791	if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
1792		return 0;
1793
1794	vlv_c0_read(uncore, &now);
1795
1796	if (prev->ktime) {
1797		u64 time, c0;
1798		u32 render, media;
1799
1800		time = ktime_us_delta(now.ktime, prev->ktime);
1801
1802		time *= rps_to_i915(rps)->czclk_freq;
1803
1804		/* Workload can be split between render + media,
1805		 * e.g. SwapBuffers being blitted in X after being rendered in
1806		 * mesa. To account for this we need to combine both engines
1807		 * into our activity counter.
1808		 */
1809		render = now.render_c0 - prev->render_c0;
1810		media = now.media_c0 - prev->media_c0;
1811		c0 = max(render, media);
1812		c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */
1813
1814		if (c0 > time * rps->power.up_threshold)
1815			events = GEN6_PM_RP_UP_THRESHOLD;
1816		else if (c0 < time * rps->power.down_threshold)
1817			events = GEN6_PM_RP_DOWN_THRESHOLD;
1818	}
1819
1820	rps->ei = now;
1821	return events;
1822}
1823
1824static void rps_work(struct work_struct *work)
1825{
1826	struct intel_rps *rps = container_of(work, typeof(*rps), work);
1827	struct intel_gt *gt = rps_to_gt(rps);
1828	struct drm_i915_private *i915 = rps_to_i915(rps);
1829	bool client_boost = false;
1830	int new_freq, adj, min, max;
1831	u32 pm_iir = 0;
1832
1833	spin_lock_irq(gt->irq_lock);
1834	pm_iir = fetch_and_zero(&rps->pm_iir) & rps->pm_events;
1835	client_boost = atomic_read(&rps->num_waiters);
1836	spin_unlock_irq(gt->irq_lock);
1837
1838	/* Make sure we didn't queue anything we're not going to process. */
1839	if (!pm_iir && !client_boost)
1840		goto out;
1841
1842	mutex_lock(&rps->lock);
1843	if (!intel_rps_is_active(rps)) {
1844		mutex_unlock(&rps->lock);
1845		return;
1846	}
1847
1848	pm_iir |= vlv_wa_c0_ei(rps, pm_iir);
1849
1850	adj = rps->last_adj;
1851	new_freq = rps->cur_freq;
1852	min = rps->min_freq_softlimit;
1853	max = rps->max_freq_softlimit;
1854	if (client_boost)
1855		max = rps->max_freq;
1856
1857	GT_TRACE(gt,
1858		 "pm_iir:%x, client_boost:%s, last:%d, cur:%x, min:%x, max:%x\n",
1859		 pm_iir, str_yes_no(client_boost),
1860		 adj, new_freq, min, max);
1861
1862	if (client_boost && new_freq < rps->boost_freq) {
1863		new_freq = rps->boost_freq;
1864		adj = 0;
1865	} else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
1866		if (adj > 0)
1867			adj *= 2;
1868		else /* CHV needs even encode values */
1869			adj = IS_CHERRYVIEW(gt->i915) ? 2 : 1;
1870
1871		if (new_freq >= rps->max_freq_softlimit)
1872			adj = 0;
1873	} else if (client_boost) {
1874		adj = 0;
1875	} else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
1876		if (rps->cur_freq > rps->efficient_freq)
1877			new_freq = rps->efficient_freq;
1878		else if (rps->cur_freq > rps->min_freq_softlimit)
1879			new_freq = rps->min_freq_softlimit;
1880		adj = 0;
1881	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
1882		if (adj < 0)
1883			adj *= 2;
1884		else /* CHV needs even encode values */
1885			adj = IS_CHERRYVIEW(gt->i915) ? -2 : -1;
1886
1887		if (new_freq <= rps->min_freq_softlimit)
1888			adj = 0;
1889	} else { /* unknown event */
1890		adj = 0;
1891	}
1892
1893	/*
1894	 * sysfs frequency limits may have snuck in while
1895	 * servicing the interrupt
1896	 */
1897	new_freq += adj;
1898	new_freq = clamp_t(int, new_freq, min, max);
1899
1900	if (intel_rps_set(rps, new_freq)) {
1901		drm_dbg(&i915->drm, "Failed to set new GPU frequency\n");
1902		adj = 0;
1903	}
1904	rps->last_adj = adj;
1905
1906	mutex_unlock(&rps->lock);
1907
1908out:
1909	spin_lock_irq(gt->irq_lock);
1910	gen6_gt_pm_unmask_irq(gt, rps->pm_events);
1911	spin_unlock_irq(gt->irq_lock);
1912}
1913
1914void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
1915{
1916	struct intel_gt *gt = rps_to_gt(rps);
1917	const u32 events = rps->pm_events & pm_iir;
1918
1919	lockdep_assert_held(gt->irq_lock);
1920
1921	if (unlikely(!events))
1922		return;
1923
1924	GT_TRACE(gt, "irq events:%x\n", events);
1925
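	/*
	 * Mask the events before kicking the worker; rps_work() unmasks them
	 * again via gen6_gt_pm_unmask_irq() once pm_iir has been consumed,
	 * so no further RPS interrupts are raised while an adjustment is
	 * still pending.
	 */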
1926	gen6_gt_pm_mask_irq(gt, events);
1927
1928	rps->pm_iir |= events;
1929	schedule_work(&rps->work);
1930}
1931
1932void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
1933{
1934	struct intel_gt *gt = rps_to_gt(rps);
1935	u32 events;
1936
1937	events = pm_iir & rps->pm_events;
1938	if (events) {
1939		spin_lock(gt->irq_lock);
1940
1941		GT_TRACE(gt, "irq events:%x\n", events);
1942
1943		gen6_gt_pm_mask_irq(gt, events);
1944		rps->pm_iir |= events;
1945
1946		schedule_work(&rps->work);
1947		spin_unlock(gt->irq_lock);
1948	}
1949
1950	if (GRAPHICS_VER(gt->i915) >= 8)
1951		return;
1952
1953	if (pm_iir & PM_VEBOX_USER_INTERRUPT)
1954		intel_engine_cs_irq(gt->engine[VECS0], pm_iir >> 10);
1955
1956	if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)
1957		drm_dbg(&rps_to_i915(rps)->drm,
1958			"Command parser error, pm_iir 0x%08x\n", pm_iir);
1959}
1960
1961void gen5_rps_irq_handler(struct intel_rps *rps)
1962{
1963	struct intel_uncore *uncore = rps_to_uncore(rps);
1964	u32 busy_up, busy_down, max_avg, min_avg;
1965	u8 new_freq;
1966
1967	spin_lock(&mchdev_lock);
1968
1969	intel_uncore_write16(uncore,
1970			     MEMINTRSTS,
1971			     intel_uncore_read(uncore, MEMINTRSTS));
1972
1973	intel_uncore_write16(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
1974	busy_up = intel_uncore_read(uncore, RCPREVBSYTUPAVG);
1975	busy_down = intel_uncore_read(uncore, RCPREVBSYTDNAVG);
1976	max_avg = intel_uncore_read(uncore, RCBMAXAVG);
1977	min_avg = intel_uncore_read(uncore, RCBMINAVG);
1978
1979	/* Handle RCS change request from hw */
1980	new_freq = rps->cur_freq;
1981	if (busy_up > max_avg)
1982		new_freq++;
1983	else if (busy_down < min_avg)
1984		new_freq--;
1985	new_freq = clamp(new_freq,
1986			 rps->min_freq_softlimit,
1987			 rps->max_freq_softlimit);
1988
1989	if (new_freq != rps->cur_freq && !__gen5_rps_set(rps, new_freq))
1990		rps->cur_freq = new_freq;
1991
1992	spin_unlock(&mchdev_lock);
1993}
1994
1995void intel_rps_init_early(struct intel_rps *rps)
1996{
1997	mutex_init(&rps->lock);
1998	mutex_init(&rps->power.mutex);
1999
2000	INIT_WORK(&rps->work, rps_work);
2001	timer_setup(&rps->timer, rps_timer, 0);
2002
2003	atomic_set(&rps->num_waiters, 0);
2004}
2005
2006void intel_rps_init(struct intel_rps *rps)
2007{
2008	struct drm_i915_private *i915 = rps_to_i915(rps);
2009
2010	if (rps_uses_slpc(rps))
2011		return;
2012
2013	if (IS_CHERRYVIEW(i915))
2014		chv_rps_init(rps);
2015	else if (IS_VALLEYVIEW(i915))
2016		vlv_rps_init(rps);
2017	else if (GRAPHICS_VER(i915) >= 6)
2018		gen6_rps_init(rps);
2019	else if (IS_IRONLAKE_M(i915))
2020		gen5_rps_init(rps);
2021
2022	/* Derive initial user preferences/limits from the hardware limits */
2023	rps->max_freq_softlimit = rps->max_freq;
2024	rps_to_gt(rps)->defaults.max_freq = rps->max_freq_softlimit;
2025	rps->min_freq_softlimit = rps->min_freq;
2026	rps_to_gt(rps)->defaults.min_freq = rps->min_freq_softlimit;
2027
2028	/* After setting max-softlimit, find the overclock max freq */
2029	if (GRAPHICS_VER(i915) == 6 || IS_IVYBRIDGE(i915) || IS_HASWELL(i915)) {
2030		u32 params = 0;
2031
2032		snb_pcode_read(rps_to_gt(rps)->uncore, GEN6_READ_OC_PARAMS, &params, NULL);
2033		if (params & BIT(31)) { /* OC supported */
2034			drm_dbg(&i915->drm,
2035				"Overclocking supported, max: %dMHz, overclock: %dMHz\n",
2036				(rps->max_freq & 0xff) * 50,
2037				(params & 0xff) * 50);
2038			rps->max_freq = params & 0xff;
2039		}
2040	}
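	/*
	 * The frequency fields on these platforms are in 50 MHz units, hence
	 * the "* 50" in the debug message above; e.g. an overclock parameter
	 * of 0x16 would correspond to 22 * 50 = 1100 MHz.
	 */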
2041
2042	/* Finally allow us to boost to max by default */
2043	rps->boost_freq = rps->max_freq;
2044	rps->idle_freq = rps->min_freq;
2045
2046	/* Start in the middle, from here we will autotune based on workload */
2047	rps->cur_freq = rps->efficient_freq;
2048
2049	rps->pm_intrmsk_mbz = 0;
2050
2051	/*
2052	 * SNB, IVB and HSW can hard hang, and VLV and CHV may hard hang, on a
2053	 * looping batchbuffer if GEN6_PM_UP_EI_EXPIRED is masked.

2054	 *
2055	 * TODO: verify if this can be reproduced on VLV,CHV.
2056	 */
2057	if (GRAPHICS_VER(i915) <= 7)
2058		rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED;
2059
2060	if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) < 11)
2061		rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
2062
2063	/* GuC needs ARAT expired interrupt unmasked */
2064	if (intel_uc_uses_guc_submission(&rps_to_gt(rps)->uc))
2065		rps->pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK;
2066}
2067
2068void intel_rps_sanitize(struct intel_rps *rps)
2069{
2070	if (rps_uses_slpc(rps))
2071		return;
2072
2073	if (GRAPHICS_VER(rps_to_i915(rps)) >= 6)
2074		rps_disable_interrupts(rps);
2075}
2076
2077u32 intel_rps_read_rpstat_fw(struct intel_rps *rps)
2078{
2079	struct drm_i915_private *i915 = rps_to_i915(rps);
2080	i915_reg_t rpstat;
2081
2082	rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1;
2083
2084	return intel_uncore_read_fw(rps_to_gt(rps)->uncore, rpstat);
2085}
2086
2087u32 intel_rps_read_rpstat(struct intel_rps *rps)
2088{
2089	struct drm_i915_private *i915 = rps_to_i915(rps);
2090	i915_reg_t rpstat;
2091
2092	rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1;
2093
2094	return intel_uncore_read(rps_to_gt(rps)->uncore, rpstat);
2095}
2096
2097u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
2098{
2099	struct drm_i915_private *i915 = rps_to_i915(rps);
2100	u32 cagf;
2101
2102	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
2103		cagf = REG_FIELD_GET(MTL_CAGF_MASK, rpstat);
2104	else if (GRAPHICS_VER(i915) >= 12)
2105		cagf = REG_FIELD_GET(GEN12_CAGF_MASK, rpstat);
2106	else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
2107		cagf = REG_FIELD_GET(RPE_MASK, rpstat);
2108	else if (GRAPHICS_VER(i915) >= 9)
2109		cagf = REG_FIELD_GET(GEN9_CAGF_MASK, rpstat);
2110	else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
2111		cagf = REG_FIELD_GET(HSW_CAGF_MASK, rpstat);
2112	else if (GRAPHICS_VER(i915) >= 6)
2113		cagf = REG_FIELD_GET(GEN6_CAGF_MASK, rpstat);
2114	else
2115		cagf = gen5_invert_freq(rps, REG_FIELD_GET(MEMSTAT_PSTATE_MASK, rpstat));
2116
2117	return cagf;
2118}
2119
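/*
 * read_cagf() below fetches the platform-specific frequency status register
 * and decodes it with intel_rps_get_cagf(); callers convert the result to
 * MHz with intel_gpu_freq(), as intel_rps_read_actual_frequency() does.
 */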
2120static u32 read_cagf(struct intel_rps *rps)
2121{
2122	struct drm_i915_private *i915 = rps_to_i915(rps);
2123	struct intel_uncore *uncore = rps_to_uncore(rps);
2124	u32 freq;
2125
2126	/*
2127	 * On Gen12+ reading the frequency from HW does not need a forcewake and
2128	 * the registers report a frequency of 0 when the GT is in RC6
2129	 */
2130	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) {
2131		freq = intel_uncore_read(uncore, MTL_MIRROR_TARGET_WP1);
2132	} else if (GRAPHICS_VER(i915) >= 12) {
2133		freq = intel_uncore_read(uncore, GEN12_RPSTAT1);
2134	} else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
2135		vlv_punit_get(i915);
2136		freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
2137		vlv_punit_put(i915);
2138	} else if (GRAPHICS_VER(i915) >= 6) {
2139		freq = intel_uncore_read(uncore, GEN6_RPSTAT1);
2140	} else {
2141		freq = intel_uncore_read(uncore, MEMSTAT_ILK);
2142	}
2143
2144	return intel_rps_get_cagf(rps, freq);
2145}
2146
2147u32 intel_rps_read_actual_frequency(struct intel_rps *rps)
2148{
2149	struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
2150	intel_wakeref_t wakeref;
2151	u32 freq = 0;
2152
2153	with_intel_runtime_pm_if_in_use(rpm, wakeref)
2154		freq = intel_gpu_freq(rps, read_cagf(rps));
2155
2156	return freq;
2157}
2158
2159u32 intel_rps_read_punit_req(struct intel_rps *rps)
2160{
2161	struct intel_uncore *uncore = rps_to_uncore(rps);
2162	struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
2163	intel_wakeref_t wakeref;
2164	u32 freq = 0;
2165
2166	with_intel_runtime_pm_if_in_use(rpm, wakeref)
2167		freq = intel_uncore_read(uncore, GEN6_RPNSWREQ);
2168
2169	return freq;
2170}
2171
2172static u32 intel_rps_get_req(u32 pureq)
2173{
2174	u32 req = pureq >> GEN9_SW_REQ_UNSLICE_RATIO_SHIFT;
2175
2176	return req;
2177}
2178
2179u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps)
2180{
2181	u32 freq = intel_rps_get_req(intel_rps_read_punit_req(rps));
2182
2183	return intel_gpu_freq(rps, freq);
2184}
2185
2186u32 intel_rps_get_requested_frequency(struct intel_rps *rps)
2187{
2188	if (rps_uses_slpc(rps))
2189		return intel_rps_read_punit_req_frequency(rps);
2190	else
2191		return intel_gpu_freq(rps, rps->cur_freq);
2192}
2193
2194u32 intel_rps_get_max_frequency(struct intel_rps *rps)
2195{
2196	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
2197
2198	if (rps_uses_slpc(rps))
2199		return slpc->max_freq_softlimit;
2200	else
2201		return intel_gpu_freq(rps, rps->max_freq_softlimit);
2202}
2203
2204/**
2205 * intel_rps_get_max_raw_freq - returns the max frequency in raw hardware units.
2206 * @rps: the intel_rps structure
2207 *
2208 * Returns the max frequency in raw hardware units. On newer platforms the
2209 * raw unit is 50 MHz.
2210 */
2211u32 intel_rps_get_max_raw_freq(struct intel_rps *rps)
2212{
2213	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
2214	u32 freq;
2215
2216	if (rps_uses_slpc(rps)) {
2217		return DIV_ROUND_CLOSEST(slpc->rp0_freq,
2218					 GT_FREQUENCY_MULTIPLIER);
2219	} else {
2220		freq = rps->max_freq;
2221		if (GRAPHICS_VER(rps_to_i915(rps)) >= 9) {
2222			/* Convert GT frequency to 50 MHz units */
2223			freq /= GEN9_FREQ_SCALER;
2224		}
2225		return freq;
2226	}
2227}
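/*
 * On Gen9+ the internal RPS encoding is in units of 50 MHz / GEN9_FREQ_SCALER
 * (16.67 MHz steps), so the division above converts back to 50 MHz units;
 * e.g. with GEN9_FREQ_SCALER == 3, a raw value of 66 (~1100 MHz) becomes 22.
 */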
2228
2229u32 intel_rps_get_rp0_frequency(struct intel_rps *rps)
2230{
2231	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
2232
2233	if (rps_uses_slpc(rps))
2234		return slpc->rp0_freq;
2235	else
2236		return intel_gpu_freq(rps, rps->rp0_freq);
2237}
2238
2239u32 intel_rps_get_rp1_frequency(struct intel_rps *rps)
2240{
2241	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
2242
2243	if (rps_uses_slpc(rps))
2244		return slpc->rp1_freq;
2245	else
2246		return intel_gpu_freq(rps, rps->rp1_freq);
2247}
2248
2249u32 intel_rps_get_rpn_frequency(struct intel_rps *rps)
2250{
2251	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
2252
2253	if (rps_uses_slpc(rps))
2254		return slpc->min_freq;
2255	else
2256		return intel_gpu_freq(rps, rps->min_freq);
2257}
2258
2259static void rps_frequency_dump(struct intel_rps *rps, struct drm_printer *p)
2260{
2261	struct intel_gt *gt = rps_to_gt(rps);
2262	struct drm_i915_private *i915 = gt->i915;
2263	struct intel_uncore *uncore = gt->uncore;
2264	struct intel_rps_freq_caps caps;
2265	u32 rp_state_limits;
2266	u32 gt_perf_status;
2267	u32 rpmodectl, rpinclimit, rpdeclimit;
2268	u32 rpstat, cagf, reqf;
2269	u32 rpcurupei, rpcurup, rpprevup;
2270	u32 rpcurdownei, rpcurdown, rpprevdown;
2271	u32 rpupei, rpupt, rpdownei, rpdownt;
2272	u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask;
2273
2274	rp_state_limits = intel_uncore_read(uncore, GEN6_RP_STATE_LIMITS);
2275	gen6_rps_get_freq_caps(rps, &caps);
2276	if (IS_GEN9_LP(i915))
2277		gt_perf_status = intel_uncore_read(uncore, BXT_GT_PERF_STATUS);
2278	else
2279		gt_perf_status = intel_uncore_read(uncore, GEN6_GT_PERF_STATUS);
2280
2281	/* RPSTAT1 is in the GT power well */
2282	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
2283
2284	reqf = intel_uncore_read(uncore, GEN6_RPNSWREQ);
2285	if (GRAPHICS_VER(i915) >= 9) {
2286		reqf >>= 23;
2287	} else {
2288		reqf &= ~GEN6_TURBO_DISABLE;
2289		if (IS_HASWELL(i915) || IS_BROADWELL(i915))
2290			reqf >>= 24;
2291		else
2292			reqf >>= 25;
2293	}
2294	reqf = intel_gpu_freq(rps, reqf);
2295
2296	rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL);
2297	rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD);
2298	rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
2299
2300	rpstat = intel_rps_read_rpstat(rps);
2301	rpcurupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK;
2302	rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK;
2303	rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK;
2304	rpcurdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK;
2305	rpcurdown = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK;
2306	rpprevdown = intel_uncore_read(uncore, GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK;
2307
2308	rpupei = intel_uncore_read(uncore, GEN6_RP_UP_EI);
2309	rpupt = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD);
2310
2311	rpdownei = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
2312	rpdownt = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
2313
2314	cagf = intel_rps_read_actual_frequency(rps);
2315
2316	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
2317
2318	if (GRAPHICS_VER(i915) >= 11) {
2319		pm_ier = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE);
2320		pm_imr = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK);
2321		/*
2322		 * The equivalent to the PM ISR & IIR cannot be read
2323		 * without affecting the current state of the system
2324		 */
2325		pm_isr = 0;
2326		pm_iir = 0;
2327	} else if (GRAPHICS_VER(i915) >= 8) {
2328		pm_ier = intel_uncore_read(uncore, GEN8_GT_IER(2));
2329		pm_imr = intel_uncore_read(uncore, GEN8_GT_IMR(2));
2330		pm_isr = intel_uncore_read(uncore, GEN8_GT_ISR(2));
2331		pm_iir = intel_uncore_read(uncore, GEN8_GT_IIR(2));
2332	} else {
2333		pm_ier = intel_uncore_read(uncore, GEN6_PMIER);
2334		pm_imr = intel_uncore_read(uncore, GEN6_PMIMR);
2335		pm_isr = intel_uncore_read(uncore, GEN6_PMISR);
2336		pm_iir = intel_uncore_read(uncore, GEN6_PMIIR);
2337	}
2338	pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK);
2339
2340	drm_printf(p, "Video Turbo Mode: %s\n",
2341		   str_yes_no(rpmodectl & GEN6_RP_MEDIA_TURBO));
2342	drm_printf(p, "HW control enabled: %s\n",
2343		   str_yes_no(rpmodectl & GEN6_RP_ENABLE));
2344	drm_printf(p, "SW control enabled: %s\n",
2345		   str_yes_no((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == GEN6_RP_MEDIA_SW_MODE));
2346
2347	drm_printf(p, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n",
2348		   pm_ier, pm_imr, pm_mask);
2349	if (GRAPHICS_VER(i915) <= 10)
2350		drm_printf(p, "PM ISR=0x%08x IIR=0x%08x\n",
2351			   pm_isr, pm_iir);
2352	drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n",
2353		   rps->pm_intrmsk_mbz);
2354	drm_printf(p, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status);
2355	drm_printf(p, "Render p-state ratio: %d\n",
2356		   (gt_perf_status & (GRAPHICS_VER(i915) >= 9 ? 0x1ff00 : 0xff00)) >> 8);
2357	drm_printf(p, "Render p-state VID: %d\n",
2358		   gt_perf_status & 0xff);
2359	drm_printf(p, "Render p-state limit: %d\n",
2360		   rp_state_limits & 0xff);
2361	drm_printf(p, "RPSTAT1: 0x%08x\n", rpstat);
2362	drm_printf(p, "RPMODECTL: 0x%08x\n", rpmodectl);
2363	drm_printf(p, "RPINCLIMIT: 0x%08x\n", rpinclimit);
2364	drm_printf(p, "RPDECLIMIT: 0x%08x\n", rpdeclimit);
2365	drm_printf(p, "RPNSWREQ: %dMHz\n", reqf);
2366	drm_printf(p, "CAGF: %dMHz\n", cagf);
2367	drm_printf(p, "RP CUR UP EI: %d (%lldns)\n",
2368		   rpcurupei,
2369		   intel_gt_pm_interval_to_ns(gt, rpcurupei));
2370	drm_printf(p, "RP CUR UP: %d (%lldns)\n",
2371		   rpcurup, intel_gt_pm_interval_to_ns(gt, rpcurup));
2372	drm_printf(p, "RP PREV UP: %d (%lldns)\n",
2373		   rpprevup, intel_gt_pm_interval_to_ns(gt, rpprevup));
2374	drm_printf(p, "Up threshold: %d%%\n",
2375		   rps->power.up_threshold);
2376	drm_printf(p, "RP UP EI: %d (%lldns)\n",
2377		   rpupei, intel_gt_pm_interval_to_ns(gt, rpupei));
2378	drm_printf(p, "RP UP THRESHOLD: %d (%lldns)\n",
2379		   rpupt, intel_gt_pm_interval_to_ns(gt, rpupt));
2380
2381	drm_printf(p, "RP CUR DOWN EI: %d (%lldns)\n",
2382		   rpcurdownei,
2383		   intel_gt_pm_interval_to_ns(gt, rpcurdownei));
2384	drm_printf(p, "RP CUR DOWN: %d (%lldns)\n",
2385		   rpcurdown,
2386		   intel_gt_pm_interval_to_ns(gt, rpcurdown));
2387	drm_printf(p, "RP PREV DOWN: %d (%lldns)\n",
2388		   rpprevdown,
2389		   intel_gt_pm_interval_to_ns(gt, rpprevdown));
2390	drm_printf(p, "Down threshold: %d%%\n",
2391		   rps->power.down_threshold);
2392	drm_printf(p, "RP DOWN EI: %d (%lldns)\n",
2393		   rpdownei, intel_gt_pm_interval_to_ns(gt, rpdownei));
2394	drm_printf(p, "RP DOWN THRESHOLD: %d (%lldns)\n",
2395		   rpdownt, intel_gt_pm_interval_to_ns(gt, rpdownt));
2396
2397	drm_printf(p, "Lowest (RPN) frequency: %dMHz\n",
2398		   intel_gpu_freq(rps, caps.min_freq));
2399	drm_printf(p, "Nominal (RP1) frequency: %dMHz\n",
2400		   intel_gpu_freq(rps, caps.rp1_freq));
2401	drm_printf(p, "Max non-overclocked (RP0) frequency: %dMHz\n",
2402		   intel_gpu_freq(rps, caps.rp0_freq));
2403	drm_printf(p, "Max overclocked frequency: %dMHz\n",
2404		   intel_gpu_freq(rps, rps->max_freq));
2405
2406	drm_printf(p, "Current freq: %d MHz\n",
2407		   intel_gpu_freq(rps, rps->cur_freq));
2408	drm_printf(p, "Actual freq: %d MHz\n", cagf);
2409	drm_printf(p, "Idle freq: %d MHz\n",
2410		   intel_gpu_freq(rps, rps->idle_freq));
2411	drm_printf(p, "Min freq: %d MHz\n",
2412		   intel_gpu_freq(rps, rps->min_freq));
2413	drm_printf(p, "Boost freq: %d MHz\n",
2414		   intel_gpu_freq(rps, rps->boost_freq));
2415	drm_printf(p, "Max freq: %d MHz\n",
2416		   intel_gpu_freq(rps, rps->max_freq));
2417	drm_printf(p,
2418		   "efficient (RPe) frequency: %d MHz\n",
2419		   intel_gpu_freq(rps, rps->efficient_freq));
2420}
2421
2422static void slpc_frequency_dump(struct intel_rps *rps, struct drm_printer *p)
2423{
2424	struct intel_gt *gt = rps_to_gt(rps);
2425	struct intel_uncore *uncore = gt->uncore;
2426	struct intel_rps_freq_caps caps;
2427	u32 pm_mask;
2428
2429	gen6_rps_get_freq_caps(rps, &caps);
2430	pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK);
2431
2432	drm_printf(p, "PM MASK=0x%08x\n", pm_mask);
2433	drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n",
2434		   rps->pm_intrmsk_mbz);
2435	drm_printf(p, "RPSTAT1: 0x%08x\n", intel_rps_read_rpstat(rps));
2436	drm_printf(p, "RPNSWREQ: %dMHz\n", intel_rps_get_requested_frequency(rps));
2437	drm_printf(p, "Lowest (RPN) frequency: %dMHz\n",
2438		   intel_gpu_freq(rps, caps.min_freq));
2439	drm_printf(p, "Nominal (RP1) frequency: %dMHz\n",
2440		   intel_gpu_freq(rps, caps.rp1_freq));
2441	drm_printf(p, "Max non-overclocked (RP0) frequency: %dMHz\n",
2442		   intel_gpu_freq(rps, caps.rp0_freq));
2443	drm_printf(p, "Current freq: %d MHz\n",
2444		   intel_rps_get_requested_frequency(rps));
2445	drm_printf(p, "Actual freq: %d MHz\n",
2446		   intel_rps_read_actual_frequency(rps));
2447	drm_printf(p, "Min freq: %d MHz\n",
2448		   intel_rps_get_min_frequency(rps));
2449	drm_printf(p, "Boost freq: %d MHz\n",
2450		   intel_rps_get_boost_frequency(rps));
2451	drm_printf(p, "Max freq: %d MHz\n",
2452		   intel_rps_get_max_frequency(rps));
2453	drm_printf(p,
2454		   "efficient (RPe) frequency: %d MHz\n",
2455		   intel_gpu_freq(rps, caps.rp1_freq));
2456}
2457
2458void gen6_rps_frequency_dump(struct intel_rps *rps, struct drm_printer *p)
2459{
2460	if (rps_uses_slpc(rps))
2461		return slpc_frequency_dump(rps, p);
2462	else
2463		return rps_frequency_dump(rps, p);
2464}
2465
2466static int set_max_freq(struct intel_rps *rps, u32 val)
2467{
2468	struct drm_i915_private *i915 = rps_to_i915(rps);
2469	int ret = 0;
2470
2471	mutex_lock(&rps->lock);
2472
2473	val = intel_freq_opcode(rps, val);
2474	if (val < rps->min_freq ||
2475	    val > rps->max_freq ||
2476	    val < rps->min_freq_softlimit) {
2477		ret = -EINVAL;
2478		goto unlock;
2479	}
2480
2481	if (val > rps->rp0_freq)
2482		drm_dbg(&i915->drm, "User requested overclocking to %d\n",
2483			intel_gpu_freq(rps, val));
2484
2485	rps->max_freq_softlimit = val;
2486
2487	val = clamp_t(int, rps->cur_freq,
2488		      rps->min_freq_softlimit,
2489		      rps->max_freq_softlimit);
2490
2491	/*
2492	 * We still need *_set_rps to process the new max_delay and
2493	 * update the interrupt limits and PMINTRMSK even though
2494	 * frequency request may be unchanged.
2495	 */
2496	intel_rps_set(rps, val);
2497
2498unlock:
2499	mutex_unlock(&rps->lock);
2500
2501	return ret;
2502}
2503
2504int intel_rps_set_max_frequency(struct intel_rps *rps, u32 val)
2505{
2506	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
2507
2508	if (rps_uses_slpc(rps))
2509		return intel_guc_slpc_set_max_freq(slpc, val);
2510	else
2511		return set_max_freq(rps, val);
2512}
2513
2514u32 intel_rps_get_min_frequency(struct intel_rps *rps)
2515{
2516	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
2517
2518	if (rps_uses_slpc(rps))
2519		return slpc->min_freq_softlimit;
2520	else
2521		return intel_gpu_freq(rps, rps->min_freq_softlimit);
2522}
2523
2524/**
2525 * intel_rps_get_min_raw_freq - returns the min frequency in raw hardware units.
2526 * @rps: the intel_rps structure
2527 *
2528 * Returns the min frequency in raw hardware units. On newer platforms the
2529 * raw unit is 50 MHz.
2530 */
2531u32 intel_rps_get_min_raw_freq(struct intel_rps *rps)
2532{
2533	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
2534	u32 freq;
2535
2536	if (rps_uses_slpc(rps)) {
2537		return DIV_ROUND_CLOSEST(slpc->min_freq,
2538					 GT_FREQUENCY_MULTIPLIER);
2539	} else {
2540		freq = rps->min_freq;
2541		if (GRAPHICS_VER(rps_to_i915(rps)) >= 9) {
2542			/* Convert GT frequency to 50 MHz units */
2543			freq /= GEN9_FREQ_SCALER;
2544		}
2545		return freq;
2546	}
2547}
2548
2549static int set_min_freq(struct intel_rps *rps, u32 val)
2550{
2551	int ret = 0;
2552
2553	mutex_lock(&rps->lock);
2554
2555	val = intel_freq_opcode(rps, val);
2556	if (val < rps->min_freq ||
2557	    val > rps->max_freq ||
2558	    val > rps->max_freq_softlimit) {
2559		ret = -EINVAL;
2560		goto unlock;
2561	}
2562
2563	rps->min_freq_softlimit = val;
2564
2565	val = clamp_t(int, rps->cur_freq,
2566		      rps->min_freq_softlimit,
2567		      rps->max_freq_softlimit);
2568
2569	/*
2570	 * We still need *_set_rps to process the new min_delay and
2571	 * update the interrupt limits and PMINTRMSK even though
2572	 * frequency request may be unchanged.
2573	 */
2574	intel_rps_set(rps, val);
2575
2576unlock:
2577	mutex_unlock(&rps->lock);
2578
2579	return ret;
2580}
2581
2582int intel_rps_set_min_frequency(struct intel_rps *rps, u32 val)
2583{
2584	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
2585
2586	if (rps_uses_slpc(rps))
2587		return intel_guc_slpc_set_min_freq(slpc, val);
2588	else
2589		return set_min_freq(rps, val);
2590}
2591
2592static void intel_rps_set_manual(struct intel_rps *rps, bool enable)
2593{
2594	struct intel_uncore *uncore = rps_to_uncore(rps);
2595	u32 state = enable ? GEN9_RPSWCTL_ENABLE : GEN9_RPSWCTL_DISABLE;
2596
2597	/* Allow punit to process software requests */
2598	intel_uncore_write(uncore, GEN6_RP_CONTROL, state);
2599}
2600
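/*
 * The raise/lower helpers below pin the GT frequency even when SLPC owns
 * RPS: software control is enabled briefly, the requested ratio is written
 * into the unslice field of GEN6_RPNSWREQ with GEN9_IGNORE_SLICE_RATIO set,
 * and control is then handed back to the punit.
 */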
2601void intel_rps_raise_unslice(struct intel_rps *rps)
2602{
2603	struct intel_uncore *uncore = rps_to_uncore(rps);
2604
2605	mutex_lock(&rps->lock);
2606
2607	if (rps_uses_slpc(rps)) {
2608		/* RP limits have not been initialized yet for SLPC path */
2609		struct intel_rps_freq_caps caps;
2610
2611		gen6_rps_get_freq_caps(rps, &caps);
2612
2613		intel_rps_set_manual(rps, true);
2614		intel_uncore_write(uncore, GEN6_RPNSWREQ,
2615				   ((caps.rp0_freq <<
2616				   GEN9_SW_REQ_UNSLICE_RATIO_SHIFT) |
2617				   GEN9_IGNORE_SLICE_RATIO));
2618		intel_rps_set_manual(rps, false);
2619	} else {
2620		intel_rps_set(rps, rps->rp0_freq);
2621	}
2622
2623	mutex_unlock(&rps->lock);
2624}
2625
2626void intel_rps_lower_unslice(struct intel_rps *rps)
2627{
2628	struct intel_uncore *uncore = rps_to_uncore(rps);
2629
2630	mutex_lock(&rps->lock);
2631
2632	if (rps_uses_slpc(rps)) {
2633		/* RP limits have not been initialized yet for SLPC path */
2634		struct intel_rps_freq_caps caps;
2635
2636		gen6_rps_get_freq_caps(rps, &caps);
2637
2638		intel_rps_set_manual(rps, true);
2639		intel_uncore_write(uncore, GEN6_RPNSWREQ,
2640				   ((caps.min_freq <<
2641				   GEN9_SW_REQ_UNSLICE_RATIO_SHIFT) |
2642				   GEN9_IGNORE_SLICE_RATIO));
2643		intel_rps_set_manual(rps, false);
2644	} else {
2645		intel_rps_set(rps, rps->min_freq);
2646	}
2647
2648	mutex_unlock(&rps->lock);
2649}
2650
2651static u32 rps_read_mmio(struct intel_rps *rps, i915_reg_t reg32)
2652{
2653	struct intel_gt *gt = rps_to_gt(rps);
2654	intel_wakeref_t wakeref;
2655	u32 val;
2656
2657	with_intel_runtime_pm(gt->uncore->rpm, wakeref)
2658		val = intel_uncore_read(gt->uncore, reg32);
2659
2660	return val;
2661}
2662
2663bool rps_read_mask_mmio(struct intel_rps *rps,
2664			i915_reg_t reg32, u32 mask)
2665{
2666	return rps_read_mmio(rps, reg32) & mask;
2667}
2668
2669/* External interface for intel_ips.ko */
2670
2671static struct drm_i915_private __rcu *ips_mchdev;
2672
2673/**
 * ips_ping_for_i915_load - notify the intel_ips driver that i915 has loaded
 *
2674 * Tells the intel_ips driver that the i915 driver is now loaded, if
2675 * IPS got loaded first.
2676 *
2677 * This awkward dance is so that neither module has to depend on the
2678 * other in order for IPS to do the appropriate communication of
2679 * GPU turbo limits to i915.
2680 */
2681static void
2682ips_ping_for_i915_load(void)
2683{
2684	void (*link)(void);
2685
2686	link = symbol_get(ips_link_to_i915_driver);
2687	if (link) {
2688		link();
2689		symbol_put(ips_link_to_i915_driver);
2690	}
2691}
2692
2693void intel_rps_driver_register(struct intel_rps *rps)
2694{
2695	struct intel_gt *gt = rps_to_gt(rps);
2696
2697	/*
2698	 * We only register the i915 ips part with intel-ips once everything is
2699	 * set up, to avoid intel-ips sneaking in and reading bogus values.
2700	 */
2701	if (GRAPHICS_VER(gt->i915) == 5) {
2702		GEM_BUG_ON(ips_mchdev);
2703		rcu_assign_pointer(ips_mchdev, gt->i915);
2704		ips_ping_for_i915_load();
2705	}
2706}
2707
2708void intel_rps_driver_unregister(struct intel_rps *rps)
2709{
2710	if (rcu_access_pointer(ips_mchdev) == rps_to_i915(rps))
2711		rcu_assign_pointer(ips_mchdev, NULL);
2712}
2713
2714static struct drm_i915_private *mchdev_get(void)
2715{
2716	struct drm_i915_private *i915;
2717
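	/*
	 * ips_mchdev is published with rcu_assign_pointer() in
	 * intel_rps_driver_register(); under the RCU read lock, only take a
	 * reference if the drm device is still alive so that a racing driver
	 * unload cannot free it underneath the IPS callbacks.
	 */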
2718	rcu_read_lock();
2719	i915 = rcu_dereference(ips_mchdev);
2720	if (i915 && !kref_get_unless_zero(&i915->drm.ref))
2721		i915 = NULL;
2722	rcu_read_unlock();
2723
2724	return i915;
2725}
2726
2727/**
2728 * i915_read_mch_val - return value for IPS use
2729 *
2730 * Calculate and return a value for the IPS driver to use when deciding whether
2731 * we have thermal and power headroom to increase CPU or GPU power budget.
2732 */
2733unsigned long i915_read_mch_val(void)
2734{
2735	struct drm_i915_private *i915;
2736	unsigned long chipset_val = 0;
2737	unsigned long graphics_val = 0;
2738	intel_wakeref_t wakeref;
2739
2740	i915 = mchdev_get();
2741	if (!i915)
2742		return 0;
2743
2744	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
2745		struct intel_ips *ips = &to_gt(i915)->rps.ips;
2746
2747		spin_lock_irq(&mchdev_lock);
2748		chipset_val = __ips_chipset_val(ips);
2749		graphics_val = __ips_gfx_val(ips);
2750		spin_unlock_irq(&mchdev_lock);
2751	}
2752
2753	drm_dev_put(&i915->drm);
2754	return chipset_val + graphics_val;
2755}
2756EXPORT_SYMBOL_GPL(i915_read_mch_val);
2757
2758/**
2759 * i915_gpu_raise - raise GPU frequency limit
2760 *
2761 * Raise the limit; IPS indicates we have thermal headroom.
2762 */
2763bool i915_gpu_raise(void)
2764{
2765	struct drm_i915_private *i915;
2766	struct intel_rps *rps;
2767
2768	i915 = mchdev_get();
2769	if (!i915)
2770		return false;
2771
2772	rps = &to_gt(i915)->rps;
2773
2774	spin_lock_irq(&mchdev_lock);
2775	if (rps->max_freq_softlimit < rps->max_freq)
2776		rps->max_freq_softlimit++;
2777	spin_unlock_irq(&mchdev_lock);
2778
2779	drm_dev_put(&i915->drm);
2780	return true;
2781}
2782EXPORT_SYMBOL_GPL(i915_gpu_raise);
2783
2784/**
2785 * i915_gpu_lower - lower GPU frequency limit
2786 *
2787 * IPS indicates we're close to a thermal limit, so throttle back the GPU
2788 * frequency maximum.
2789 */
2790bool i915_gpu_lower(void)
2791{
2792	struct drm_i915_private *i915;
2793	struct intel_rps *rps;
2794
2795	i915 = mchdev_get();
2796	if (!i915)
2797		return false;
2798
2799	rps = &to_gt(i915)->rps;
2800
2801	spin_lock_irq(&mchdev_lock);
2802	if (rps->max_freq_softlimit > rps->min_freq)
2803		rps->max_freq_softlimit--;
2804	spin_unlock_irq(&mchdev_lock);
2805
2806	drm_dev_put(&i915->drm);
2807	return true;
2808}
2809EXPORT_SYMBOL_GPL(i915_gpu_lower);
2810
2811/**
2812 * i915_gpu_busy - indicate GPU busyness to IPS
2813 *
2814 * Tell the IPS driver whether or not the GPU is busy.
2815 */
2816bool i915_gpu_busy(void)
2817{
2818	struct drm_i915_private *i915;
2819	bool ret;
2820
2821	i915 = mchdev_get();
2822	if (!i915)
2823		return false;
2824
2825	ret = to_gt(i915)->awake;
2826
2827	drm_dev_put(&i915->drm);
2828	return ret;
2829}
2830EXPORT_SYMBOL_GPL(i915_gpu_busy);
2831
2832/**
2833 * i915_gpu_turbo_disable - disable graphics turbo
2834 *
2835 * Disable graphics turbo by clamping the max frequency softlimit to the
2836 * minimum and dropping the current frequency to it.
2837 */
2838bool i915_gpu_turbo_disable(void)
2839{
2840	struct drm_i915_private *i915;
2841	struct intel_rps *rps;
2842	bool ret;
2843
2844	i915 = mchdev_get();
2845	if (!i915)
2846		return false;
2847
2848	rps = &to_gt(i915)->rps;
2849
2850	spin_lock_irq(&mchdev_lock);
2851	rps->max_freq_softlimit = rps->min_freq;
2852	ret = !__gen5_rps_set(&to_gt(i915)->rps, rps->min_freq);
2853	spin_unlock_irq(&mchdev_lock);
2854
2855	drm_dev_put(&i915->drm);
2856	return ret;
2857}
2858EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
2859
2860#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
2861#include "selftest_rps.c"
2862#include "selftest_slpc.c"
2863#endif