   1// SPDX-License-Identifier: MIT
   2/*
   3 * Copyright © 2014-2018 Intel Corporation
   4 */
   5
   6#include "i915_drv.h"
   7#include "i915_reg.h"
   8#include "intel_context.h"
   9#include "intel_engine_pm.h"
  10#include "intel_engine_regs.h"
  11#include "intel_gpu_commands.h"
  12#include "intel_gt.h"
  13#include "intel_gt_mcr.h"
  14#include "intel_gt_print.h"
  15#include "intel_gt_regs.h"
  16#include "intel_ring.h"
  17#include "intel_workarounds.h"
  18
  19/**
  20 * DOC: Hardware workarounds
  21 *
  22 * Hardware workarounds are register programming documented to be executed in
  23 * the driver that fall outside of the normal programming sequences for a
  24 * platform. There are some basic categories of workarounds, depending on
  25 * how/when they are applied:
  26 *
  27 * - Context workarounds: workarounds that touch registers that are
  28 *   saved/restored to/from the HW context image. The list is emitted (via Load
  29 *   Register Immediate commands) once when initializing the device and saved in
  30 *   the default context. That default context is then used on every context
  31 *   creation to have a "primed golden context", i.e. a context image that
  32 *   already contains the changes needed to all the registers.
  33 *
  34 *   Context workarounds should be implemented in the \*_ctx_workarounds_init()
  35 *   variants respective to the targeted platforms.
  36 *
  37 * - Engine workarounds: the list of these WAs is applied whenever the specific
  38 *   engine is reset. It's also possible that a set of engine classes share a
  39 *   common power domain and they are reset together. This happens on some
  40 *   platforms with render and compute engines. In this case (at least) one of
   41 *   them needs to keep the workaround programming: the approach taken in the
  42 *   driver is to tie those workarounds to the first compute/render engine that
  43 *   is registered.  When executing with GuC submission, engine resets are
   44 *   outside of kernel driver control, hence the list of registers involved is
   45 *   written once, on engine initialization, and then passed to the GuC, which
  46 *   saves/restores their values before/after the reset takes place. See
  47 *   ``drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c`` for reference.
  48 *
  49 *   Workarounds for registers specific to RCS and CCS should be implemented in
  50 *   rcs_engine_wa_init() and ccs_engine_wa_init(), respectively; those for
  51 *   registers belonging to BCS, VCS or VECS should be implemented in
  52 *   xcs_engine_wa_init(). Workarounds for registers not belonging to a specific
   53 *   engine's MMIO range but that are part of the common RCS/CCS reset domain
  54 *   should be implemented in general_render_compute_wa_init().
  55 *
  56 * - GT workarounds: the list of these WAs is applied whenever these registers
  57 *   revert to their default values: on GPU reset, suspend/resume [1]_, etc.
  58 *
  59 *   GT workarounds should be implemented in the \*_gt_workarounds_init()
  60 *   variants respective to the targeted platforms.
  61 *
  62 * - Register whitelist: some workarounds need to be implemented in userspace,
  63 *   but need to touch privileged registers. The whitelist in the kernel
  64 *   instructs the hardware to allow the access to happen. From the kernel side,
   65 *   this is just a special case of an MMIO workaround (as we write the list of
   66 *   these to-be-whitelisted registers to some special HW registers).
  67 *
  68 *   Register whitelisting should be done in the \*_whitelist_build() variants
  69 *   respective to the targeted platforms.
  70 *
  71 * - Workaround batchbuffers: buffers that get executed automatically by the
  72 *   hardware on every HW context restore. These buffers are created and
   73 *   programmed in the default context so the hardware always goes through those
   74 *   programming sequences when switching contexts. Support for workaround
   75 *   batchbuffers is enabled via these hardware mechanisms:
  76 *
  77 *   #. INDIRECT_CTX: A batchbuffer and an offset are provided in the default
  78 *      context, pointing the hardware to jump to that location when that offset
   79 *      is reached in the context restore. The workaround batchbuffer in the driver
  80 *      currently uses this mechanism for all platforms.
  81 *
  82 *   #. BB_PER_CTX_PTR: A batchbuffer is provided in the default context,
  83 *      pointing the hardware to a buffer to continue executing after the
  84 *      engine registers are restored in a context restore sequence. This is
  85 *      currently not used in the driver.
  86 *
  87 * - Other:  There are WAs that, due to their nature, cannot be applied from a
  88 *   central place. Those are peppered around the rest of the code, as needed.
  89 *   Workarounds related to the display IP are the main example.
  90 *
  91 * .. [1] Technically, some registers are powercontext saved & restored, so they
  92 *    survive a suspend/resume. In practice, writing them again is not too
  93 *    costly and simplifies things, so it's the approach taken in the driver.
  94 */
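/*
 * Illustrative sketch only (not an actual hook in this file): a per-platform
 * context workaround function built from the helpers defined below would
 * look roughly like
 *
 *	static void xyz_ctx_workarounds_init(struct intel_engine_cs *engine,
 *					     struct i915_wa_list *wal)
 *	{
 *		wa_masked_en(wal, SOME_CHICKEN_REG, SOME_CHICKEN_BIT);
 *	}
 *
 * where "xyz", SOME_CHICKEN_REG and SOME_CHICKEN_BIT are placeholders. The
 * real hooks (gen8_ctx_workarounds_init() and friends) follow further down;
 * context lists are later emitted as MI_LOAD_REGISTER_IMM commands in
 * intel_engine_emit_ctx_wa(), while GT and engine lists are applied as MMIO
 * read-modify-writes.
 */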
  95
  96static void wa_init_start(struct i915_wa_list *wal, struct intel_gt *gt,
  97			  const char *name, const char *engine_name)
  98{
  99	wal->gt = gt;
 100	wal->name = name;
 101	wal->engine_name = engine_name;
 102}
 103
 104#define WA_LIST_CHUNK (1 << 4)
 105
 106static void wa_init_finish(struct i915_wa_list *wal)
 107{
 108	/* Trim unused entries. */
 109	if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
 110		struct i915_wa *list = kmemdup(wal->list,
 111					       wal->count * sizeof(*list),
 112					       GFP_KERNEL);
 113
 114		if (list) {
 115			kfree(wal->list);
 116			wal->list = list;
 117		}
 118	}
 119
 120	if (!wal->count)
 121		return;
 122
 123	gt_dbg(wal->gt, "Initialized %u %s workarounds on %s\n",
 124	       wal->wa_count, wal->name, wal->engine_name);
 125}
 126
 127static enum forcewake_domains
 128wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
 129{
 130	enum forcewake_domains fw = 0;
 131	struct i915_wa *wa;
 132	unsigned int i;
 133
 134	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
 135		fw |= intel_uncore_forcewake_for_reg(uncore,
 136						     wa->reg,
 137						     FW_REG_READ |
 138						     FW_REG_WRITE);
 139
 140	return fw;
 141}
 142
 143static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
 144{
 145	unsigned int addr = i915_mmio_reg_offset(wa->reg);
 146	struct drm_i915_private *i915 = wal->gt->i915;
 147	unsigned int start = 0, end = wal->count;
 148	const unsigned int grow = WA_LIST_CHUNK;
 149	struct i915_wa *wa_;
 150
 151	GEM_BUG_ON(!is_power_of_2(grow));
 152
 153	if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
 154		struct i915_wa *list;
 155
 156		list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
 157				     GFP_KERNEL);
 158		if (!list) {
 159			drm_err(&i915->drm, "No space for workaround init!\n");
 160			return;
 161		}
 162
 163		if (wal->list) {
 164			memcpy(list, wal->list, sizeof(*wa) * wal->count);
 165			kfree(wal->list);
 166		}
 167
 168		wal->list = list;
 169	}
 170
 171	while (start < end) {
 172		unsigned int mid = start + (end - start) / 2;
 173
 174		if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) {
 175			start = mid + 1;
 176		} else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) {
 177			end = mid;
 178		} else {
 179			wa_ = &wal->list[mid];
 180
 181			if ((wa->clr | wa_->clr) && !(wa->clr & ~wa_->clr)) {
 182				drm_err(&i915->drm,
 183					"Discarding overwritten w/a for reg %04x (clear: %08x, set: %08x)\n",
 184					i915_mmio_reg_offset(wa_->reg),
 185					wa_->clr, wa_->set);
 186
 187				wa_->set &= ~wa->clr;
 188			}
 189
 190			wal->wa_count++;
 191			wa_->set |= wa->set;
 192			wa_->clr |= wa->clr;
 193			wa_->read |= wa->read;
 194			return;
 195		}
 196	}
 197
 198	wal->wa_count++;
 199	wa_ = &wal->list[wal->count++];
 200	*wa_ = *wa;
 201
 202	while (wa_-- > wal->list) {
 203		GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) ==
 204			   i915_mmio_reg_offset(wa_[1].reg));
 205		if (i915_mmio_reg_offset(wa_[1].reg) >
 206		    i915_mmio_reg_offset(wa_[0].reg))
 207			break;
 208
 209		swap(wa_[1], wa_[0]);
 210	}
 211}
 212
 213static void wa_add(struct i915_wa_list *wal, i915_reg_t reg,
 214		   u32 clear, u32 set, u32 read_mask, bool masked_reg)
 215{
 216	struct i915_wa wa = {
 217		.reg  = reg,
 218		.clr  = clear,
 219		.set  = set,
 220		.read = read_mask,
 221		.masked_reg = masked_reg,
 222	};
 223
 224	_wa_add(wal, &wa);
 225}
 226
 227static void wa_mcr_add(struct i915_wa_list *wal, i915_mcr_reg_t reg,
 228		       u32 clear, u32 set, u32 read_mask, bool masked_reg)
 229{
 230	struct i915_wa wa = {
 231		.mcr_reg = reg,
 232		.clr  = clear,
 233		.set  = set,
 234		.read = read_mask,
 235		.masked_reg = masked_reg,
 236		.is_mcr = 1,
 237	};
 238
 239	_wa_add(wal, &wa);
 240}
 241
 242static void
 243wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set)
 244{
 245	wa_add(wal, reg, clear, set, clear | set, false);
 246}
 247
 248static void
 249wa_mcr_write_clr_set(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 clear, u32 set)
 250{
 251	wa_mcr_add(wal, reg, clear, set, clear | set, false);
 252}
 253
 254static void
 255wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
 256{
 257	wa_write_clr_set(wal, reg, ~0, set);
 258}
 259
 260static void
 261wa_mcr_write(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 set)
 262{
 263	wa_mcr_write_clr_set(wal, reg, ~0, set);
 264}
 265
 266static void
 267wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
 268{
 269	wa_write_clr_set(wal, reg, set, set);
 270}
 271
 272static void
 273wa_mcr_write_or(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 set)
 274{
 275	wa_mcr_write_clr_set(wal, reg, set, set);
 276}
 277
 278static void
 279wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr)
 280{
 281	wa_write_clr_set(wal, reg, clr, 0);
 282}
 283
 284static void
 285wa_mcr_write_clr(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 clr)
 286{
 287	wa_mcr_write_clr_set(wal, reg, clr, 0);
 288}
 289
 290/*
 291 * WA operations on "masked register". A masked register has the upper 16 bits
 292 * documented as "masked" in b-spec. Its purpose is to allow writing to just a
 293 * portion of the register without a rmw: you simply write in the upper 16 bits
 294 * the mask of bits you are going to modify.
 295 *
 296 * The wa_masked_* family of functions already does the necessary operations to
  297 * calculate the mask based on the parameters passed, so the user only has to
 298 * provide the lower 16 bits of that register.
 299 */
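/*
 * For example (assuming the usual i915 definition of _MASKED_BIT_ENABLE(x)
 * as ((x) << 16 | (x))): wa_masked_en(wal, reg, BIT(3)) records clr = 0,
 * set = 0x00080008 and read = BIT(3), so the eventual write raises bit 3
 * and its write-enable bit 19 in a single MMIO write, with no
 * read-modify-write needed.
 */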
 300
 301static void
 302wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
 303{
 304	wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true);
 305}
 306
 307static void
 308wa_mcr_masked_en(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 val)
 309{
 310	wa_mcr_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true);
 311}
 312
 313static void
 314wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
 315{
 316	wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true);
 317}
 318
 319static void
 320wa_mcr_masked_dis(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 val)
 321{
 322	wa_mcr_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true);
 323}
 324
 325static void
 326wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg,
 327		    u32 mask, u32 val)
 328{
 329	wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true);
 330}
 331
 332static void
 333wa_mcr_masked_field_set(struct i915_wa_list *wal, i915_mcr_reg_t reg,
 334			u32 mask, u32 val)
 335{
 336	wa_mcr_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true);
 337}
 338
 339static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine,
 340				      struct i915_wa_list *wal)
 341{
 342	wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
 343}
 344
 345static void gen7_ctx_workarounds_init(struct intel_engine_cs *engine,
 346				      struct i915_wa_list *wal)
 347{
 348	wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
 349}
 350
 351static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
 352				      struct i915_wa_list *wal)
 353{
 354	wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
 355
 356	/* WaDisableAsyncFlipPerfMode:bdw,chv */
 357	wa_masked_en(wal, RING_MI_MODE(RENDER_RING_BASE), ASYNC_FLIP_PERF_DISABLE);
 358
 359	/* WaDisablePartialInstShootdown:bdw,chv */
 360	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
 361			 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
 362
 363	/* Use Force Non-Coherent whenever executing a 3D context. This is a
 364	 * workaround for a possible hang in the unlikely event a TLB
 365	 * invalidation occurs during a PSD flush.
 366	 */
 367	/* WaForceEnableNonCoherent:bdw,chv */
 368	/* WaHdcDisableFetchWhenMasked:bdw,chv */
 369	wa_masked_en(wal, HDC_CHICKEN0,
 370		     HDC_DONOT_FETCH_MEM_WHEN_MASKED |
 371		     HDC_FORCE_NON_COHERENT);
 372
 373	/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
 374	 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
 375	 *  polygons in the same 8x4 pixel/sample area to be processed without
 376	 *  stalling waiting for the earlier ones to write to Hierarchical Z
 377	 *  buffer."
 378	 *
 379	 * This optimization is off by default for BDW and CHV; turn it on.
 380	 */
 381	wa_masked_dis(wal, CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
 382
 383	/* Wa4x4STCOptimizationDisable:bdw,chv */
 384	wa_masked_en(wal, CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
 385
 386	/*
 387	 * BSpec recommends 8x4 when MSAA is used,
 388	 * however in practice 16x4 seems fastest.
 389	 *
 390	 * Note that PS/WM thread counts depend on the WIZ hashing
 391	 * disable bit, which we don't touch here, but it's good
 392	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
 393	 */
 394	wa_masked_field_set(wal, GEN7_GT_MODE,
 395			    GEN6_WIZ_HASHING_MASK,
 396			    GEN6_WIZ_HASHING_16x4);
 397}
 398
 399static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
 400				     struct i915_wa_list *wal)
 401{
 402	struct drm_i915_private *i915 = engine->i915;
 403
 404	gen8_ctx_workarounds_init(engine, wal);
 405
 406	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
 407	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
 408
 409	/* WaDisableDopClockGating:bdw
 410	 *
 411	 * Also see the related UCGTCL1 write in bdw_init_clock_gating()
 412	 * to disable EUTC clock gating.
 413	 */
 414	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
 415			 DOP_CLOCK_GATING_DISABLE);
 416
 417	wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3,
 418			 GEN8_SAMPLER_POWER_BYPASS_DIS);
 419
 420	wa_masked_en(wal, HDC_CHICKEN0,
 421		     /* WaForceContextSaveRestoreNonCoherent:bdw */
 422		     HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
 423		     /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
 424		     (IS_BROADWELL_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
 425}
 426
 427static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
 428				     struct i915_wa_list *wal)
 429{
 430	gen8_ctx_workarounds_init(engine, wal);
 431
 432	/* WaDisableThreadStallDopClockGating:chv */
 433	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
 434
 435	/* Improve HiZ throughput on CHV. */
 436	wa_masked_en(wal, HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
 437}
 438
 439static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
 440				      struct i915_wa_list *wal)
 441{
 442	struct drm_i915_private *i915 = engine->i915;
 443
 444	if (HAS_LLC(i915)) {
 445		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
 446		 *
 447		 * Must match Display Engine. See
 448		 * WaCompressedResourceDisplayNewHashMode.
 449		 */
 450		wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
 451			     GEN9_PBE_COMPRESSED_HASH_SELECTION);
 452		wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
 453				 GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
 454	}
 455
 456	/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
 457	/* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
 458	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
 459			 FLOW_CONTROL_ENABLE |
 460			 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
 461
 462	/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
 463	/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
 464	wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
 465			 GEN9_ENABLE_YV12_BUGFIX |
 466			 GEN9_ENABLE_GPGPU_PREEMPTION);
 467
 468	/* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
 469	/* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
 470	wa_masked_en(wal, CACHE_MODE_1,
 471		     GEN8_4x4_STC_OPTIMIZATION_DISABLE |
 472		     GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);
 473
 474	/* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
 475	wa_mcr_masked_dis(wal, GEN9_HALF_SLICE_CHICKEN5,
 476			  GEN9_CCS_TLB_PREFETCH_ENABLE);
 477
 478	/* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
 479	wa_masked_en(wal, HDC_CHICKEN0,
 480		     HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
 481		     HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);
 482
 483	/* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
 484	 * both tied to WaForceContextSaveRestoreNonCoherent
 485	 * in some hsds for skl. We keep the tie for all gen9. The
 486	 * documentation is a bit hazy and so we want to get common behaviour,
 487	 * even though there is no clear evidence we would need both on kbl/bxt.
  488	 * This area has been a source of system hangs so we play it safe
 489	 * and mimic the skl regardless of what bspec says.
 490	 *
 491	 * Use Force Non-Coherent whenever executing a 3D context. This
 492	 * is a workaround for a possible hang in the unlikely event
 493	 * a TLB invalidation occurs during a PSD flush.
 494	 */
 495
 496	/* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
 497	wa_masked_en(wal, HDC_CHICKEN0,
 498		     HDC_FORCE_NON_COHERENT);
 499
 500	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
 501	if (IS_SKYLAKE(i915) ||
 502	    IS_KABYLAKE(i915) ||
 503	    IS_COFFEELAKE(i915) ||
 504	    IS_COMETLAKE(i915))
 505		wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3,
 506				 GEN8_SAMPLER_POWER_BYPASS_DIS);
 507
 508	/* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
 509	wa_mcr_masked_en(wal, HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
 510
 511	/*
 512	 * Supporting preemption with fine-granularity requires changes in the
 513	 * batch buffer programming. Since we can't break old userspace, we
  514	 * need to set our default preemption level to a safe value. Userspace is
 515	 * still able to use more fine-grained preemption levels, since in
 516	 * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
 517	 * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
 518	 * not real HW workarounds, but merely a way to start using preemption
 519	 * while maintaining old contract with userspace.
 520	 */
 521
 522	/* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
 523	wa_masked_dis(wal, GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
 524
 525	/* WaDisableGPGPUMidCmdPreemption:skl,bxt,blk,cfl,[cnl] */
 526	wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
 527			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
 528			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
 529
 530	/* WaClearHIZ_WM_CHICKEN3:bxt,glk */
 531	if (IS_GEN9_LP(i915))
 532		wa_masked_en(wal, GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
 533}
 534
 535static void skl_tune_iz_hashing(struct intel_engine_cs *engine,
 536				struct i915_wa_list *wal)
 537{
 538	struct intel_gt *gt = engine->gt;
 539	u8 vals[3] = { 0, 0, 0 };
 540	unsigned int i;
 541
 542	for (i = 0; i < 3; i++) {
 543		u8 ss;
 544
 545		/*
 546		 * Only consider slices where one, and only one, subslice has 7
 547		 * EUs
 548		 */
 549		if (!is_power_of_2(gt->info.sseu.subslice_7eu[i]))
 550			continue;
 551
 552		/*
 553		 * subslice_7eu[i] != 0 (because of the check above) and
 554		 * ss_max == 4 (maximum number of subslices possible per slice)
 555		 *
 556		 * ->    0 <= ss <= 3;
 557		 */
 558		ss = ffs(gt->info.sseu.subslice_7eu[i]) - 1;
 559		vals[i] = 3 - ss;
 560	}
 561
 562	if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
 563		return;
 564
 565	/* Tune IZ hashing. See intel_device_info_runtime_init() */
 566	wa_masked_field_set(wal, GEN7_GT_MODE,
 567			    GEN9_IZ_HASHING_MASK(2) |
 568			    GEN9_IZ_HASHING_MASK(1) |
 569			    GEN9_IZ_HASHING_MASK(0),
 570			    GEN9_IZ_HASHING(2, vals[2]) |
 571			    GEN9_IZ_HASHING(1, vals[1]) |
 572			    GEN9_IZ_HASHING(0, vals[0]));
 573}
 574
 575static void skl_ctx_workarounds_init(struct intel_engine_cs *engine,
 576				     struct i915_wa_list *wal)
 577{
 578	gen9_ctx_workarounds_init(engine, wal);
 579	skl_tune_iz_hashing(engine, wal);
 580}
 581
 582static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine,
 583				     struct i915_wa_list *wal)
 584{
 585	gen9_ctx_workarounds_init(engine, wal);
 586
 587	/* WaDisableThreadStallDopClockGating:bxt */
 588	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
 589			 STALL_DOP_GATING_DISABLE);
 590
 591	/* WaToEnableHwFixForPushConstHWBug:bxt */
 592	wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
 593		     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
 594}
 595
 596static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
 597				     struct i915_wa_list *wal)
 598{
 599	struct drm_i915_private *i915 = engine->i915;
 600
 601	gen9_ctx_workarounds_init(engine, wal);
 602
 603	/* WaToEnableHwFixForPushConstHWBug:kbl */
 604	if (IS_KABYLAKE(i915) && IS_GRAPHICS_STEP(i915, STEP_C0, STEP_FOREVER))
 605		wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
 606			     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
 607
 608	/* WaDisableSbeCacheDispatchPortSharing:kbl */
 609	wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
 610			 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
 611}
 612
 613static void glk_ctx_workarounds_init(struct intel_engine_cs *engine,
 614				     struct i915_wa_list *wal)
 615{
 616	gen9_ctx_workarounds_init(engine, wal);
 617
 618	/* WaToEnableHwFixForPushConstHWBug:glk */
 619	wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
 620		     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
 621}
 622
 623static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
 624				     struct i915_wa_list *wal)
 625{
 626	gen9_ctx_workarounds_init(engine, wal);
 627
 628	/* WaToEnableHwFixForPushConstHWBug:cfl */
 629	wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
 630		     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
 631
 632	/* WaDisableSbeCacheDispatchPortSharing:cfl */
 633	wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
 634			 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
 635}
 636
 637static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
 638				     struct i915_wa_list *wal)
 639{
 640	/* Wa_1406697149 (WaDisableBankHangMode:icl) */
 641	wa_write(wal, GEN8_L3CNTLREG, GEN8_ERRDETBCTRL);
 642
 643	/* WaForceEnableNonCoherent:icl
 644	 * This is not the same workaround as in early Gen9 platforms, where
 645	 * lacking this could cause system hangs, but coherency performance
 646	 * overhead is high and only a few compute workloads really need it
 647	 * (the register is whitelisted in hardware now, so UMDs can opt in
 648	 * for coherency if they have a good reason).
 649	 */
 650	wa_mcr_masked_en(wal, ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);
 651
 652	/* WaEnableFloatBlendOptimization:icl */
 653	wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
 654		   _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE),
 655		   0 /* write-only, so skip validation */,
 656		   true);
 657
 658	/* WaDisableGPGPUMidThreadPreemption:icl */
 659	wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
 660			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
 661			    GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
 662
 663	/* allow headerless messages for preemptible GPGPU context */
 664	wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
 665			 GEN11_SAMPLER_ENABLE_HEADLESS_MSG);
 666
 667	/* Wa_1604278689:icl,ehl */
 668	wa_write(wal, IVB_FBC_RT_BASE, 0xFFFFFFFF & ~ILK_FBC_RT_VALID);
 669	wa_write_clr_set(wal, IVB_FBC_RT_BASE_UPPER,
 670			 0,
 671			 0xFFFFFFFF);
 672
 673	/* Wa_1406306137:icl,ehl */
 674	wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU);
 675}
 676
 677/*
 678 * These settings aren't actually workarounds, but general tuning settings that
  679 * need to be programmed on the dg2 platform.
 680 */
 681static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine,
 682				   struct i915_wa_list *wal)
 683{
 684	wa_mcr_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP);
 685	wa_mcr_write_clr_set(wal, XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
 686			     REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f));
 687	wa_mcr_write_clr_set(wal, XEHP_FF_MODE2, FF_MODE2_TDS_TIMER_MASK,
 688			     FF_MODE2_TDS_TIMER_128);
 689}
 690
 691static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
 692				       struct i915_wa_list *wal)
 693{
 694	struct drm_i915_private *i915 = engine->i915;
 695
 696	/*
 697	 * Wa_1409142259:tgl,dg1,adl-p
 698	 * Wa_1409347922:tgl,dg1,adl-p
 699	 * Wa_1409252684:tgl,dg1,adl-p
 700	 * Wa_1409217633:tgl,dg1,adl-p
 701	 * Wa_1409207793:tgl,dg1,adl-p
 702	 * Wa_1409178076:tgl,dg1,adl-p
 703	 * Wa_1408979724:tgl,dg1,adl-p
 704	 * Wa_14010443199:tgl,rkl,dg1,adl-p
 705	 * Wa_14010698770:tgl,rkl,dg1,adl-s,adl-p
 706	 * Wa_1409342910:tgl,rkl,dg1,adl-s,adl-p
 707	 */
 708	wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3,
 709		     GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
 710
 711	/* WaDisableGPGPUMidThreadPreemption:gen12 */
 712	wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
 713			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
 714			    GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
 715
 716	/*
 717	 * Wa_16011163337 - GS_TIMER
 718	 *
 719	 * TDS_TIMER: Although some platforms refer to it as Wa_1604555607, we
 720	 * need to program it even on those that don't explicitly list that
 721	 * workaround.
 722	 *
 723	 * Note that the programming of GEN12_FF_MODE2 is further modified
 724	 * according to the FF_MODE2 guidance given by Wa_1608008084.
 725	 * Wa_1608008084 tells us the FF_MODE2 register will return the wrong
 726	 * value when read from the CPU.
 727	 *
 728	 * The default value for this register is zero for all fields.
 729	 * So instead of doing a RMW we should just write the desired values
 730	 * for TDS and GS timers. Note that since the readback can't be trusted,
 731	 * the clear mask is just set to ~0 to make sure other bits are not
 732	 * inadvertently set. For the same reason read verification is ignored.
 733	 */
 734	wa_add(wal,
 735	       GEN12_FF_MODE2,
 736	       ~0,
 737	       FF_MODE2_TDS_TIMER_128 | FF_MODE2_GS_TIMER_224,
 738	       0, false);
 739
 740	if (!IS_DG1(i915)) {
 741		/* Wa_1806527549 */
 742		wa_masked_en(wal, HIZ_CHICKEN, HZ_DEPTH_TEST_LE_GE_OPT_DISABLE);
 743
 744		/* Wa_1606376872 */
 745		wa_masked_en(wal, COMMON_SLICE_CHICKEN4, DISABLE_TDC_LOAD_BALANCING_CALC);
 746	}
 747}
 748
 749static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine,
 750				     struct i915_wa_list *wal)
 751{
 752	gen12_ctx_workarounds_init(engine, wal);
 753
 754	/* Wa_1409044764 */
 755	wa_masked_dis(wal, GEN11_COMMON_SLICE_CHICKEN3,
 756		      DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN);
 757
 758	/* Wa_22010493298 */
 759	wa_masked_en(wal, HIZ_CHICKEN,
 760		     DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE);
 761}
 762
 763static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine,
 764				     struct i915_wa_list *wal)
 765{
 766	dg2_ctx_gt_tuning_init(engine, wal);
 767
 768	/* Wa_16013271637:dg2 */
 769	wa_mcr_masked_en(wal, XEHP_SLICE_COMMON_ECO_CHICKEN1,
 770			 MSC_MSAA_REODER_BUF_BYPASS_DISABLE);
 771
 772	/* Wa_14014947963:dg2 */
 773	wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000);
 774
 775	/* Wa_18018764978:dg2 */
 776	wa_mcr_masked_en(wal, XEHP_PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL);
 777
 778	/* Wa_18019271663:dg2 */
 779	wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE);
 780
 781	/* Wa_14019877138:dg2 */
 782	wa_mcr_masked_en(wal, XEHP_PSS_CHICKEN, FD_END_COLLECT);
 783}
 784
 785static void xelpg_ctx_gt_tuning_init(struct intel_engine_cs *engine,
 786				     struct i915_wa_list *wal)
 787{
 788	struct intel_gt *gt = engine->gt;
 789
 790	dg2_ctx_gt_tuning_init(engine, wal);
 791
 792	if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_B0, STEP_FOREVER) ||
 793	    IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER))
 794		wa_add(wal, DRAW_WATERMARK, VERT_WM_VAL, 0x3FF, 0, false);
 795}
 796
 797static void xelpg_ctx_workarounds_init(struct intel_engine_cs *engine,
 798				       struct i915_wa_list *wal)
 799{
 800	struct intel_gt *gt = engine->gt;
 801
 802	xelpg_ctx_gt_tuning_init(engine, wal);
 803
 804	if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
 805	    IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) {
 806		/* Wa_14014947963 */
 807		wa_masked_field_set(wal, VF_PREEMPTION,
 808				    PREEMPTION_VERTEX_COUNT, 0x4000);
 809
 810		/* Wa_16013271637 */
 811		wa_mcr_masked_en(wal, XEHP_SLICE_COMMON_ECO_CHICKEN1,
 812				 MSC_MSAA_REODER_BUF_BYPASS_DISABLE);
 813
 814		/* Wa_18019627453 */
 815		wa_mcr_masked_en(wal, VFLSKPD, VF_PREFETCH_TLB_DIS);
 816
 817		/* Wa_18018764978 */
 818		wa_mcr_masked_en(wal, XEHP_PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL);
 819	}
 820
 821	/* Wa_18019271663 */
 822	wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE);
 823}
 824
 825static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine,
 826					 struct i915_wa_list *wal)
 827{
 828	/*
 829	 * This is a "fake" workaround defined by software to ensure we
 830	 * maintain reliable, backward-compatible behavior for userspace with
 831	 * regards to how nested MI_BATCH_BUFFER_START commands are handled.
 832	 *
 833	 * The per-context setting of MI_MODE[12] determines whether the bits
 834	 * of a nested MI_BATCH_BUFFER_START instruction should be interpreted
 835	 * in the traditional manner or whether they should instead use a new
 836	 * tgl+ meaning that breaks backward compatibility, but allows nesting
 837	 * into 3rd-level batchbuffers.  When this new capability was first
 838	 * added in TGL, it remained off by default unless a context
 839	 * intentionally opted in to the new behavior.  However Xe_HPG now
 840	 * flips this on by default and requires that we explicitly opt out if
 841	 * we don't want the new behavior.
 842	 *
 843	 * From a SW perspective, we want to maintain the backward-compatible
 844	 * behavior for userspace, so we'll apply a fake workaround to set it
 845	 * back to the legacy behavior on platforms where the hardware default
 846	 * is to break compatibility.  At the moment there is no Linux
 847	 * userspace that utilizes third-level batchbuffers, so this will avoid
  848	 * userspace needing to make any changes; using the legacy
 849	 * meaning is the correct thing to do.  If/when we have userspace
 850	 * consumers that want to utilize third-level batch nesting, we can
 851	 * provide a context parameter to allow them to opt-in.
 852	 */
 853	wa_masked_dis(wal, RING_MI_MODE(engine->mmio_base), TGL_NESTED_BB_EN);
 854}
 855
 856static void gen12_ctx_gt_mocs_init(struct intel_engine_cs *engine,
 857				   struct i915_wa_list *wal)
 858{
 859	u8 mocs;
 860
 861	/*
 862	 * Some blitter commands do not have a field for MOCS, those
  863	 * commands will use the MOCS index pointed to by BLIT_CCTL.
  864	 * BLIT_CCTL registers need to be programmed to un-cached.
 865	 */
 866	if (engine->class == COPY_ENGINE_CLASS) {
 867		mocs = engine->gt->mocs.uc_index;
 868		wa_write_clr_set(wal,
 869				 BLIT_CCTL(engine->mmio_base),
 870				 BLIT_CCTL_MASK,
 871				 BLIT_CCTL_MOCS(mocs, mocs));
 872	}
 873}
 874
 875/*
  876 * gen12_ctx_gt_fake_wa_init() doesn't program an official workaround
  877 * defined by the hardware team, but rather general context registers.
  878 * Adding this context register programming to the context workaround list
  879 * allows us to use the wa framework for proper application and validation.
 880 */
 881static void
 882gen12_ctx_gt_fake_wa_init(struct intel_engine_cs *engine,
 883			  struct i915_wa_list *wal)
 884{
 885	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
 886		fakewa_disable_nestedbb_mode(engine, wal);
 887
 888	gen12_ctx_gt_mocs_init(engine, wal);
 889}
 890
 891static void
 892__intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
 893			   struct i915_wa_list *wal,
 894			   const char *name)
 895{
 896	struct drm_i915_private *i915 = engine->i915;
 897
 898	wa_init_start(wal, engine->gt, name, engine->name);
 899
 900	/* Applies to all engines */
 901	/*
  902	 * Fake workarounds are not actual workarounds but programming of
  903	 * context registers using the workaround framework.
 904	 */
 905	if (GRAPHICS_VER(i915) >= 12)
 906		gen12_ctx_gt_fake_wa_init(engine, wal);
 907
 908	if (engine->class != RENDER_CLASS)
 909		goto done;
 910
 911	if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 71)))
 912		xelpg_ctx_workarounds_init(engine, wal);
 913	else if (IS_PONTEVECCHIO(i915))
 914		; /* noop; none at this time */
 915	else if (IS_DG2(i915))
 916		dg2_ctx_workarounds_init(engine, wal);
 917	else if (IS_XEHPSDV(i915))
 918		; /* noop; none at this time */
 919	else if (IS_DG1(i915))
 920		dg1_ctx_workarounds_init(engine, wal);
 921	else if (GRAPHICS_VER(i915) == 12)
 922		gen12_ctx_workarounds_init(engine, wal);
 923	else if (GRAPHICS_VER(i915) == 11)
 924		icl_ctx_workarounds_init(engine, wal);
 925	else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915))
 926		cfl_ctx_workarounds_init(engine, wal);
 927	else if (IS_GEMINILAKE(i915))
 928		glk_ctx_workarounds_init(engine, wal);
 929	else if (IS_KABYLAKE(i915))
 930		kbl_ctx_workarounds_init(engine, wal);
 931	else if (IS_BROXTON(i915))
 932		bxt_ctx_workarounds_init(engine, wal);
 933	else if (IS_SKYLAKE(i915))
 934		skl_ctx_workarounds_init(engine, wal);
 935	else if (IS_CHERRYVIEW(i915))
 936		chv_ctx_workarounds_init(engine, wal);
 937	else if (IS_BROADWELL(i915))
 938		bdw_ctx_workarounds_init(engine, wal);
 939	else if (GRAPHICS_VER(i915) == 7)
 940		gen7_ctx_workarounds_init(engine, wal);
 941	else if (GRAPHICS_VER(i915) == 6)
 942		gen6_ctx_workarounds_init(engine, wal);
 943	else if (GRAPHICS_VER(i915) < 8)
 944		;
 945	else
 946		MISSING_CASE(GRAPHICS_VER(i915));
 947
 948done:
 949	wa_init_finish(wal);
 950}
 951
 952void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
 953{
 954	__intel_engine_init_ctx_wa(engine, &engine->ctx_wa_list, "context");
 955}
 956
 957int intel_engine_emit_ctx_wa(struct i915_request *rq)
 958{
 959	struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
 960	struct intel_uncore *uncore = rq->engine->uncore;
 961	enum forcewake_domains fw;
 962	unsigned long flags;
 963	struct i915_wa *wa;
 964	unsigned int i;
 965	u32 *cs;
 966	int ret;
 967
 968	if (wal->count == 0)
 969		return 0;
 970
 971	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
 972	if (ret)
 973		return ret;
 974
 975	cs = intel_ring_begin(rq, (wal->count * 2 + 2));
 976	if (IS_ERR(cs))
 977		return PTR_ERR(cs);
 978
 979	fw = wal_get_fw_for_rmw(uncore, wal);
 980
 981	intel_gt_mcr_lock(wal->gt, &flags);
 982	spin_lock(&uncore->lock);
 983	intel_uncore_forcewake_get__locked(uncore, fw);
 984
 985	*cs++ = MI_LOAD_REGISTER_IMM(wal->count);
 986	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
 987		u32 val;
 988
 989		/* Skip reading the register if it's not really needed */
 990		if (wa->masked_reg || (wa->clr | wa->set) == U32_MAX) {
 991			val = wa->set;
 992		} else {
 993			val = wa->is_mcr ?
 994				intel_gt_mcr_read_any_fw(wal->gt, wa->mcr_reg) :
 995				intel_uncore_read_fw(uncore, wa->reg);
 996			val &= ~wa->clr;
 997			val |= wa->set;
 998		}
 999
1000		*cs++ = i915_mmio_reg_offset(wa->reg);
1001		*cs++ = val;
1002	}
1003	*cs++ = MI_NOOP;
1004
1005	intel_uncore_forcewake_put__locked(uncore, fw);
1006	spin_unlock(&uncore->lock);
1007	intel_gt_mcr_unlock(wal->gt, flags);
1008
1009	intel_ring_advance(rq, cs);
1010
1011	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
1012	if (ret)
1013		return ret;
1014
1015	return 0;
1016}
1017
1018static void
1019gen4_gt_workarounds_init(struct intel_gt *gt,
1020			 struct i915_wa_list *wal)
1021{
1022	/* WaDisable_RenderCache_OperationalFlush:gen4,ilk */
1023	wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
1024}
1025
1026static void
1027g4x_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1028{
1029	gen4_gt_workarounds_init(gt, wal);
1030
1031	/* WaDisableRenderCachePipelinedFlush:g4x,ilk */
1032	wa_masked_en(wal, CACHE_MODE_0, CM0_PIPELINED_RENDER_FLUSH_DISABLE);
1033}
1034
1035static void
1036ilk_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1037{
1038	g4x_gt_workarounds_init(gt, wal);
1039
1040	wa_masked_en(wal, _3D_CHICKEN2, _3D_CHICKEN2_WM_READ_PIPELINED);
1041}
1042
1043static void
1044snb_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1045{
1046}
1047
1048static void
1049ivb_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1050{
1051	/* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
1052	wa_masked_dis(wal,
1053		      GEN7_COMMON_SLICE_CHICKEN1,
1054		      GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
1055
1056	/* WaApplyL3ControlAndL3ChickenMode:ivb */
1057	wa_write(wal, GEN7_L3CNTLREG1, GEN7_WA_FOR_GEN7_L3_CONTROL);
1058	wa_write(wal, GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE);
1059
1060	/* WaForceL3Serialization:ivb */
1061	wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);
1062}
1063
1064static void
1065vlv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1066{
1067	/* WaForceL3Serialization:vlv */
1068	wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);
1069
1070	/*
1071	 * WaIncreaseL3CreditsForVLVB0:vlv
1072	 * This is the hardware default actually.
1073	 */
1074	wa_write(wal, GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
1075}
1076
1077static void
1078hsw_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1079{
1080	/* L3 caching of data atomics doesn't work -- disable it. */
1081	wa_write(wal, HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
1082
1083	wa_add(wal,
1084	       HSW_ROW_CHICKEN3, 0,
1085	       _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE),
1086	       0 /* XXX does this reg exist? */, true);
1087
1088	/* WaVSRefCountFullforceMissDisable:hsw */
1089	wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME);
1090}
1091
1092static void
1093gen9_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
1094{
1095	const struct sseu_dev_info *sseu = &to_gt(i915)->info.sseu;
1096	unsigned int slice, subslice;
1097	u32 mcr, mcr_mask;
1098
1099	GEM_BUG_ON(GRAPHICS_VER(i915) != 9);
1100
1101	/*
1102	 * WaProgramMgsrForCorrectSliceSpecificMmioReads:gen9,glk,kbl,cml
1103	 * Before any MMIO read into slice/subslice specific registers, MCR
1104	 * packet control register needs to be programmed to point to any
1105	 * enabled s/ss pair. Otherwise, incorrect values will be returned.
 1106	 * This means each subsequent MMIO read will be forwarded to a
1107	 * specific s/ss combination, but this is OK since these registers
1108	 * are consistent across s/ss in almost all cases. In the rare
1109	 * occasions, such as INSTDONE, where this value is dependent
1110	 * on s/ss combo, the read should be done with read_subslice_reg.
1111	 */
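	/*
	 * Worked example with hypothetical fusing: with slice_mask = 0x1 and
	 * subslices 1 and 2 enabled in slice 0 (mask 0b0110), the code below
	 * steers to slice 0 / subslice 1, i.e. the lowest enabled s/ss pair.
	 */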
1112	slice = ffs(sseu->slice_mask) - 1;
1113	GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask.hsw));
1114	subslice = ffs(intel_sseu_get_hsw_subslices(sseu, slice));
1115	GEM_BUG_ON(!subslice);
1116	subslice--;
1117
1118	/*
1119	 * We use GEN8_MCR..() macros to calculate the |mcr| value for
1120	 * Gen9 to address WaProgramMgsrForCorrectSliceSpecificMmioReads
1121	 */
1122	mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
1123	mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
1124
1125	drm_dbg(&i915->drm, "MCR slice:%d/subslice:%d = %x\n", slice, subslice, mcr);
1126
1127	wa_write_clr_set(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr);
1128}
1129
1130static void
1131gen9_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1132{
1133	struct drm_i915_private *i915 = gt->i915;
1134
1135	/* WaProgramMgsrForCorrectSliceSpecificMmioReads:glk,kbl,cml,gen9 */
1136	gen9_wa_init_mcr(i915, wal);
1137
1138	/* WaDisableKillLogic:bxt,skl,kbl */
1139	if (!IS_COFFEELAKE(i915) && !IS_COMETLAKE(i915))
1140		wa_write_or(wal,
1141			    GAM_ECOCHK,
1142			    ECOCHK_DIS_TLB);
1143
1144	if (HAS_LLC(i915)) {
1145		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
1146		 *
1147		 * Must match Display Engine. See
1148		 * WaCompressedResourceDisplayNewHashMode.
1149		 */
1150		wa_write_or(wal,
1151			    MMCD_MISC_CTRL,
1152			    MMCD_PCLA | MMCD_HOTSPOT_EN);
1153	}
1154
1155	/* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
1156	wa_write_or(wal,
1157		    GAM_ECOCHK,
1158		    BDW_DISABLE_HDC_INVALIDATION);
1159}
1160
1161static void
1162skl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1163{
1164	gen9_gt_workarounds_init(gt, wal);
1165
1166	/* WaDisableGafsUnitClkGating:skl */
1167	wa_write_or(wal,
1168		    GEN7_UCGCTL4,
1169		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
1170
1171	/* WaInPlaceDecompressionHang:skl */
1172	if (IS_SKYLAKE(gt->i915) && IS_GRAPHICS_STEP(gt->i915, STEP_A0, STEP_H0))
1173		wa_write_or(wal,
1174			    GEN9_GAMT_ECO_REG_RW_IA,
1175			    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
1176}
1177
1178static void
1179kbl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1180{
1181	gen9_gt_workarounds_init(gt, wal);
1182
1183	/* WaDisableDynamicCreditSharing:kbl */
1184	if (IS_KABYLAKE(gt->i915) && IS_GRAPHICS_STEP(gt->i915, 0, STEP_C0))
1185		wa_write_or(wal,
1186			    GAMT_CHKN_BIT_REG,
1187			    GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
1188
1189	/* WaDisableGafsUnitClkGating:kbl */
1190	wa_write_or(wal,
1191		    GEN7_UCGCTL4,
1192		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
1193
1194	/* WaInPlaceDecompressionHang:kbl */
1195	wa_write_or(wal,
1196		    GEN9_GAMT_ECO_REG_RW_IA,
1197		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
1198}
1199
1200static void
1201glk_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1202{
1203	gen9_gt_workarounds_init(gt, wal);
1204}
1205
1206static void
1207cfl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1208{
1209	gen9_gt_workarounds_init(gt, wal);
1210
1211	/* WaDisableGafsUnitClkGating:cfl */
1212	wa_write_or(wal,
1213		    GEN7_UCGCTL4,
1214		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
1215
1216	/* WaInPlaceDecompressionHang:cfl */
1217	wa_write_or(wal,
1218		    GEN9_GAMT_ECO_REG_RW_IA,
1219		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
1220}
1221
1222static void __set_mcr_steering(struct i915_wa_list *wal,
1223			       i915_reg_t steering_reg,
1224			       unsigned int slice, unsigned int subslice)
1225{
1226	u32 mcr, mcr_mask;
1227
1228	mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
1229	mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
1230
1231	wa_write_clr_set(wal, steering_reg, mcr_mask, mcr);
1232}
1233
1234static void debug_dump_steering(struct intel_gt *gt)
1235{
1236	struct drm_printer p = drm_debug_printer("MCR Steering:");
1237
1238	if (drm_debug_enabled(DRM_UT_DRIVER))
1239		intel_gt_mcr_report_steering(&p, gt, false);
1240}
1241
1242static void __add_mcr_wa(struct intel_gt *gt, struct i915_wa_list *wal,
1243			 unsigned int slice, unsigned int subslice)
1244{
1245	__set_mcr_steering(wal, GEN8_MCR_SELECTOR, slice, subslice);
1246
1247	gt->default_steering.groupid = slice;
1248	gt->default_steering.instanceid = subslice;
1249
1250	debug_dump_steering(gt);
1251}
1252
1253static void
1254icl_wa_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
1255{
1256	const struct sseu_dev_info *sseu = &gt->info.sseu;
1257	unsigned int subslice;
1258
1259	GEM_BUG_ON(GRAPHICS_VER(gt->i915) < 11);
1260	GEM_BUG_ON(hweight8(sseu->slice_mask) > 1);
1261
1262	/*
1263	 * Although a platform may have subslices, we need to always steer
1264	 * reads to the lowest instance that isn't fused off.  When Render
1265	 * Power Gating is enabled, grabbing forcewake will only power up a
1266	 * single subslice (the "minconfig") if there isn't a real workload
1267	 * that needs to be run; this means that if we steer register reads to
1268	 * one of the higher subslices, we run the risk of reading back 0's or
1269	 * random garbage.
1270	 */
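	/*
	 * E.g. (hypothetical fusing): if only subslices 2 and 3 of slice 0
	 * survive, intel_sseu_get_hsw_subslices() returns 0b1100 and __ffs()
	 * below picks subslice 2 as the steering target.
	 */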
1271	subslice = __ffs(intel_sseu_get_hsw_subslices(sseu, 0));
1272
1273	/*
1274	 * If the subslice we picked above also steers us to a valid L3 bank,
1275	 * then we can just rely on the default steering and won't need to
1276	 * worry about explicitly re-steering L3BANK reads later.
1277	 */
1278	if (gt->info.l3bank_mask & BIT(subslice))
1279		gt->steering_table[L3BANK] = NULL;
1280
1281	__add_mcr_wa(gt, wal, 0, subslice);
1282}
1283
1284static void
1285xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
1286{
1287	const struct sseu_dev_info *sseu = &gt->info.sseu;
1288	unsigned long slice, subslice = 0, slice_mask = 0;
1289	u32 lncf_mask = 0;
1290	int i;
1291
1292	/*
1293	 * On Xe_HP the steering increases in complexity. There are now several
1294	 * more units that require steering and we're not guaranteed to be able
1295	 * to find a common setting for all of them. These are:
1296	 * - GSLICE (fusable)
1297	 * - DSS (sub-unit within gslice; fusable)
1298	 * - L3 Bank (fusable)
1299	 * - MSLICE (fusable)
1300	 * - LNCF (sub-unit within mslice; always present if mslice is present)
1301	 *
1302	 * We'll do our default/implicit steering based on GSLICE (in the
1303	 * sliceid field) and DSS (in the subsliceid field).  If we can
1304	 * find overlap between the valid MSLICE and/or LNCF values with
1305	 * a suitable GSLICE, then we can just re-use the default value and
 1306	 * skip any explicit steering at runtime.
1307	 *
1308	 * We only need to look for overlap between GSLICE/MSLICE/LNCF to find
1309	 * a valid sliceid value.  DSS steering is the only type of steering
1310	 * that utilizes the 'subsliceid' bits.
1311	 *
1312	 * Also note that, even though the steering domain is called "GSlice"
1313	 * and it is encoded in the register using the gslice format, the spec
1314	 * says that the combined (geometry | compute) fuse should be used to
1315	 * select the steering.
1316	 */
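	/*
	 * Worked example with made-up fuse values: with mslice_mask = 0b11,
	 * the loop below yields lncf_mask = 0xf (two LNCFs per mslice, so
	 * sliceids 0-3 are valid for LNCF). If the gslice candidates derived
	 * from the DSS fuse include sliceid 0, that value also satisfies
	 * MSLICE and LNCF steering, so both steering_table entries are
	 * cleared and the default sliceid covers them.
	 */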
1317
1318	/* Find the potential gslice candidates */
1319	slice_mask = intel_slicemask_from_xehp_dssmask(sseu->subslice_mask,
1320						       GEN_DSS_PER_GSLICE);
1321
1322	/*
1323	 * Find the potential LNCF candidates.  Either LNCF within a valid
1324	 * mslice is fine.
1325	 */
1326	for_each_set_bit(i, &gt->info.mslice_mask, GEN12_MAX_MSLICES)
1327		lncf_mask |= (0x3 << (i * 2));
1328
1329	/*
1330	 * Are there any sliceid values that work for both GSLICE and LNCF
1331	 * steering?
1332	 */
1333	if (slice_mask & lncf_mask) {
1334		slice_mask &= lncf_mask;
1335		gt->steering_table[LNCF] = NULL;
1336	}
1337
1338	/* How about sliceid values that also work for MSLICE steering? */
1339	if (slice_mask & gt->info.mslice_mask) {
1340		slice_mask &= gt->info.mslice_mask;
1341		gt->steering_table[MSLICE] = NULL;
1342	}
1343
1344	if (IS_XEHPSDV(gt->i915) && slice_mask & BIT(0))
1345		gt->steering_table[GAM] = NULL;
1346
1347	slice = __ffs(slice_mask);
1348	subslice = intel_sseu_find_first_xehp_dss(sseu, GEN_DSS_PER_GSLICE, slice) %
1349		GEN_DSS_PER_GSLICE;
1350
1351	__add_mcr_wa(gt, wal, slice, subslice);
1352
1353	/*
1354	 * SQIDI ranges are special because they use different steering
1355	 * registers than everything else we work with.  On XeHP SDV and
1356	 * DG2-G10, any value in the steering registers will work fine since
1357	 * all instances are present, but DG2-G11 only has SQIDI instances at
1358	 * ID's 2 and 3, so we need to steer to one of those.  For simplicity
1359	 * we'll just steer to a hardcoded "2" since that value will work
1360	 * everywhere.
1361	 */
1362	__set_mcr_steering(wal, MCFG_MCR_SELECTOR, 0, 2);
1363	__set_mcr_steering(wal, SF_MCR_SELECTOR, 0, 2);
1364
1365	/*
1366	 * On DG2, GAM registers have a dedicated steering control register
1367	 * and must always be programmed to a hardcoded groupid of "1."
1368	 */
1369	if (IS_DG2(gt->i915))
1370		__set_mcr_steering(wal, GAM_MCR_SELECTOR, 1, 0);
1371}
1372
1373static void
1374pvc_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
1375{
1376	unsigned int dss;
1377
1378	/*
1379	 * Setup implicit steering for COMPUTE and DSS ranges to the first
1380	 * non-fused-off DSS.  All other types of MCR registers will be
1381	 * explicitly steered.
1382	 */
1383	dss = intel_sseu_find_first_xehp_dss(&gt->info.sseu, 0, 0);
1384	__add_mcr_wa(gt, wal, dss / GEN_DSS_PER_CSLICE, dss % GEN_DSS_PER_CSLICE);
1385}
1386
1387static void
1388icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1389{
1390	struct drm_i915_private *i915 = gt->i915;
1391
1392	icl_wa_init_mcr(gt, wal);
1393
1394	/* WaModifyGamTlbPartitioning:icl */
1395	wa_write_clr_set(wal,
1396			 GEN11_GACB_PERF_CTRL,
1397			 GEN11_HASH_CTRL_MASK,
1398			 GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);
1399
1400	/* Wa_1405766107:icl
1401	 * Formerly known as WaCL2SFHalfMaxAlloc
1402	 */
1403	wa_write_or(wal,
1404		    GEN11_LSN_UNSLCVC,
1405		    GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
1406		    GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);
1407
1408	/* Wa_220166154:icl
1409	 * Formerly known as WaDisCtxReload
1410	 */
1411	wa_write_or(wal,
1412		    GEN8_GAMW_ECO_DEV_RW_IA,
1413		    GAMW_ECO_DEV_CTX_RELOAD_DISABLE);
1414
1415	/* Wa_1406463099:icl
1416	 * Formerly known as WaGamTlbPendError
1417	 */
1418	wa_write_or(wal,
1419		    GAMT_CHKN_BIT_REG,
1420		    GAMT_CHKN_DISABLE_L3_COH_PIPE);
1421
1422	/*
1423	 * Wa_1408615072:icl,ehl  (vsunit)
1424	 * Wa_1407596294:icl,ehl  (hsunit)
1425	 */
1426	wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
1427		    VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS);
1428
1429	/* Wa_1407352427:icl,ehl */
1430	wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
1431		    PSDUNIT_CLKGATE_DIS);
1432
1433	/* Wa_1406680159:icl,ehl */
1434	wa_mcr_write_or(wal,
1435			GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE,
1436			GWUNIT_CLKGATE_DIS);
1437
1438	/* Wa_1607087056:icl,ehl,jsl */
1439	if (IS_ICELAKE(i915) ||
1440		((IS_JASPERLAKE(i915) || IS_ELKHARTLAKE(i915)) &&
1441		IS_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)))
1442		wa_write_or(wal,
1443			    GEN11_SLICE_UNIT_LEVEL_CLKGATE,
1444			    L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
1445
1446	/*
1447	 * This is not a documented workaround, but rather an optimization
1448	 * to reduce sampler power.
1449	 */
1450	wa_mcr_write_clr(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
1451}
1452
1453/*
1454 * Though there are per-engine instances of these registers,
1455 * they retain their value through engine resets and should
1456 * only be provided on the GT workaround list rather than
1457 * the engine-specific workaround list.
1458 */
1459static void
1460wa_14011060649(struct intel_gt *gt, struct i915_wa_list *wal)
1461{
1462	struct intel_engine_cs *engine;
1463	int id;
1464
1465	for_each_engine(engine, gt, id) {
1466		if (engine->class != VIDEO_DECODE_CLASS ||
1467		    (engine->instance % 2))
1468			continue;
1469
1470		wa_write_or(wal, VDBOX_CGCTL3F10(engine->mmio_base),
1471			    IECPUNIT_CLKGATE_DIS);
1472	}
1473}
1474
1475static void
1476gen12_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1477{
1478	icl_wa_init_mcr(gt, wal);
1479
1480	/* Wa_14011060649:tgl,rkl,dg1,adl-s,adl-p */
1481	wa_14011060649(gt, wal);
1482
1483	/* Wa_14011059788:tgl,rkl,adl-s,dg1,adl-p */
1484	wa_mcr_write_or(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
1485
1486	/*
1487	 * Wa_14015795083
1488	 *
1489	 * Firmware on some gen12 platforms locks the MISCCPCTL register,
1490	 * preventing i915 from modifying it for this workaround.  Skip the
1491	 * readback verification for this workaround on debug builds; if the
1492	 * workaround doesn't stick due to firmware behavior, it's not an error
1493	 * that we want CI to flag.
1494	 */
1495	wa_add(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE,
1496	       0, 0, false);
1497}
1498
1499static void
1500dg1_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1501{
1502	gen12_gt_workarounds_init(gt, wal);
1503
1504	/* Wa_1409420604:dg1 */
1505	wa_mcr_write_or(wal, SUBSLICE_UNIT_LEVEL_CLKGATE2,
1506			CPSSUNIT_CLKGATE_DIS);
1507
1508	/* Wa_1408615072:dg1 */
1509	/* Empirical testing shows this register is unaffected by engine reset. */
1510	wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2, VSUNIT_CLKGATE_DIS_TGL);
1511}
1512
1513static void
1514xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1515{
1516	struct drm_i915_private *i915 = gt->i915;
1517
1518	xehp_init_mcr(gt, wal);
1519
1520	/* Wa_1409757795:xehpsdv */
1521	wa_mcr_write_or(wal, SCCGCTL94DC, CG3DDISURB);
1522
1523	/* Wa_18011725039:xehpsdv */
1524	if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_B0)) {
1525		wa_mcr_masked_dis(wal, MLTICTXCTL, TDONRENDER);
1526		wa_mcr_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH);
1527	}
1528
1529	/* Wa_16011155590:xehpsdv */
1530	if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
1531		wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
1532			    TSGUNIT_CLKGATE_DIS);
1533
1534	/* Wa_14011780169:xehpsdv */
1535	if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_B0, STEP_FOREVER)) {
1536		wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS |
1537			    GAMTLBVDBOX7_CLKGATE_DIS |
1538			    GAMTLBVDBOX6_CLKGATE_DIS |
1539			    GAMTLBVDBOX5_CLKGATE_DIS |
1540			    GAMTLBVDBOX4_CLKGATE_DIS |
1541			    GAMTLBVDBOX3_CLKGATE_DIS |
1542			    GAMTLBVDBOX2_CLKGATE_DIS |
1543			    GAMTLBVDBOX1_CLKGATE_DIS |
1544			    GAMTLBVDBOX0_CLKGATE_DIS |
1545			    GAMTLBKCR_CLKGATE_DIS |
1546			    GAMTLBGUC_CLKGATE_DIS |
1547			    GAMTLBBLT_CLKGATE_DIS);
1548		wa_write_or(wal, UNSLCGCTL9444, GAMTLBGFXA0_CLKGATE_DIS |
1549			    GAMTLBGFXA1_CLKGATE_DIS |
1550			    GAMTLBCOMPA0_CLKGATE_DIS |
1551			    GAMTLBCOMPA1_CLKGATE_DIS |
1552			    GAMTLBCOMPB0_CLKGATE_DIS |
1553			    GAMTLBCOMPB1_CLKGATE_DIS |
1554			    GAMTLBCOMPC0_CLKGATE_DIS |
1555			    GAMTLBCOMPC1_CLKGATE_DIS |
1556			    GAMTLBCOMPD0_CLKGATE_DIS |
1557			    GAMTLBCOMPD1_CLKGATE_DIS |
1558			    GAMTLBMERT_CLKGATE_DIS   |
1559			    GAMTLBVEBOX3_CLKGATE_DIS |
1560			    GAMTLBVEBOX2_CLKGATE_DIS |
1561			    GAMTLBVEBOX1_CLKGATE_DIS |
1562			    GAMTLBVEBOX0_CLKGATE_DIS);
1563	}
1564
1565	/* Wa_16012725990:xehpsdv */
1566	if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_FOREVER))
1567		wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, VFUNIT_CLKGATE_DIS);
1568
1569	/* Wa_14011060649:xehpsdv */
1570	wa_14011060649(gt, wal);
1571
1572	/* Wa_14012362059:xehpsdv */
1573	wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
1574
1575	/* Wa_14014368820:xehpsdv */
1576	wa_mcr_write_or(wal, XEHP_GAMCNTRL_CTRL,
1577			INVALIDATION_BROADCAST_MODE_DIS | GLOBAL_INVALIDATION_MODE);
1578
1579	/* Wa_14010670810:xehpsdv */
1580	wa_mcr_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
1581}
1582
1583static void
1584dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1585{
1586	xehp_init_mcr(gt, wal);
1587
1588	/* Wa_14011060649:dg2 */
1589	wa_14011060649(gt, wal);
1590
1591	if (IS_DG2_G10(gt->i915)) {
1592		/* Wa_22010523718:dg2 */
1593		wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
1594			    CG3DDISCFEG_CLKGATE_DIS);
1595
1596		/* Wa_14011006942:dg2 */
1597		wa_mcr_write_or(wal, GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE,
1598				DSS_ROUTER_CLKGATE_DIS);
1599	}
1600
1601	/* Wa_14014830051:dg2 */
1602	wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);
1603
1604	/*
1605	 * Wa_14015795083
1606	 * Skip verification for possibly locked register.
1607	 */
1608	wa_add(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE,
1609	       0, 0, false);
1610
1611	/* Wa_18018781329 */
1612	wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
1613	wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
1614	wa_mcr_write_or(wal, XEHP_VDBX_MOD_CTRL, FORCE_MISS_FTLB);
1615	wa_mcr_write_or(wal, XEHP_VEBX_MOD_CTRL, FORCE_MISS_FTLB);
1616
1617	/* Wa_1509235366:dg2 */
1618	wa_mcr_write_or(wal, XEHP_GAMCNTRL_CTRL,
1619			INVALIDATION_BROADCAST_MODE_DIS | GLOBAL_INVALIDATION_MODE);
1620
1621	/* Wa_14010648519:dg2 */
1622	wa_mcr_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
1623}
1624
1625static void
1626pvc_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1627{
1628	pvc_init_mcr(gt, wal);
1629
1630	/* Wa_14015795083 */
1631	wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
1632
1633	/* Wa_18018781329 */
1634	wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
1635	wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
1636	wa_mcr_write_or(wal, XEHP_VDBX_MOD_CTRL, FORCE_MISS_FTLB);
1637	wa_mcr_write_or(wal, XEHP_VEBX_MOD_CTRL, FORCE_MISS_FTLB);
1638
1639	/* Wa_16016694945 */
1640	wa_mcr_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_OVRLSCCC);
1641}
1642
1643static void
1644xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1645{
1646	/* Wa_14018778641 / Wa_18018781329 */
1647	wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
1648
1649	/* Wa_22016670082 */
1650	wa_write_or(wal, GEN12_SQCNT1, GEN12_STRICT_RAR_ENABLE);
1651
1652	if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
1653	    IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) {
1654		/* Wa_14014830051 */
1655		wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);
1656
1657		/* Wa_14015795083 */
1658		wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
1659	}
1660
1661	/*
1662	 * Unlike older platforms, we no longer setup implicit steering here;
1663	 * all MCR accesses are explicitly steered.
1664	 */
1665	debug_dump_steering(gt);
1666}
1667
1668static void
1669wa_16021867713(struct intel_gt *gt, struct i915_wa_list *wal)
1670{
1671	struct intel_engine_cs *engine;
1672	int id;
1673
1674	for_each_engine(engine, gt, id)
1675		if (engine->class == VIDEO_DECODE_CLASS)
1676			wa_write_or(wal, VDBOX_CGCTL3F1C(engine->mmio_base),
1677				    MFXPIPE_CLKGATE_DIS);
1678}
1679
1680static void
1681xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1682{
1683	wa_16021867713(gt, wal);
1684
1685	/*
1686	 * Wa_14018778641
1687	 * Wa_18018781329
1688	 *
1689	 * Note that although these registers are MCR on the primary
1690	 * GT, the media GT's versions are regular singleton registers.
1691	 */
1692	wa_write_or(wal, XELPMP_GSC_MOD_CTRL, FORCE_MISS_FTLB);
1693
1694	/* Wa_22016670082 */
1695	wa_write_or(wal, GEN12_SQCNT1, GEN12_STRICT_RAR_ENABLE);
1696
1697	debug_dump_steering(gt);
1698}
1699
1700/*
1701 * The bspec performance guide has recommended MMIO tuning settings.  These
1702 * aren't truly "workarounds" but we want to program them through the
1703 * workaround infrastructure to make sure they're (re)applied at the proper
1704 * times.
1705 *
1706 * The programming in this function is for settings that persist through
1707 * engine resets and also are not part of any engine's register state context.
1708 * I.e., settings that only need to be re-applied in the event of a full GT
1709 * reset.
1710 */
1711static void gt_tuning_settings(struct intel_gt *gt, struct i915_wa_list *wal)
1712{
1713	if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) {
1714		wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
1715		wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS);
1716	}
1717
1718	if (IS_PONTEVECCHIO(gt->i915)) {
1719		wa_mcr_write(wal, XEHPC_L3SCRUB,
1720			     SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK);
1721		wa_mcr_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_HOSTCACHEEN);
1722	}
1723
1724	if (IS_DG2(gt->i915)) {
1725		wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
1726		wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS);
1727	}
1728}
1729
1730static void
1731gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal)
1732{
1733	struct drm_i915_private *i915 = gt->i915;
1734
1735	gt_tuning_settings(gt, wal);
1736
1737	if (gt->type == GT_MEDIA) {
1738		if (MEDIA_VER_FULL(i915) == IP_VER(13, 0))
1739			xelpmp_gt_workarounds_init(gt, wal);
1740		else
1741			MISSING_CASE(MEDIA_VER_FULL(i915));
1742
1743		return;
1744	}
1745
1746	if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)))
1747		xelpg_gt_workarounds_init(gt, wal);
1748	else if (IS_PONTEVECCHIO(i915))
1749		pvc_gt_workarounds_init(gt, wal);
1750	else if (IS_DG2(i915))
1751		dg2_gt_workarounds_init(gt, wal);
1752	else if (IS_XEHPSDV(i915))
1753		xehpsdv_gt_workarounds_init(gt, wal);
1754	else if (IS_DG1(i915))
1755		dg1_gt_workarounds_init(gt, wal);
1756	else if (GRAPHICS_VER(i915) == 12)
1757		gen12_gt_workarounds_init(gt, wal);
1758	else if (GRAPHICS_VER(i915) == 11)
1759		icl_gt_workarounds_init(gt, wal);
1760	else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915))
1761		cfl_gt_workarounds_init(gt, wal);
1762	else if (IS_GEMINILAKE(i915))
1763		glk_gt_workarounds_init(gt, wal);
1764	else if (IS_KABYLAKE(i915))
1765		kbl_gt_workarounds_init(gt, wal);
1766	else if (IS_BROXTON(i915))
1767		gen9_gt_workarounds_init(gt, wal);
1768	else if (IS_SKYLAKE(i915))
1769		skl_gt_workarounds_init(gt, wal);
1770	else if (IS_HASWELL(i915))
1771		hsw_gt_workarounds_init(gt, wal);
1772	else if (IS_VALLEYVIEW(i915))
1773		vlv_gt_workarounds_init(gt, wal);
1774	else if (IS_IVYBRIDGE(i915))
1775		ivb_gt_workarounds_init(gt, wal);
1776	else if (GRAPHICS_VER(i915) == 6)
1777		snb_gt_workarounds_init(gt, wal);
1778	else if (GRAPHICS_VER(i915) == 5)
1779		ilk_gt_workarounds_init(gt, wal);
1780	else if (IS_G4X(i915))
1781		g4x_gt_workarounds_init(gt, wal);
1782	else if (GRAPHICS_VER(i915) == 4)
1783		gen4_gt_workarounds_init(gt, wal);
1784	else if (GRAPHICS_VER(i915) <= 8)
1785		;
1786	else
1787		MISSING_CASE(GRAPHICS_VER(i915));
1788}
1789
1790void intel_gt_init_workarounds(struct intel_gt *gt)
1791{
1792	struct i915_wa_list *wal = &gt->wa_list;
1793
1794	wa_init_start(wal, gt, "GT", "global");
1795	gt_init_workarounds(gt, wal);
1796	wa_init_finish(wal);
1797}
1798
1799static bool
1800wa_verify(struct intel_gt *gt, const struct i915_wa *wa, u32 cur,
1801	  const char *name, const char *from)
1802{
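	/*
	 * Only the bits we expect to be readable (wa->read) are compared;
	 * if any of them differ from the value we programmed (wa->set),
	 * the workaround has been lost.
	 */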
1803	if ((cur ^ wa->set) & wa->read) {
1804		gt_err(gt,
1805		       "%s workaround lost on %s! (reg[%x]=0x%x, relevant bits were 0x%x vs expected 0x%x)\n",
1806		       name, from, i915_mmio_reg_offset(wa->reg),
1807		       cur, cur & wa->read, wa->set & wa->read);
1808
1809		return false;
1810	}
1811
1812	return true;
1813}
1814
1815static void wa_list_apply(const struct i915_wa_list *wal)
1816{
1817	struct intel_gt *gt = wal->gt;
1818	struct intel_uncore *uncore = gt->uncore;
1819	enum forcewake_domains fw;
1820	unsigned long flags;
1821	struct i915_wa *wa;
1822	unsigned int i;
1823
1824	if (!wal->count)
1825		return;
1826
1827	fw = wal_get_fw_for_rmw(uncore, wal);
1828
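	/*
	 * Hold the MCR steering lock and the uncore lock across the whole
	 * list so that forcewake is grabbed only once and the steered /
	 * multicast accesses below are not interleaved with other MCR users.
	 */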
1829	intel_gt_mcr_lock(gt, &flags);
1830	spin_lock(&uncore->lock);
1831	intel_uncore_forcewake_get__locked(uncore, fw);
1832
1833	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
1834		u32 val, old = 0;
1835
1836		/* open-coded rmw due to steering */
1837		if (wa->clr)
1838			old = wa->is_mcr ?
1839				intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) :
1840				intel_uncore_read_fw(uncore, wa->reg);
1841		val = (old & ~wa->clr) | wa->set;
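		/*
		 * Skip the write only for pure RMW entries (wa->clr set) whose
		 * register already holds the desired value; entries without a
		 * clr mask are always (re)written.
		 */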
1842		if (val != old || !wa->clr) {
1843			if (wa->is_mcr)
1844				intel_gt_mcr_multicast_write_fw(gt, wa->mcr_reg, val);
1845			else
1846				intel_uncore_write_fw(uncore, wa->reg, val);
1847		}
1848
1849		if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) {
1850			u32 val = wa->is_mcr ?
1851				intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) :
1852				intel_uncore_read_fw(uncore, wa->reg);
1853
1854			wa_verify(gt, wa, val, wal->name, "application");
1855		}
1856	}
1857
1858	intel_uncore_forcewake_put__locked(uncore, fw);
1859	spin_unlock(&uncore->lock);
1860	intel_gt_mcr_unlock(gt, flags);
1861}
1862
1863void intel_gt_apply_workarounds(struct intel_gt *gt)
1864{
1865	wa_list_apply(&gt->wa_list);
1866}
1867
1868static bool wa_list_verify(struct intel_gt *gt,
1869			   const struct i915_wa_list *wal,
1870			   const char *from)
1871{
1872	struct intel_uncore *uncore = gt->uncore;
1873	struct i915_wa *wa;
1874	enum forcewake_domains fw;
1875	unsigned long flags;
1876	unsigned int i;
1877	bool ok = true;
1878
1879	fw = wal_get_fw_for_rmw(uncore, wal);
1880
1881	intel_gt_mcr_lock(gt, &flags);
1882	spin_lock(&uncore->lock);
1883	intel_uncore_forcewake_get__locked(uncore, fw);
1884
1885	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1886		ok &= wa_verify(wal->gt, wa, wa->is_mcr ?
1887				intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) :
1888				intel_uncore_read_fw(uncore, wa->reg),
1889				wal->name, from);
1890
1891	intel_uncore_forcewake_put__locked(uncore, fw);
1892	spin_unlock(&uncore->lock);
1893	intel_gt_mcr_unlock(gt, flags);
1894
1895	return ok;
1896}
1897
1898bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from)
1899{
1900	return wa_list_verify(gt, &gt->wa_list, from);
1901}
1902
1903__maybe_unused
1904static bool is_nonpriv_flags_valid(u32 flags)
1905{
1906	/* Check only valid flag bits are set */
1907	if (flags & ~RING_FORCE_TO_NONPRIV_MASK_VALID)
1908		return false;
1909
1910	/* NB: Only 3 out of 4 enum values are valid for access field */
1911	if ((flags & RING_FORCE_TO_NONPRIV_ACCESS_MASK) ==
1912	    RING_FORCE_TO_NONPRIV_ACCESS_INVALID)
1913		return false;
1914
1915	return true;
1916}
1917
1918static void
1919whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
1920{
1921	struct i915_wa wa = {
1922		.reg = reg
1923	};
1924
1925	if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
1926		return;
1927
1928	if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags)))
1929		return;
1930
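	/*
	 * The access/range flags occupy bits of the slot value that the
	 * dword-aligned register offset does not use, so they can be OR'ed
	 * straight into the offset that later lands in the NONPRIV slot.
	 */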
1931	wa.reg.reg |= flags;
1932	_wa_add(wal, &wa);
1933}
1934
1935static void
1936whitelist_mcr_reg_ext(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 flags)
1937{
1938	struct i915_wa wa = {
1939		.mcr_reg = reg,
1940		.is_mcr = 1,
1941	};
1942
1943	if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
1944		return;
1945
1946	if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags)))
1947		return;
1948
1949	wa.mcr_reg.reg |= flags;
1950	_wa_add(wal, &wa);
1951}
1952
1953static void
1954whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
1955{
1956	whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
1957}
1958
1959static void
1960whitelist_mcr_reg(struct i915_wa_list *wal, i915_mcr_reg_t reg)
1961{
1962	whitelist_mcr_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
1963}
1964
1965static void gen9_whitelist_build(struct i915_wa_list *w)
1966{
1967	/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
1968	whitelist_reg(w, GEN9_CTX_PREEMPT_REG);
1969
1970	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
1971	whitelist_reg(w, GEN8_CS_CHICKEN1);
1972
1973	/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
1974	whitelist_reg(w, GEN8_HDC_CHICKEN1);
1975
1976	/* WaSendPushConstantsFromMMIO:skl,bxt */
1977	whitelist_reg(w, COMMON_SLICE_CHICKEN2);
1978}
1979
1980static void skl_whitelist_build(struct intel_engine_cs *engine)
1981{
1982	struct i915_wa_list *w = &engine->whitelist;
1983
1984	if (engine->class != RENDER_CLASS)
1985		return;
1986
1987	gen9_whitelist_build(w);
1988
1989	/* WaDisableLSQCROPERFforOCL:skl */
1990	whitelist_mcr_reg(w, GEN8_L3SQCREG4);
1991}
1992
1993static void bxt_whitelist_build(struct intel_engine_cs *engine)
1994{
1995	if (engine->class != RENDER_CLASS)
1996		return;
1997
1998	gen9_whitelist_build(&engine->whitelist);
1999}
2000
2001static void kbl_whitelist_build(struct intel_engine_cs *engine)
2002{
2003	struct i915_wa_list *w = &engine->whitelist;
2004
2005	if (engine->class != RENDER_CLASS)
2006		return;
2007
2008	gen9_whitelist_build(w);
2009
2010	/* WaDisableLSQCROPERFforOCL:kbl */
2011	whitelist_mcr_reg(w, GEN8_L3SQCREG4);
2012}
2013
2014static void glk_whitelist_build(struct intel_engine_cs *engine)
2015{
2016	struct i915_wa_list *w = &engine->whitelist;
2017
2018	if (engine->class != RENDER_CLASS)
2019		return;
2020
2021	gen9_whitelist_build(w);
2022
2023	/* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
2024	whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
2025}
2026
2027static void cfl_whitelist_build(struct intel_engine_cs *engine)
2028{
2029	struct i915_wa_list *w = &engine->whitelist;
2030
2031	if (engine->class != RENDER_CLASS)
2032		return;
2033
2034	gen9_whitelist_build(w);
2035
2036	/*
2037	 * WaAllowPMDepthAndInvocationCountAccessFromUMD:cfl,whl,cml,aml
2038	 *
2039	 * This covers 4 registers which are next to one another:
2040	 *   - PS_INVOCATION_COUNT
2041	 *   - PS_INVOCATION_COUNT_UDW
2042	 *   - PS_DEPTH_COUNT
2043	 *   - PS_DEPTH_COUNT_UDW
2044	 */
2045	whitelist_reg_ext(w, PS_INVOCATION_COUNT,
2046			  RING_FORCE_TO_NONPRIV_ACCESS_RD |
2047			  RING_FORCE_TO_NONPRIV_RANGE_4);
2048}
2049
2050static void allow_read_ctx_timestamp(struct intel_engine_cs *engine)
2051{
2052	struct i915_wa_list *w = &engine->whitelist;
2053
2054	if (engine->class != RENDER_CLASS)
2055		whitelist_reg_ext(w,
2056				  RING_CTX_TIMESTAMP(engine->mmio_base),
2057				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
2058}
2059
2060static void cml_whitelist_build(struct intel_engine_cs *engine)
2061{
2062	allow_read_ctx_timestamp(engine);
2063
2064	cfl_whitelist_build(engine);
2065}
2066
2067static void icl_whitelist_build(struct intel_engine_cs *engine)
2068{
2069	struct i915_wa_list *w = &engine->whitelist;
2070
2071	allow_read_ctx_timestamp(engine);
2072
2073	switch (engine->class) {
2074	case RENDER_CLASS:
2075		/* WaAllowUMDToModifyHalfSliceChicken7:icl */
2076		whitelist_mcr_reg(w, GEN9_HALF_SLICE_CHICKEN7);
2077
2078		/* WaAllowUMDToModifySamplerMode:icl */
2079		whitelist_mcr_reg(w, GEN10_SAMPLER_MODE);
2080
2081		/* WaEnableStateCacheRedirectToCS:icl */
2082		whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
2083
2084		/*
2085		 * WaAllowPMDepthAndInvocationCountAccessFromUMD:icl
2086		 *
2087		 * This covers 4 registers which are next to one another:
2088		 *   - PS_INVOCATION_COUNT
2089		 *   - PS_INVOCATION_COUNT_UDW
2090		 *   - PS_DEPTH_COUNT
2091		 *   - PS_DEPTH_COUNT_UDW
2092		 */
2093		whitelist_reg_ext(w, PS_INVOCATION_COUNT,
2094				  RING_FORCE_TO_NONPRIV_ACCESS_RD |
2095				  RING_FORCE_TO_NONPRIV_RANGE_4);
2096		break;
2097
2098	case VIDEO_DECODE_CLASS:
2099		/* hucStatusRegOffset */
2100		whitelist_reg_ext(w, _MMIO(0x2000 + engine->mmio_base),
2101				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
2102		/* hucUKernelHdrInfoRegOffset */
2103		whitelist_reg_ext(w, _MMIO(0x2014 + engine->mmio_base),
2104				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
2105		/* hucStatus2RegOffset */
2106		whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base),
2107				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
2108		break;
2109
2110	default:
2111		break;
2112	}
2113}
2114
2115static void tgl_whitelist_build(struct intel_engine_cs *engine)
2116{
2117	struct i915_wa_list *w = &engine->whitelist;
2118
2119	allow_read_ctx_timestamp(engine);
2120
2121	switch (engine->class) {
2122	case RENDER_CLASS:
2123		/*
2124		 * WaAllowPMDepthAndInvocationCountAccessFromUMD:tgl
2125		 * Wa_1408556865:tgl
2126		 *
2127		 * This covers 4 registers which are next to one another:
2128		 *   - PS_INVOCATION_COUNT
2129		 *   - PS_INVOCATION_COUNT_UDW
2130		 *   - PS_DEPTH_COUNT
2131		 *   - PS_DEPTH_COUNT_UDW
2132		 */
2133		whitelist_reg_ext(w, PS_INVOCATION_COUNT,
2134				  RING_FORCE_TO_NONPRIV_ACCESS_RD |
2135				  RING_FORCE_TO_NONPRIV_RANGE_4);
2136
2137		/*
2138		 * Wa_1808121037:tgl
2139		 * Wa_14012131227:dg1
2140		 * Wa_1508744258:tgl,rkl,dg1,adl-s,adl-p
2141		 */
2142		whitelist_reg(w, GEN7_COMMON_SLICE_CHICKEN1);
2143
2144		/* Wa_1806527549:tgl */
2145		whitelist_reg(w, HIZ_CHICKEN);
2146
2147		/* Required by recommended tuning setting (not a workaround) */
2148		whitelist_reg(w, GEN11_COMMON_SLICE_CHICKEN3);
2149
2150		break;
2151	default:
2152		break;
2153	}
2154}
2155
2156static void dg2_whitelist_build(struct intel_engine_cs *engine)
2157{
2158	struct i915_wa_list *w = &engine->whitelist;
2159
2160	switch (engine->class) {
2161	case RENDER_CLASS:
2162		/* Required by recommended tuning setting (not a workaround) */
2163		whitelist_mcr_reg(w, XEHP_COMMON_SLICE_CHICKEN3);
2164
2165		break;
2166	default:
2167		break;
2168	}
2169}
2170
2171static void blacklist_trtt(struct intel_engine_cs *engine)
2172{
2173	struct i915_wa_list *w = &engine->whitelist;
2174
2175	/*
2176	 * Prevent read/write access to [0x4400, 0x4600) which covers
2177	 * the TRTT range across all engines. Note that normally userspace
2178	 * cannot access the other engines' trtt control, but for simplicity
2179	 * we cover the entire range on each engine.
2180	 */
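	/*
	 * Each RANGE_64 entry spans 64 dwords (0x100 bytes), so the two
	 * entries at 0x4400 and 0x4500 deny the whole [0x4400, 0x4600) range.
	 */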
2181	whitelist_reg_ext(w, _MMIO(0x4400),
2182			  RING_FORCE_TO_NONPRIV_DENY |
2183			  RING_FORCE_TO_NONPRIV_RANGE_64);
2184	whitelist_reg_ext(w, _MMIO(0x4500),
2185			  RING_FORCE_TO_NONPRIV_DENY |
2186			  RING_FORCE_TO_NONPRIV_RANGE_64);
2187}
2188
2189static void pvc_whitelist_build(struct intel_engine_cs *engine)
2190{
2191	/* Wa_16014440446:pvc */
2192	blacklist_trtt(engine);
2193}
2194
2195static void xelpg_whitelist_build(struct intel_engine_cs *engine)
2196{
2197	struct i915_wa_list *w = &engine->whitelist;
2198
2199	switch (engine->class) {
2200	case RENDER_CLASS:
2201		/* Required by recommended tuning setting (not a workaround) */
2202		whitelist_mcr_reg(w, XEHP_COMMON_SLICE_CHICKEN3);
2203
2204		break;
2205	default:
2206		break;
2207	}
2208}
2209
2210void intel_engine_init_whitelist(struct intel_engine_cs *engine)
2211{
2212	struct drm_i915_private *i915 = engine->i915;
2213	struct i915_wa_list *w = &engine->whitelist;
2214
2215	wa_init_start(w, engine->gt, "whitelist", engine->name);
2216
2217	if (engine->gt->type == GT_MEDIA)
2218		; /* none yet */
2219	else if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 71)))
2220		xelpg_whitelist_build(engine);
2221	else if (IS_PONTEVECCHIO(i915))
2222		pvc_whitelist_build(engine);
2223	else if (IS_DG2(i915))
2224		dg2_whitelist_build(engine);
2225	else if (IS_XEHPSDV(i915))
2226		; /* none needed */
2227	else if (GRAPHICS_VER(i915) == 12)
2228		tgl_whitelist_build(engine);
2229	else if (GRAPHICS_VER(i915) == 11)
2230		icl_whitelist_build(engine);
2231	else if (IS_COMETLAKE(i915))
2232		cml_whitelist_build(engine);
2233	else if (IS_COFFEELAKE(i915))
2234		cfl_whitelist_build(engine);
2235	else if (IS_GEMINILAKE(i915))
2236		glk_whitelist_build(engine);
2237	else if (IS_KABYLAKE(i915))
2238		kbl_whitelist_build(engine);
2239	else if (IS_BROXTON(i915))
2240		bxt_whitelist_build(engine);
2241	else if (IS_SKYLAKE(i915))
2242		skl_whitelist_build(engine);
2243	else if (GRAPHICS_VER(i915) <= 8)
2244		;
2245	else
2246		MISSING_CASE(GRAPHICS_VER(i915));
2247
2248	wa_init_finish(w);
2249}
2250
2251void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
2252{
2253	const struct i915_wa_list *wal = &engine->whitelist;
2254	struct intel_uncore *uncore = engine->uncore;
2255	const u32 base = engine->mmio_base;
2256	struct i915_wa *wa;
2257	unsigned int i;
2258
2259	if (!wal->count)
2260		return;
2261
2262	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
2263		intel_uncore_write(uncore,
2264				   RING_FORCE_TO_NONPRIV(base, i),
2265				   i915_mmio_reg_offset(wa->reg));
2266
2267	/* And clear the rest just in case of garbage */
2268	for (; i < RING_MAX_NONPRIV_SLOTS; i++)
2269		intel_uncore_write(uncore,
2270				   RING_FORCE_TO_NONPRIV(base, i),
2271				   i915_mmio_reg_offset(RING_NOPID(base)));
2272}
2273
2274/*
2275 * engine_fake_wa_init(), a placeholder to program registers
2276 * which are not part of an official workaround defined by the
2277 * hardware team.
2278 * Adding the programming of those registers to a workaround list
2279 * allows utilizing the wa framework for proper application and verification.
2280 */
2281static void
2282engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
2283{
2284	u8 mocs_w, mocs_r;
2285
2286	/*
2287	 * RING_CMD_CCTL specifies the default MOCS entry that will be used
2288	 * by the command streamer when executing commands that don't have
2289	 * a way to explicitly specify a MOCS setting.  The default should
2290	 * usually reference whichever MOCS entry corresponds to uncached
2291	 * behavior, although use of a WB cached entry is recommended by the
2292	 * spec in certain circumstances on specific platforms.
2293	 */
2294	if (GRAPHICS_VER(engine->i915) >= 12) {
2295		mocs_r = engine->gt->mocs.uc_index;
2296		mocs_w = engine->gt->mocs.uc_index;
2297
2298		if (HAS_L3_CCS_READ(engine->i915) &&
2299		    engine->class == COMPUTE_CLASS) {
2300			mocs_r = engine->gt->mocs.wb_index;
2301
2302			/*
2303			 * Even on the few platforms where MOCS 0 is a
2304			 * legitimate table entry, it's never the correct
2305			 * setting to use here; we can assume the MOCS init
2306			 * just forgot to initialize wb_index.
2307			 */
2308			drm_WARN_ON(&engine->i915->drm, mocs_r == 0);
2309		}
2310
2311		wa_masked_field_set(wal,
2312				    RING_CMD_CCTL(engine->mmio_base),
2313				    CMD_CCTL_MOCS_MASK,
2314				    CMD_CCTL_MOCS_OVERRIDE(mocs_w, mocs_r));
2315	}
2316}
2317
2318static void
2319rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
2320{
2321	struct drm_i915_private *i915 = engine->i915;
2322	struct intel_gt *gt = engine->gt;
2323
2324	if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
2325	    IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) {
2326		/* Wa_22014600077 */
2327		wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS,
2328				 ENABLE_EU_COUNT_FOR_TDL_FLUSH);
2329	}
2330
2331	if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
2332	    IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
2333	    IS_DG2(i915)) {
2334		/* Wa_1509727124 */
2335		wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
2336				 SC_DISABLE_POWER_OPTIMIZATION_EBB);
2337	}
2338
2339	if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
2340	    IS_DG2(i915)) {
2341		/* Wa_22012856258 */
2342		wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
2343				 GEN12_DISABLE_READ_SUPPRESSION);
2344	}
2345
2346	if (IS_DG2(i915)) {
2347		/*
2348		 * Wa_22010960976:dg2
2349		 * Wa_14013347512:dg2
2350		 */
2351		wa_mcr_masked_dis(wal, XEHP_HDC_CHICKEN0,
2352				  LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK);
2353	}
2354
2355	if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) ||
2356	    IS_DG2(i915)) {
2357		/* Wa_14015150844 */
2358		wa_mcr_add(wal, XEHP_HDC_CHICKEN0, 0,
2359			   _MASKED_BIT_ENABLE(DIS_ATOMIC_CHAINING_TYPED_WRITES),
2360			   0, true);
2361	}
2362
2363	if (IS_DG2(i915) || IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) ||
2364	    IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
2365		/*
2366		 * Wa_1606700617:tgl,dg1,adl-p
2367		 * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p
2368		 * Wa_14010826681:tgl,dg1,rkl,adl-p
2369		 * Wa_18019627453:dg2
2370		 */
2371		wa_masked_en(wal,
2372			     GEN9_CS_DEBUG_MODE1,
2373			     FF_DOP_CLOCK_GATE_DISABLE);
2374	}
2375
2376	if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) ||
2377	    IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
2378		/* Wa_1606931601:tgl,rkl,dg1,adl-s,adl-p */
2379		wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ);
2380
2381		/*
2382		 * Wa_1407928979:tgl A*
2383		 * Wa_18011464164:tgl[B0+],dg1[B0+]
2384		 * Wa_22010931296:tgl[B0+],dg1[B0+]
2385		 * Wa_14010919138:rkl,dg1,adl-s,adl-p
2386		 */
2387		wa_write_or(wal, GEN7_FF_THREAD_MODE,
2388			    GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
2389
2390		/* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */
2391		wa_mcr_masked_en(wal,
2392				 GEN10_SAMPLER_MODE,
2393				 ENABLE_SMALLPL);
2394	}
2395
2396	if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) ||
2397	    IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
2398		/* Wa_1409804808 */
2399		wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
2400				 GEN12_PUSH_CONST_DEREF_HOLD_DIS);
2401
2402		/* Wa_14010229206 */
2403		wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
2404	}
2405
2406	if (IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) || IS_ALDERLAKE_P(i915)) {
2407		/*
2408		 * Wa_1607297627
2409		 *
2410		 * On TGL and RKL there are multiple entries for this WA in the
2411		 * BSpec; some indicate this is an A0-only WA, others indicate
2412		 * it applies to all steppings so we trust the "all steppings."
2413		 */
2414		wa_masked_en(wal,
2415			     RING_PSMI_CTL(RENDER_RING_BASE),
2416			     GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
2417			     GEN8_RC_SEMA_IDLE_MSG_DISABLE);
2418	}
2419
2420	if (GRAPHICS_VER(i915) == 11) {
2421		/* This is not a Wa. Enable for better image quality */
2422		wa_masked_en(wal,
2423			     _3D_CHICKEN3,
2424			     _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);
2425
2426		/*
2427		 * Wa_1405543622:icl
2428		 * Formerly known as WaGAPZPriorityScheme
2429		 */
2430		wa_write_or(wal,
2431			    GEN8_GARBCNTL,
2432			    GEN11_ARBITRATION_PRIO_ORDER_MASK);
2433
2434		/*
2435		 * Wa_1604223664:icl
2436		 * Formerly known as WaL3BankAddressHashing
2437		 */
2438		wa_write_clr_set(wal,
2439				 GEN8_GARBCNTL,
2440				 GEN11_HASH_CTRL_EXCL_MASK,
2441				 GEN11_HASH_CTRL_EXCL_BIT0);
2442		wa_write_clr_set(wal,
2443				 GEN11_GLBLINVL,
2444				 GEN11_BANK_HASH_ADDR_EXCL_MASK,
2445				 GEN11_BANK_HASH_ADDR_EXCL_BIT0);
2446
2447		/*
2448		 * Wa_1405733216:icl
2449		 * Formerly known as WaDisableCleanEvicts
2450		 */
2451		wa_mcr_write_or(wal,
2452				GEN8_L3SQCREG4,
2453				GEN11_LQSC_CLEAN_EVICT_DISABLE);
2454
2455		/* Wa_1606682166:icl */
2456		wa_write_or(wal,
2457			    GEN7_SARCHKMD,
2458			    GEN7_DISABLE_SAMPLER_PREFETCH);
2459
2460		/* Wa_1409178092:icl */
2461		wa_mcr_write_clr_set(wal,
2462				     GEN11_SCRATCH2,
2463				     GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE,
2464				     0);
2465
2466		/* WaEnable32PlaneMode:icl */
2467		wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS,
2468			     GEN11_ENABLE_32_PLANE_MODE);
2469
2470		/*
2471		 * Wa_1408767742:icl[a2..forever],ehl[all]
2472		 * Wa_1605460711:icl[a0..c0]
2473		 */
2474		wa_write_or(wal,
2475			    GEN7_FF_THREAD_MODE,
2476			    GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
2477
2478		/* Wa_22010271021 */
2479		wa_masked_en(wal,
2480			     GEN9_CS_DEBUG_MODE1,
2481			     FF_DOP_CLOCK_GATE_DISABLE);
2482	}
2483
2484	/*
2485	 * Intel platforms that support fine-grained preemption (i.e., gen9 and
2486	 * beyond) allow the kernel-mode driver to choose between two different
2487	 * options for controlling preemption granularity and behavior.
2488	 *
2489	 * Option 1 (hardware default):
2490	 *   Preemption settings are controlled in a global manner via
2491	 *   kernel-only register CS_DEBUG_MODE1 (0x20EC).  Any granularity
2492	 *   and settings chosen by the kernel-mode driver will apply to all
2493	 *   userspace clients.
2494	 *
2495	 * Option 2:
2496	 *   Preemption settings are controlled on a per-context basis via
2497	 *   register CS_CHICKEN1 (0x2580).  CS_CHICKEN1 is saved/restored on
2498	 *   context switch and is writable by userspace (e.g., via
2499	 *   MI_LOAD_REGISTER_IMMEDIATE instructions placed in a batch buffer)
2500	 *   which allows different userspace drivers/clients to select
2501	 *   different settings, or to change those settings on the fly in
2502	 *   response to runtime needs.  This option was known by name
2503	 *   "FtrPerCtxtPreemptionGranularityControl" at one time, although
2504	 *   that name is somewhat misleading as other non-granularity
2505	 *   preemption settings are also impacted by this decision.
2506	 *
2507	 * On Linux, our policy has always been to let userspace drivers
2508	 * control preemption granularity/settings (Option 2).  This was
2509	 * originally mandatory on gen9 to prevent ABI breakage (old gen9
2510	 * userspace developed before object-level preemption was enabled would
2511	 * not behave well if i915 were to go with Option 1 and enable that
2512	 * preemption in a global manner).  On gen9 each context would have
2513	 * object-level preemption disabled by default (see
2514	 * WaDisable3DMidCmdPreemption in gen9_ctx_workarounds_init), but
2515	 * userspace drivers could opt-in to object-level preemption as they
2516	 * saw fit.  For post-gen9 platforms, we continue to utilize Option 2;
2517	 * even though it is no longer necessary for ABI compatibility when
2518	 * enabling a new platform, it does ensure that userspace will be able
2519	 * to implement any workarounds that show up requiring temporary
2520	 * adjustments to preemption behavior at runtime.
2521	 *
2522	 * Notes/Workarounds:
2523	 *  - Wa_14015141709:  On DG2 and early steppings of MTL,
2524	 *      CS_CHICKEN1[0] does not disable object-level preemption as
2525	 *      it is supposed to (nor does CS_DEBUG_MODE1[0] if we had been
2526	 *      using Option 1).  Effectively this means userspace is unable
2527	 *      to disable object-level preemption on these platforms/steppings
2528	 *      despite the setting here.
2529	 *
2530	 *  - Wa_16013994831:  May require that userspace program
2531	 *      CS_CHICKEN1[10] when certain runtime conditions are true.
2532	 *      Userspace requires Option 2 to be in effect for their update of
2533	 *      CS_CHICKEN1[10] to be effective.
2534	 *
2535	 * Other workarounds may appear in the future that will also require
2536	 * Option 2 behavior to allow proper userspace implementation.
2537	 */
2538	if (GRAPHICS_VER(i915) >= 9)
2539		wa_masked_en(wal,
2540			     GEN7_FF_SLICE_CS_CHICKEN1,
2541			     GEN9_FFSC_PERCTX_PREEMPT_CTRL);
2542
2543	if (IS_SKYLAKE(i915) ||
2544	    IS_KABYLAKE(i915) ||
2545	    IS_COFFEELAKE(i915) ||
2546	    IS_COMETLAKE(i915)) {
2547		/* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
2548		wa_write_or(wal,
2549			    GEN8_GARBCNTL,
2550			    GEN9_GAPS_TSV_CREDIT_DISABLE);
2551	}
2552
2553	if (IS_BROXTON(i915)) {
2554		/* WaDisablePooledEuLoadBalancingFix:bxt */
2555		wa_masked_en(wal,
2556			     FF_SLICE_CS_CHICKEN2,
2557			     GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
2558	}
2559
2560	if (GRAPHICS_VER(i915) == 9) {
2561		/* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
2562		wa_masked_en(wal,
2563			     GEN9_CSFE_CHICKEN1_RCS,
2564			     GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);
2565
2566		/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
2567		wa_mcr_write_or(wal,
2568				BDW_SCRATCH1,
2569				GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
2570
2571		/* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
2572		if (IS_GEN9_LP(i915))
2573			wa_mcr_write_clr_set(wal,
2574					     GEN8_L3SQCREG1,
2575					     L3_PRIO_CREDITS_MASK,
2576					     L3_GENERAL_PRIO_CREDITS(62) |
2577					     L3_HIGH_PRIO_CREDITS(2));
2578
2579		/* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
2580		wa_mcr_write_or(wal,
2581				GEN8_L3SQCREG4,
2582				GEN8_LQSC_FLUSH_COHERENT_LINES);
2583
2584		/* Disable atomics in L3 to prevent unrecoverable hangs */
2585		wa_write_clr_set(wal, GEN9_SCRATCH_LNCF1,
2586				 GEN9_LNCF_NONIA_COHERENT_ATOMICS_ENABLE, 0);
2587		wa_mcr_write_clr_set(wal, GEN8_L3SQCREG4,
2588				     GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE, 0);
2589		wa_mcr_write_clr_set(wal, GEN9_SCRATCH1,
2590				     EVICTION_PERF_FIX_ENABLE, 0);
2591	}
2592
2593	if (IS_HASWELL(i915)) {
2594		/* WaSampleCChickenBitEnable:hsw */
2595		wa_masked_en(wal,
2596			     HSW_HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE);
2597
2598		wa_masked_dis(wal,
2599			      CACHE_MODE_0_GEN7,
2600			      /* enable HiZ Raw Stall Optimization */
2601			      HIZ_RAW_STALL_OPT_DISABLE);
2602	}
2603
2604	if (IS_VALLEYVIEW(i915)) {
2605		/* WaDisableEarlyCull:vlv */
2606		wa_masked_en(wal,
2607			     _3D_CHICKEN3,
2608			     _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);
2609
2610		/*
2611		 * WaVSThreadDispatchOverride:ivb,vlv
2612		 *
2613		 * This actually overrides the dispatch
2614		 * mode for all thread types.
2615		 */
2616		wa_write_clr_set(wal,
2617				 GEN7_FF_THREAD_MODE,
2618				 GEN7_FF_SCHED_MASK,
2619				 GEN7_FF_TS_SCHED_HW |
2620				 GEN7_FF_VS_SCHED_HW |
2621				 GEN7_FF_DS_SCHED_HW);
2622
2623		/* WaPsdDispatchEnable:vlv */
2624		/* WaDisablePSDDualDispatchEnable:vlv */
2625		wa_masked_en(wal,
2626			     GEN7_HALF_SLICE_CHICKEN1,
2627			     GEN7_MAX_PS_THREAD_DEP |
2628			     GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
2629	}
2630
2631	if (IS_IVYBRIDGE(i915)) {
2632		/* WaDisableEarlyCull:ivb */
2633		wa_masked_en(wal,
2634			     _3D_CHICKEN3,
2635			     _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);
2636
2637		if (0) { /* causes HiZ corruption on ivb:gt1 */
2638			/* enable HiZ Raw Stall Optimization */
2639			wa_masked_dis(wal,
2640				      CACHE_MODE_0_GEN7,
2641				      HIZ_RAW_STALL_OPT_DISABLE);
2642		}
2643
2644		/*
2645		 * WaVSThreadDispatchOverride:ivb,vlv
2646		 *
2647		 * This actually overrides the dispatch
2648		 * mode for all thread types.
2649		 */
2650		wa_write_clr_set(wal,
2651				 GEN7_FF_THREAD_MODE,
2652				 GEN7_FF_SCHED_MASK,
2653				 GEN7_FF_TS_SCHED_HW |
2654				 GEN7_FF_VS_SCHED_HW |
2655				 GEN7_FF_DS_SCHED_HW);
2656
2657		/* WaDisablePSDDualDispatchEnable:ivb */
2658		if (IS_IVB_GT1(i915))
2659			wa_masked_en(wal,
2660				     GEN7_HALF_SLICE_CHICKEN1,
2661				     GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
2662	}
2663
2664	if (GRAPHICS_VER(i915) == 7) {
2665		/* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
2666		wa_masked_en(wal,
2667			     RING_MODE_GEN7(RENDER_RING_BASE),
2668			     GFX_TLB_INVALIDATE_EXPLICIT | GFX_REPLAY_MODE);
2669
2670		/* WaDisable_RenderCache_OperationalFlush:ivb,vlv,hsw */
2671		wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE);
2672
2673		/*
2674		 * BSpec says this must be set, even though
2675		 * WaDisable4x2SubspanOptimization:ivb,hsw
2676		 * WaDisable4x2SubspanOptimization isn't listed for VLV.
2677		 */
2678		wa_masked_en(wal,
2679			     CACHE_MODE_1,
2680			     PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
2681
2682		/*
2683		 * BSpec recommends 8x4 when MSAA is used,
2684		 * however in practice 16x4 seems fastest.
2685		 *
2686		 * Note that PS/WM thread counts depend on the WIZ hashing
2687		 * disable bit, which we don't touch here, but it's good
2688		 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
2689		 */
2690		wa_masked_field_set(wal,
2691				    GEN7_GT_MODE,
2692				    GEN6_WIZ_HASHING_MASK,
2693				    GEN6_WIZ_HASHING_16x4);
2694	}
2695
2696	if (IS_GRAPHICS_VER(i915, 6, 7))
2697		/*
2698		 * We need to disable the AsyncFlip performance optimisations in
2699		 * order to use MI_WAIT_FOR_EVENT within the CS. It should
2700		 * already be programmed to '1' on all products.
2701		 *
2702		 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
2703		 */
2704		wa_masked_en(wal,
2705			     RING_MI_MODE(RENDER_RING_BASE),
2706			     ASYNC_FLIP_PERF_DISABLE);
2707
2708	if (GRAPHICS_VER(i915) == 6) {
2709		/*
2710		 * Required for the hardware to program scanline values for
2711		 * waiting
2712		 * WaEnableFlushTlbInvalidationMode:snb
2713		 */
2714		wa_masked_en(wal,
2715			     GFX_MODE,
2716			     GFX_TLB_INVALIDATE_EXPLICIT);
2717
2718		/* WaDisableHiZPlanesWhenMSAAEnabled:snb */
2719		wa_masked_en(wal,
2720			     _3D_CHICKEN,
2721			     _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB);
2722
2723		wa_masked_en(wal,
2724			     _3D_CHICKEN3,
2725			     /* WaStripsFansDisableFastClipPerformanceFix:snb */
2726			     _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL |
2727			     /*
2728			      * Bspec says:
2729			      * "This bit must be set if 3DSTATE_CLIP clip mode is set
2730			      * to normal and 3DSTATE_SF number of SF output attributes
2731			      * is more than 16."
2732			      */
2733			     _3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH);
2734
2735		/*
2736		 * BSpec recommends 8x4 when MSAA is used,
2737		 * however in practice 16x4 seems fastest.
2738		 *
2739		 * Note that PS/WM thread counts depend on the WIZ hashing
2740		 * disable bit, which we don't touch here, but it's good
2741		 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
2742		 */
2743		wa_masked_field_set(wal,
2744				    GEN6_GT_MODE,
2745				    GEN6_WIZ_HASHING_MASK,
2746				    GEN6_WIZ_HASHING_16x4);
2747
2748		/* WaDisable_RenderCache_OperationalFlush:snb */
2749		wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
2750
2751		/*
2752		 * From the Sandybridge PRM, volume 1 part 3, page 24:
2753		 * "If this bit is set, STCunit will have LRA as replacement
2754		 *  policy. [...] This bit must be reset. LRA replacement
2755		 *  policy is not supported."
2756		 */
2757		wa_masked_dis(wal,
2758			      CACHE_MODE_0,
2759			      CM0_STC_EVICT_DISABLE_LRA_SNB);
2760	}
2761
2762	if (IS_GRAPHICS_VER(i915, 4, 6))
2763		/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
2764		wa_add(wal, RING_MI_MODE(RENDER_RING_BASE),
2765		       0, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH),
2766		       /* XXX bit doesn't stick on Broadwater */
2767		       IS_I965G(i915) ? 0 : VS_TIMER_DISPATCH, true);
2768
2769	if (GRAPHICS_VER(i915) == 4)
2770		/*
2771		 * Disable CONSTANT_BUFFER before it is loaded from the context
2772		 * image. Once it is loaded, it is executed and the stored
2773		 * address may no longer be valid, leading to a GPU hang.
2774		 *
2775		 * This imposes the requirement that userspace reload their
2776		 * CONSTANT_BUFFER on every batch, fortunately a requirement
2777		 * they are already accustomed to from before contexts were
2778		 * enabled.
2779		 */
2780		wa_add(wal, ECOSKPD(RENDER_RING_BASE),
2781		       0, _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE),
2782		       0 /* XXX bit doesn't stick on Broadwater */,
2783		       true);
2784}
2785
2786static void
2787xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
2788{
2789	struct drm_i915_private *i915 = engine->i915;
2790
2791	/* WaKBLVECSSemaphoreWaitPoll:kbl */
2792	if (IS_KABYLAKE(i915) && IS_GRAPHICS_STEP(i915, STEP_A0, STEP_F0)) {
2793		wa_write(wal,
2794			 RING_SEMA_WAIT_POLL(engine->mmio_base),
2795			 1);
2796	}
2797	/* Wa_16018031267, Wa_16018063123 */
2798	if (NEEDS_FASTCOLOR_BLT_WABB(engine))
2799		wa_masked_field_set(wal, ECOSKPD(engine->mmio_base),
2800				    XEHP_BLITTER_SCHEDULING_MODE_MASK,
2801				    XEHP_BLITTER_ROUND_ROBIN_MODE);
2802}
2803
2804static void
2805ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
2806{
2807	if (IS_PVC_CT_STEP(engine->i915, STEP_A0, STEP_C0)) {
2808		/* Wa_14014999345:pvc */
2809		wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS, DISABLE_ECC);
2810	}
2811}
2812
2813/*
2814 * The bspec performance guide has recommended MMIO tuning settings.  These
2815 * aren't truly "workarounds" but we want to program them with the same
2816 * workaround infrastructure to ensure that they're automatically added to
2817 * the GuC save/restore lists, re-applied at the right times, and checked for
2818 * any conflicting programming requested by real workarounds.
2819 *
2820 * Programming settings should be added here only if their registers are not
2821 * part of an engine's register state context.  If a register is part of a
2822 * context, then any tuning settings should be programmed in an appropriate
2823 * function invoked by __intel_engine_init_ctx_wa().
2824 */
2825static void
2826add_render_compute_tuning_settings(struct intel_gt *gt,
2827				   struct i915_wa_list *wal)
2828{
2829	struct drm_i915_private *i915 = gt->i915;
2830
2831	if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) || IS_DG2(i915))
2832		wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512);
2833
2834	/*
2835	 * This tuning setting proves beneficial only on ATS-M designs; the
2836	 * default "age based" setting is optimal on regular DG2 and other
2837	 * platforms.
2838	 */
2839	if (INTEL_INFO(i915)->tuning_thread_rr_after_dep)
2840		wa_mcr_masked_field_set(wal, GEN9_ROW_CHICKEN4, THREAD_EX_ARB_MODE,
2841					THREAD_EX_ARB_MODE_RR_AFTER_DEP);
2842
2843	if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
2844		wa_write_clr(wal, GEN8_GARBCNTL, GEN12_BUS_HASH_CTL_BIT_EXC);
2845}
2846
2847/*
2848 * The workarounds in this function apply to shared registers in
2849 * the general render reset domain that aren't tied to a
2850 * specific engine.  Since all render+compute engines get reset
2851 * together, and the contents of these registers are lost during
2852 * the shared render domain reset, we'll define such workarounds
2853 * here and then add them to just a single RCS or CCS engine's
2854 * workaround list (whichever engine has the I915_ENGINE_FIRST_RENDER_COMPUTE flag).
2855 */
2856static void
2857general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
2858{
2859	struct drm_i915_private *i915 = engine->i915;
2860	struct intel_gt *gt = engine->gt;
2861
2862	add_render_compute_tuning_settings(gt, wal);
2863
2864	if (GRAPHICS_VER(i915) >= 11) {
2865		/* This is not a Wa (although referred to as
2866		 * WaSetInidrectStateOverride in places); it allows
2867		 * applications that reference sampler states through
2868		 * the BindlessSamplerStateBaseAddress to have their
2869		 * border color relative to DynamicStateBaseAddress
2870		 * rather than BindlessSamplerStateBaseAddress.
2871		 *
2872		 * Otherwise SAMPLER_STATE border colors have to be
2873		 * copied in multiple heaps (DynamicStateBaseAddress &
2874		 * BindlessSamplerStateBaseAddress)
2875		 *
2876		 * BSpec: 46052
2877		 */
2878		wa_mcr_masked_en(wal,
2879				 GEN10_SAMPLER_MODE,
2880				 GEN11_INDIRECT_STATE_BASE_ADDR_OVERRIDE);
2881	}
2882
2883	if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_B0, STEP_FOREVER) ||
2884	    IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER))
2885		/* Wa_14017856879 */
2886		wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN3, MTL_DISABLE_FIX_FOR_EOT_FLUSH);
2887
2888	if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
2889	    IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0))
2890		/*
2891		 * Wa_14017066071
2892		 * Wa_14017654203
2893		 */
2894		wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
2895				 MTL_DISABLE_SAMPLER_SC_OOO);
2896
2897	if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0))
2898		/* Wa_22015279794 */
2899		wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS,
2900				 DISABLE_PREFETCH_INTO_IC);
2901
2902	if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
2903	    IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
2904	    IS_DG2(i915)) {
2905		/* Wa_22013037850 */
2906		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW,
2907				DISABLE_128B_EVICTION_COMMAND_UDW);
2908
2909		/* Wa_18017747507 */
2910		wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE);
2911	}
2912
2913	if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
2914	    IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
2915	    IS_PONTEVECCHIO(i915) ||
2916	    IS_DG2(i915)) {
2917		/* Wa_22014226127 */
2918		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE);
2919	}
2920
2921	if (IS_PONTEVECCHIO(i915) || IS_DG2(i915)) {
2922		/* Wa_14015227452:dg2,pvc */
2923		wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE);
2924
2925		/* Wa_16015675438:dg2,pvc */
2926		wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE);
2927	}
2928
2929	if (IS_DG2(i915)) {
2930		/*
2931		 * Wa_16011620976:dg2_g11
2932		 * Wa_22015475538:dg2
2933		 */
2934		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);
2935
2936		/* Wa_18028616096 */
2937		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, UGM_FRAGMENT_THRESHOLD_TO_3);
2938	}
2939
2940	if (IS_DG2_G11(i915)) {
2941		/*
2942		 * Wa_22012826095:dg2
2943		 * Wa_22013059131:dg2
2944		 */
2945		wa_mcr_write_clr_set(wal, LSC_CHICKEN_BIT_0_UDW,
2946				     MAXREQS_PER_BANK,
2947				     REG_FIELD_PREP(MAXREQS_PER_BANK, 2));
2948
2949		/* Wa_22013059131:dg2 */
2950		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0,
2951				FORCE_1_SUB_MESSAGE_PER_FRAGMENT);
2952
2953		/*
2954		 * Wa_22012654132
2955		 *
2956		 * Note that register 0xE420 is write-only and cannot be read
2957		 * back for verification on DG2 (due to Wa_14012342262), so
2958		 * we need to explicitly skip the readback.
2959		 */
2960		wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
2961			   _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
2962			   0 /* write-only, so skip validation */,
2963			   true);
2964	}
2965
2966	if (IS_XEHPSDV(i915)) {
2967		/* Wa_1409954639 */
2968		wa_mcr_masked_en(wal,
2969				 GEN8_ROW_CHICKEN,
2970				 SYSTOLIC_DOP_CLOCK_GATING_DIS);
2971
2972		/* Wa_1607196519 */
2973		wa_mcr_masked_en(wal,
2974				 GEN9_ROW_CHICKEN4,
2975				 GEN12_DISABLE_GRF_CLEAR);
2976
2977		/* Wa_14010449647:xehpsdv */
2978		wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
2979				 GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
2980	}
2981}
2982
2983static void
2984engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
2985{
2986	if (GRAPHICS_VER(engine->i915) < 4)
2987		return;
2988
2989	engine_fake_wa_init(engine, wal);
2990
2991	/*
2992	 * These are common workarounds that just need to be applied
2993	 * to a single RCS/CCS engine's workaround list since
2994	 * they're reset as part of the general render domain reset.
2995	 */
2996	if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE)
2997		general_render_compute_wa_init(engine, wal);
2998
2999	if (engine->class == COMPUTE_CLASS)
3000		ccs_engine_wa_init(engine, wal);
3001	else if (engine->class == RENDER_CLASS)
3002		rcs_engine_wa_init(engine, wal);
3003	else
3004		xcs_engine_wa_init(engine, wal);
3005}
3006
3007void intel_engine_init_workarounds(struct intel_engine_cs *engine)
3008{
3009	struct i915_wa_list *wal = &engine->wa_list;
3010
3011	wa_init_start(wal, engine->gt, "engine", engine->name);
3012	engine_init_workarounds(engine, wal);
3013	wa_init_finish(wal);
3014}
3015
3016void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
3017{
3018	wa_list_apply(&engine->wa_list);
3019}
3020
3021static const struct i915_range mcr_ranges_gen8[] = {
3022	{ .start = 0x5500, .end = 0x55ff },
3023	{ .start = 0x7000, .end = 0x7fff },
3024	{ .start = 0x9400, .end = 0x97ff },
3025	{ .start = 0xb000, .end = 0xb3ff },
3026	{ .start = 0xe000, .end = 0xe7ff },
3027	{},
3028};
3029
3030static const struct i915_range mcr_ranges_gen12[] = {
3031	{ .start =  0x8150, .end =  0x815f },
3032	{ .start =  0x9520, .end =  0x955f },
3033	{ .start =  0xb100, .end =  0xb3ff },
3034	{ .start =  0xde80, .end =  0xe8ff },
3035	{ .start = 0x24a00, .end = 0x24a7f },
3036	{},
3037};
3038
3039static const struct i915_range mcr_ranges_xehp[] = {
3040	{ .start =  0x4000, .end =  0x4aff },
3041	{ .start =  0x5200, .end =  0x52ff },
3042	{ .start =  0x5400, .end =  0x7fff },
3043	{ .start =  0x8140, .end =  0x815f },
3044	{ .start =  0x8c80, .end =  0x8dff },
3045	{ .start =  0x94d0, .end =  0x955f },
3046	{ .start =  0x9680, .end =  0x96ff },
3047	{ .start =  0xb000, .end =  0xb3ff },
3048	{ .start =  0xc800, .end =  0xcfff },
3049	{ .start =  0xd800, .end =  0xd8ff },
3050	{ .start =  0xdc00, .end =  0xffff },
3051	{ .start = 0x17000, .end = 0x17fff },
3052	{ .start = 0x24a00, .end = 0x24a7f },
3053	{},
3054};
3055
3056static bool mcr_range(struct drm_i915_private *i915, u32 offset)
3057{
3058	const struct i915_range *mcr_ranges;
3059	int i;
3060
3061	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
3062		mcr_ranges = mcr_ranges_xehp;
3063	else if (GRAPHICS_VER(i915) >= 12)
3064		mcr_ranges = mcr_ranges_gen12;
3065	else if (GRAPHICS_VER(i915) >= 8)
3066		mcr_ranges = mcr_ranges_gen8;
3067	else
3068		return false;
3069
3070	/*
3071	 * Registers in these ranges are affected by the MCR selector
3072	 * which only controls CPU initiated MMIO. Routing does not
3073	 * work for CS access so we cannot verify them on this path.
3074	 */
3075	for (i = 0; mcr_ranges[i].start; i++)
3076		if (offset >= mcr_ranges[i].start &&
3077		    offset <= mcr_ranges[i].end)
3078			return true;
3079
3080	return false;
3081}
3082
3083static int
3084wa_list_srm(struct i915_request *rq,
3085	    const struct i915_wa_list *wal,
3086	    struct i915_vma *vma)
3087{
3088	struct drm_i915_private *i915 = rq->i915;
3089	unsigned int i, count = 0;
3090	const struct i915_wa *wa;
3091	u32 srm, *cs;
3092
3093	srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
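	/* Gen8+ SRM takes a 64-bit GGTT address, so the command is one dword longer */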
3094	if (GRAPHICS_VER(i915) >= 8)
3095		srm++;
3096
3097	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
3098		if (!mcr_range(i915, i915_mmio_reg_offset(wa->reg)))
3099			count++;
3100	}
3101
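	/*
	 * Each capture emits 4 dwords: the SRM opcode, the register offset
	 * and the GGTT destination address (the last dword is the upper
	 * address half on gen8+ and a padding MI_NOOP otherwise).
	 */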
3102	cs = intel_ring_begin(rq, 4 * count);
3103	if (IS_ERR(cs))
3104		return PTR_ERR(cs);
3105
3106	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
3107		u32 offset = i915_mmio_reg_offset(wa->reg);
3108
3109		if (mcr_range(i915, offset))
3110			continue;
3111
3112		*cs++ = srm;
3113		*cs++ = offset;
3114		*cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
3115		*cs++ = 0;
3116	}
3117	intel_ring_advance(rq, cs);
3118
3119	return 0;
3120}
3121
3122static int engine_wa_list_verify(struct intel_context *ce,
3123				 const struct i915_wa_list * const wal,
3124				 const char *from)
3125{
3126	const struct i915_wa *wa;
3127	struct i915_request *rq;
3128	struct i915_vma *vma;
3129	struct i915_gem_ww_ctx ww;
3130	unsigned int i;
3131	u32 *results;
3132	int err;
3133
3134	if (!wal->count)
3135		return 0;
3136
3137	vma = __vm_create_scratch_for_read(&ce->engine->gt->ggtt->vm,
3138					   wal->count * sizeof(u32));
3139	if (IS_ERR(vma))
3140		return PTR_ERR(vma);
3141
3142	intel_engine_pm_get(ce->engine);
3143	i915_gem_ww_ctx_init(&ww, false);
3144retry:
3145	err = i915_gem_object_lock(vma->obj, &ww);
3146	if (err == 0)
3147		err = intel_context_pin_ww(ce, &ww);
3148	if (err)
3149		goto err_pm;
3150
3151	err = i915_vma_pin_ww(vma, &ww, 0, 0,
3152			   i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
3153	if (err)
3154		goto err_unpin;
3155
3156	rq = i915_request_create(ce);
3157	if (IS_ERR(rq)) {
3158		err = PTR_ERR(rq);
3159		goto err_vma;
3160	}
3161
3162	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
3163	if (err == 0)
3164		err = wa_list_srm(rq, wal, vma);
3165
3166	i915_request_get(rq);
3167	if (err)
3168		i915_request_set_error_once(rq, err);
3169	i915_request_add(rq);
3170
3171	if (err)
3172		goto err_rq;
3173
3174	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
3175		err = -ETIME;
3176		goto err_rq;
3177	}
3178
3179	results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
3180	if (IS_ERR(results)) {
3181		err = PTR_ERR(results);
3182		goto err_rq;
3183	}
3184
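	/*
	 * wa_list_srm() stored each non-MCR register value at dword index i
	 * of the scratch buffer; check those values against the expected
	 * workaround bits.  MCR registers are skipped here as well.
	 */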
3185	err = 0;
3186	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
3187		if (mcr_range(rq->i915, i915_mmio_reg_offset(wa->reg)))
3188			continue;
3189
3190		if (!wa_verify(wal->gt, wa, results[i], wal->name, from))
3191			err = -ENXIO;
3192	}
3193
3194	i915_gem_object_unpin_map(vma->obj);
3195
3196err_rq:
3197	i915_request_put(rq);
3198err_vma:
3199	i915_vma_unpin(vma);
3200err_unpin:
3201	intel_context_unpin(ce);
3202err_pm:
3203	if (err == -EDEADLK) {
3204		err = i915_gem_ww_ctx_backoff(&ww);
3205		if (!err)
3206			goto retry;
3207	}
3208	i915_gem_ww_ctx_fini(&ww);
3209	intel_engine_pm_put(ce->engine);
3210	i915_vma_put(vma);
3211	return err;
3212}
3213
3214int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
3215				    const char *from)
3216{
3217	return engine_wa_list_verify(engine->kernel_context,
3218				     &engine->wa_list,
3219				     from);
3220}
3221
3222#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
3223#include "selftest_workarounds.c"
3224#endif