Linux Audio

Check our new training course

Loading...
Note: File does not exist in v4.6.
   1// SPDX-License-Identifier: MIT
   2/*
   3 * Copyright © 2014-2018 Intel Corporation
   4 */
   5
   6#include "i915_drv.h"
   7#include "i915_reg.h"
   8#include "intel_context.h"
   9#include "intel_engine_pm.h"
  10#include "intel_engine_regs.h"
  11#include "intel_gpu_commands.h"
  12#include "intel_gt.h"
  13#include "intel_gt_ccs_mode.h"
  14#include "intel_gt_mcr.h"
  15#include "intel_gt_print.h"
  16#include "intel_gt_regs.h"
  17#include "intel_ring.h"
  18#include "intel_workarounds.h"
  19
  20/**
  21 * DOC: Hardware workarounds
  22 *
  23 * Hardware workarounds are register programming documented to be executed in
  24 * the driver that fall outside of the normal programming sequences for a
  25 * platform. There are some basic categories of workarounds, depending on
  26 * how/when they are applied:
  27 *
  28 * - Context workarounds: workarounds that touch registers that are
  29 *   saved/restored to/from the HW context image. The list is emitted (via Load
  30 *   Register Immediate commands) once when initializing the device and saved in
  31 *   the default context. That default context is then used on every context
  32 *   creation to have a "primed golden context", i.e. a context image that
  33 *   already contains the changes needed to all the registers.
  34 *
  35 *   Context workarounds should be implemented in the \*_ctx_workarounds_init()
  36 *   variants respective to the targeted platforms.
  37 *
  38 * - Engine workarounds: the list of these WAs is applied whenever the specific
  39 *   engine is reset. It's also possible that a set of engine classes share a
  40 *   common power domain and they are reset together. This happens on some
  41 *   platforms with render and compute engines. In this case (at least) one of
 *   them needs to keep the workaround programming: the approach taken in the
  43 *   driver is to tie those workarounds to the first compute/render engine that
  44 *   is registered.  When executing with GuC submission, engine resets are
 *   outside of kernel driver control, hence the list of registers involved is
  46 *   written once, on engine initialization, and then passed to GuC, that
  47 *   saves/restores their values before/after the reset takes place. See
  48 *   ``drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c`` for reference.
  49 *
  50 *   Workarounds for registers specific to RCS and CCS should be implemented in
  51 *   rcs_engine_wa_init() and ccs_engine_wa_init(), respectively; those for
  52 *   registers belonging to BCS, VCS or VECS should be implemented in
  53 *   xcs_engine_wa_init(). Workarounds for registers not belonging to a specific
 *   engine's MMIO range but that are part of the common RCS/CCS reset domain
  55 *   should be implemented in general_render_compute_wa_init(). The settings
  56 *   about the CCS load balancing should be added in ccs_engine_wa_mode().
  57 *
  58 * - GT workarounds: the list of these WAs is applied whenever these registers
  59 *   revert to their default values: on GPU reset, suspend/resume [1]_, etc.
  60 *
  61 *   GT workarounds should be implemented in the \*_gt_workarounds_init()
  62 *   variants respective to the targeted platforms.
  63 *
  64 * - Register whitelist: some workarounds need to be implemented in userspace,
  65 *   but need to touch privileged registers. The whitelist in the kernel
  66 *   instructs the hardware to allow the access to happen. From the kernel side,
  67 *   this is just a special case of a MMIO workaround (as we write the list of
 *   these to-be-whitelisted registers to some special HW registers).
  69 *
  70 *   Register whitelisting should be done in the \*_whitelist_build() variants
  71 *   respective to the targeted platforms.
  72 *
  73 * - Workaround batchbuffers: buffers that get executed automatically by the
  74 *   hardware on every HW context restore. These buffers are created and
 *   programmed in the default context so the hardware always goes through those
 *   programming sequences when switching contexts. The support for workaround
 *   batchbuffers is enabled via these hardware mechanisms:
  78 *
  79 *   #. INDIRECT_CTX: A batchbuffer and an offset are provided in the default
  80 *      context, pointing the hardware to jump to that location when that offset
  81 *      is reached in the context restore. Workaround batchbuffer in the driver
  82 *      currently uses this mechanism for all platforms.
  83 *
  84 *   #. BB_PER_CTX_PTR: A batchbuffer is provided in the default context,
  85 *      pointing the hardware to a buffer to continue executing after the
  86 *      engine registers are restored in a context restore sequence. This is
  87 *      currently not used in the driver.
  88 *
  89 * - Other:  There are WAs that, due to their nature, cannot be applied from a
  90 *   central place. Those are peppered around the rest of the code, as needed.
  91 *   Workarounds related to the display IP are the main example.
  92 *
  93 * .. [1] Technically, some registers are powercontext saved & restored, so they
  94 *    survive a suspend/resume. In practice, writing them again is not too
  95 *    costly and simplifies things, so it's the approach taken in the driver.
  96 */
  97
  98static void wa_init_start(struct i915_wa_list *wal, struct intel_gt *gt,
  99			  const char *name, const char *engine_name)
 100{
 101	wal->gt = gt;
 102	wal->name = name;
 103	wal->engine_name = engine_name;
 104}
 105
 106#define WA_LIST_CHUNK (1 << 4)
 107
 108static void wa_init_finish(struct i915_wa_list *wal)
 109{
 110	/* Trim unused entries. */
 111	if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
 112		struct i915_wa *list = kmemdup(wal->list,
 113					       wal->count * sizeof(*list),
 114					       GFP_KERNEL);
 115
 116		if (list) {
 117			kfree(wal->list);
 118			wal->list = list;
 119		}
 120	}
 121
 122	if (!wal->count)
 123		return;
 124
 125	gt_dbg(wal->gt, "Initialized %u %s workarounds on %s\n",
 126	       wal->wa_count, wal->name, wal->engine_name);
 127}
 128
 129static enum forcewake_domains
 130wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
 131{
 132	enum forcewake_domains fw = 0;
 133	struct i915_wa *wa;
 134	unsigned int i;
 135
 136	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
 137		fw |= intel_uncore_forcewake_for_reg(uncore,
 138						     wa->reg,
 139						     FW_REG_READ |
 140						     FW_REG_WRITE);
 141
 142	return fw;
 143}
 144
 145static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
 146{
 147	unsigned int addr = i915_mmio_reg_offset(wa->reg);
 148	struct drm_i915_private *i915 = wal->gt->i915;
 149	unsigned int start = 0, end = wal->count;
 150	const unsigned int grow = WA_LIST_CHUNK;
 151	struct i915_wa *wa_;
 152
 153	GEM_BUG_ON(!is_power_of_2(grow));
 154
 155	if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
 156		struct i915_wa *list;
 157
 158		list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
 159				     GFP_KERNEL);
 160		if (!list) {
 161			drm_err(&i915->drm, "No space for workaround init!\n");
 162			return;
 163		}
 164
 165		if (wal->list) {
 166			memcpy(list, wal->list, sizeof(*wa) * wal->count);
 167			kfree(wal->list);
 168		}
 169
 170		wal->list = list;
 171	}
 172
 173	while (start < end) {
 174		unsigned int mid = start + (end - start) / 2;
 175
 176		if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) {
 177			start = mid + 1;
 178		} else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) {
 179			end = mid;
 180		} else {
 181			wa_ = &wal->list[mid];
 182
 183			if ((wa->clr | wa_->clr) && !(wa->clr & ~wa_->clr)) {
 184				drm_err(&i915->drm,
 185					"Discarding overwritten w/a for reg %04x (clear: %08x, set: %08x)\n",
 186					i915_mmio_reg_offset(wa_->reg),
 187					wa_->clr, wa_->set);
 188
 189				wa_->set &= ~wa->clr;
 190			}
 191
 192			wal->wa_count++;
 193			wa_->set |= wa->set;
 194			wa_->clr |= wa->clr;
 195			wa_->read |= wa->read;
 196			return;
 197		}
 198	}
 199
 200	wal->wa_count++;
 201	wa_ = &wal->list[wal->count++];
 202	*wa_ = *wa;
 203
 204	while (wa_-- > wal->list) {
 205		GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) ==
 206			   i915_mmio_reg_offset(wa_[1].reg));
 207		if (i915_mmio_reg_offset(wa_[1].reg) >
 208		    i915_mmio_reg_offset(wa_[0].reg))
 209			break;
 210
 211		swap(wa_[1], wa_[0]);
 212	}
 213}
 214
 215static void wa_add(struct i915_wa_list *wal, i915_reg_t reg,
 216		   u32 clear, u32 set, u32 read_mask, bool masked_reg)
 217{
 218	struct i915_wa wa = {
 219		.reg  = reg,
 220		.clr  = clear,
 221		.set  = set,
 222		.read = read_mask,
 223		.masked_reg = masked_reg,
 224	};
 225
 226	_wa_add(wal, &wa);
 227}
 228
 229static void wa_mcr_add(struct i915_wa_list *wal, i915_mcr_reg_t reg,
 230		       u32 clear, u32 set, u32 read_mask, bool masked_reg)
 231{
 232	struct i915_wa wa = {
 233		.mcr_reg = reg,
 234		.clr  = clear,
 235		.set  = set,
 236		.read = read_mask,
 237		.masked_reg = masked_reg,
 238		.is_mcr = 1,
 239	};
 240
 241	_wa_add(wal, &wa);
 242}
 243
 244static void
 245wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set)
 246{
 247	wa_add(wal, reg, clear, set, clear | set, false);
 248}
 249
 250static void
 251wa_mcr_write_clr_set(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 clear, u32 set)
 252{
 253	wa_mcr_add(wal, reg, clear, set, clear | set, false);
 254}
 255
 256static void
 257wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
 258{
 259	wa_write_clr_set(wal, reg, ~0, set);
 260}
 261
 262static void
 263wa_mcr_write(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 set)
 264{
 265	wa_mcr_write_clr_set(wal, reg, ~0, set);
 266}
 267
 268static void
 269wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
 270{
 271	wa_write_clr_set(wal, reg, set, set);
 272}
 273
 274static void
 275wa_mcr_write_or(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 set)
 276{
 277	wa_mcr_write_clr_set(wal, reg, set, set);
 278}
 279
 280static void
 281wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr)
 282{
 283	wa_write_clr_set(wal, reg, clr, 0);
 284}
 285
 286static void
 287wa_mcr_write_clr(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 clr)
 288{
 289	wa_mcr_write_clr_set(wal, reg, clr, 0);
 290}
 291
 292/*
 293 * WA operations on "masked register". A masked register has the upper 16 bits
 294 * documented as "masked" in b-spec. Its purpose is to allow writing to just a
 295 * portion of the register without a rmw: you simply write in the upper 16 bits
 296 * the mask of bits you are going to modify.
 297 *
 298 * The wa_masked_* family of functions already does the necessary operations to
 299 * calculate the mask based on the parameters passed, so user only has to
 300 * provide the lower 16 bits of that register.
 301 */
 302
 303static void
 304wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
 305{
 306	wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true);
 307}
 308
 309static void
 310wa_mcr_masked_en(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 val)
 311{
 312	wa_mcr_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true);
 313}
 314
 315static void
 316wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
 317{
 318	wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true);
 319}
 320
 321static void
 322wa_mcr_masked_dis(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 val)
 323{
 324	wa_mcr_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true);
 325}
 326
 327static void
 328wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg,
 329		    u32 mask, u32 val)
 330{
 331	wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true);
 332}
 333
 334static void
 335wa_mcr_masked_field_set(struct i915_wa_list *wal, i915_mcr_reg_t reg,
 336			u32 mask, u32 val)
 337{
 338	wa_mcr_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true);
 339}
 340
 341static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine,
 342				      struct i915_wa_list *wal)
 343{
 344	wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
 345}
 346
 347static void gen7_ctx_workarounds_init(struct intel_engine_cs *engine,
 348				      struct i915_wa_list *wal)
 349{
 350	wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
 351}
 352
 353static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
 354				      struct i915_wa_list *wal)
 355{
 356	wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
 357
 358	/* WaDisableAsyncFlipPerfMode:bdw,chv */
 359	wa_masked_en(wal, RING_MI_MODE(RENDER_RING_BASE), ASYNC_FLIP_PERF_DISABLE);
 360
 361	/* WaDisablePartialInstShootdown:bdw,chv */
 362	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
 363			 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
 364
 365	/* Use Force Non-Coherent whenever executing a 3D context. This is a
 366	 * workaround for a possible hang in the unlikely event a TLB
 367	 * invalidation occurs during a PSD flush.
 368	 */
 369	/* WaForceEnableNonCoherent:bdw,chv */
 370	/* WaHdcDisableFetchWhenMasked:bdw,chv */
 371	wa_masked_en(wal, HDC_CHICKEN0,
 372		     HDC_DONOT_FETCH_MEM_WHEN_MASKED |
 373		     HDC_FORCE_NON_COHERENT);
 374
 375	/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
 376	 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
 377	 *  polygons in the same 8x4 pixel/sample area to be processed without
 378	 *  stalling waiting for the earlier ones to write to Hierarchical Z
 379	 *  buffer."
 380	 *
 381	 * This optimization is off by default for BDW and CHV; turn it on.
 382	 */
 383	wa_masked_dis(wal, CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
 384
 385	/* Wa4x4STCOptimizationDisable:bdw,chv */
 386	wa_masked_en(wal, CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
 387
 388	/*
 389	 * BSpec recommends 8x4 when MSAA is used,
 390	 * however in practice 16x4 seems fastest.
 391	 *
 392	 * Note that PS/WM thread counts depend on the WIZ hashing
 393	 * disable bit, which we don't touch here, but it's good
 394	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
 395	 */
 396	wa_masked_field_set(wal, GEN7_GT_MODE,
 397			    GEN6_WIZ_HASHING_MASK,
 398			    GEN6_WIZ_HASHING_16x4);
 399}
 400
 401static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
 402				     struct i915_wa_list *wal)
 403{
 404	struct drm_i915_private *i915 = engine->i915;
 405
 406	gen8_ctx_workarounds_init(engine, wal);
 407
 408	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
 409	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
 410
 411	/* WaDisableDopClockGating:bdw
 412	 *
 413	 * Also see the related UCGTCL1 write in bdw_init_clock_gating()
 414	 * to disable EUTC clock gating.
 415	 */
 416	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
 417			 DOP_CLOCK_GATING_DISABLE);
 418
 419	wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3,
 420			 GEN8_SAMPLER_POWER_BYPASS_DIS);
 421
 422	wa_masked_en(wal, HDC_CHICKEN0,
 423		     /* WaForceContextSaveRestoreNonCoherent:bdw */
 424		     HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
 425		     /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
 426		     (IS_BROADWELL_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
 427}
 428
 429static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
 430				     struct i915_wa_list *wal)
 431{
 432	gen8_ctx_workarounds_init(engine, wal);
 433
 434	/* WaDisableThreadStallDopClockGating:chv */
 435	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
 436
 437	/* Improve HiZ throughput on CHV. */
 438	wa_masked_en(wal, HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
 439}
 440
 441static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
 442				      struct i915_wa_list *wal)
 443{
 444	struct drm_i915_private *i915 = engine->i915;
 445
 446	if (HAS_LLC(i915)) {
 447		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
 448		 *
 449		 * Must match Display Engine. See
 450		 * WaCompressedResourceDisplayNewHashMode.
 451		 */
 452		wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
 453			     GEN9_PBE_COMPRESSED_HASH_SELECTION);
 454		wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
 455				 GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
 456	}
 457
 458	/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
 459	/* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
 460	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
 461			 FLOW_CONTROL_ENABLE |
 462			 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
 463
 464	/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
 465	/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
 466	wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
 467			 GEN9_ENABLE_YV12_BUGFIX |
 468			 GEN9_ENABLE_GPGPU_PREEMPTION);
 469
 470	/* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
 471	/* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
 472	wa_masked_en(wal, CACHE_MODE_1,
 473		     GEN8_4x4_STC_OPTIMIZATION_DISABLE |
 474		     GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);
 475
 476	/* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
 477	wa_mcr_masked_dis(wal, GEN9_HALF_SLICE_CHICKEN5,
 478			  GEN9_CCS_TLB_PREFETCH_ENABLE);
 479
 480	/* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
 481	wa_masked_en(wal, HDC_CHICKEN0,
 482		     HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
 483		     HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);
 484
 485	/* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
 486	 * both tied to WaForceContextSaveRestoreNonCoherent
 487	 * in some hsds for skl. We keep the tie for all gen9. The
 488	 * documentation is a bit hazy and so we want to get common behaviour,
 489	 * even though there is no clear evidence we would need both on kbl/bxt.
 490	 * This area has been source of system hangs so we play it safe
 491	 * and mimic the skl regardless of what bspec says.
 492	 *
 493	 * Use Force Non-Coherent whenever executing a 3D context. This
 494	 * is a workaround for a possible hang in the unlikely event
 495	 * a TLB invalidation occurs during a PSD flush.
 496	 */
 497
 498	/* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
 499	wa_masked_en(wal, HDC_CHICKEN0,
 500		     HDC_FORCE_NON_COHERENT);
 501
 502	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
 503	if (IS_SKYLAKE(i915) ||
 504	    IS_KABYLAKE(i915) ||
 505	    IS_COFFEELAKE(i915) ||
 506	    IS_COMETLAKE(i915))
 507		wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3,
 508				 GEN8_SAMPLER_POWER_BYPASS_DIS);
 509
 510	/* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
 511	wa_mcr_masked_en(wal, HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
 512
 513	/*
 514	 * Supporting preemption with fine-granularity requires changes in the
 515	 * batch buffer programming. Since we can't break old userspace, we
 516	 * need to set our default preemption level to safe value. Userspace is
 517	 * still able to use more fine-grained preemption levels, since in
 518	 * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
 519	 * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
 520	 * not real HW workarounds, but merely a way to start using preemption
 521	 * while maintaining old contract with userspace.
 522	 */
 523
 524	/* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
 525	wa_masked_dis(wal, GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
 526
 527	/* WaDisableGPGPUMidCmdPreemption:skl,bxt,blk,cfl,[cnl] */
 528	wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
 529			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
 530			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
 531
 532	/* WaClearHIZ_WM_CHICKEN3:bxt,glk */
 533	if (IS_GEN9_LP(i915))
 534		wa_masked_en(wal, GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
 535}
 536
 537static void skl_tune_iz_hashing(struct intel_engine_cs *engine,
 538				struct i915_wa_list *wal)
 539{
 540	struct intel_gt *gt = engine->gt;
 541	u8 vals[3] = { 0, 0, 0 };
 542	unsigned int i;
 543
 544	for (i = 0; i < 3; i++) {
 545		u8 ss;
 546
 547		/*
 548		 * Only consider slices where one, and only one, subslice has 7
 549		 * EUs
 550		 */
 551		if (!is_power_of_2(gt->info.sseu.subslice_7eu[i]))
 552			continue;
 553
 554		/*
 555		 * subslice_7eu[i] != 0 (because of the check above) and
 556		 * ss_max == 4 (maximum number of subslices possible per slice)
 557		 *
 558		 * ->    0 <= ss <= 3;
 559		 */
 560		ss = ffs(gt->info.sseu.subslice_7eu[i]) - 1;
 561		vals[i] = 3 - ss;
 562	}
 563
 564	if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
 565		return;
 566
 567	/* Tune IZ hashing. See intel_device_info_runtime_init() */
 568	wa_masked_field_set(wal, GEN7_GT_MODE,
 569			    GEN9_IZ_HASHING_MASK(2) |
 570			    GEN9_IZ_HASHING_MASK(1) |
 571			    GEN9_IZ_HASHING_MASK(0),
 572			    GEN9_IZ_HASHING(2, vals[2]) |
 573			    GEN9_IZ_HASHING(1, vals[1]) |
 574			    GEN9_IZ_HASHING(0, vals[0]));
 575}
 576
 577static void skl_ctx_workarounds_init(struct intel_engine_cs *engine,
 578				     struct i915_wa_list *wal)
 579{
 580	gen9_ctx_workarounds_init(engine, wal);
 581	skl_tune_iz_hashing(engine, wal);
 582}
 583
 584static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine,
 585				     struct i915_wa_list *wal)
 586{
 587	gen9_ctx_workarounds_init(engine, wal);
 588
 589	/* WaDisableThreadStallDopClockGating:bxt */
 590	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
 591			 STALL_DOP_GATING_DISABLE);
 592
 593	/* WaToEnableHwFixForPushConstHWBug:bxt */
 594	wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
 595		     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
 596}
 597
 598static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
 599				     struct i915_wa_list *wal)
 600{
 601	struct drm_i915_private *i915 = engine->i915;
 602
 603	gen9_ctx_workarounds_init(engine, wal);
 604
 605	/* WaToEnableHwFixForPushConstHWBug:kbl */
 606	if (IS_KABYLAKE(i915) && IS_GRAPHICS_STEP(i915, STEP_C0, STEP_FOREVER))
 607		wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
 608			     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
 609
 610	/* WaDisableSbeCacheDispatchPortSharing:kbl */
 611	wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
 612			 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
 613}
 614
 615static void glk_ctx_workarounds_init(struct intel_engine_cs *engine,
 616				     struct i915_wa_list *wal)
 617{
 618	gen9_ctx_workarounds_init(engine, wal);
 619
 620	/* WaToEnableHwFixForPushConstHWBug:glk */
 621	wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
 622		     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
 623}
 624
 625static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
 626				     struct i915_wa_list *wal)
 627{
 628	gen9_ctx_workarounds_init(engine, wal);
 629
 630	/* WaToEnableHwFixForPushConstHWBug:cfl */
 631	wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
 632		     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
 633
 634	/* WaDisableSbeCacheDispatchPortSharing:cfl */
 635	wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
 636			 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
 637}
 638
 639static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
 640				     struct i915_wa_list *wal)
 641{
 642	/* Wa_1406697149 (WaDisableBankHangMode:icl) */
 643	wa_write(wal, GEN8_L3CNTLREG, GEN8_ERRDETBCTRL);
 644
 645	/* WaForceEnableNonCoherent:icl
 646	 * This is not the same workaround as in early Gen9 platforms, where
 647	 * lacking this could cause system hangs, but coherency performance
 648	 * overhead is high and only a few compute workloads really need it
 649	 * (the register is whitelisted in hardware now, so UMDs can opt in
 650	 * for coherency if they have a good reason).
 651	 */
 652	wa_mcr_masked_en(wal, ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);
 653
 654	/* WaEnableFloatBlendOptimization:icl */
 655	wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
 656		   _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE),
 657		   0 /* write-only, so skip validation */,
 658		   true);
 659
 660	/* WaDisableGPGPUMidThreadPreemption:icl */
 661	wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
 662			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
 663			    GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
 664
 665	/* allow headerless messages for preemptible GPGPU context */
 666	wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
 667			 GEN11_SAMPLER_ENABLE_HEADLESS_MSG);
 668
 669	/* Wa_1604278689:icl,ehl */
 670	wa_write(wal, IVB_FBC_RT_BASE, 0xFFFFFFFF & ~ILK_FBC_RT_VALID);
 671	wa_write_clr_set(wal, IVB_FBC_RT_BASE_UPPER,
 672			 0,
 673			 0xFFFFFFFF);
 674
 675	/* Wa_1406306137:icl,ehl */
 676	wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU);
 677}
 678
 679/*
 680 * These settings aren't actually workarounds, but general tuning settings that
 681 * need to be programmed on dg2 platform.
 682 */
 683static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine,
 684				   struct i915_wa_list *wal)
 685{
 686	wa_mcr_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP);
 687	wa_mcr_write_clr_set(wal, XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
 688			     REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f));
 689	wa_mcr_write_clr_set(wal, XEHP_FF_MODE2, FF_MODE2_TDS_TIMER_MASK,
 690			     FF_MODE2_TDS_TIMER_128);
 691}
 692
 693static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
 694				       struct i915_wa_list *wal)
 695{
 696	struct drm_i915_private *i915 = engine->i915;
 697
 698	/*
 699	 * Wa_1409142259:tgl,dg1,adl-p
 700	 * Wa_1409347922:tgl,dg1,adl-p
 701	 * Wa_1409252684:tgl,dg1,adl-p
 702	 * Wa_1409217633:tgl,dg1,adl-p
 703	 * Wa_1409207793:tgl,dg1,adl-p
 704	 * Wa_1409178076:tgl,dg1,adl-p
 705	 * Wa_1408979724:tgl,dg1,adl-p
 706	 * Wa_14010443199:tgl,rkl,dg1,adl-p
 707	 * Wa_14010698770:tgl,rkl,dg1,adl-s,adl-p
 708	 * Wa_1409342910:tgl,rkl,dg1,adl-s,adl-p
 709	 */
 710	wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3,
 711		     GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
 712
 713	/* WaDisableGPGPUMidThreadPreemption:gen12 */
 714	wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
 715			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
 716			    GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
 717
 718	/*
 719	 * Wa_16011163337 - GS_TIMER
 720	 *
 721	 * TDS_TIMER: Although some platforms refer to it as Wa_1604555607, we
 722	 * need to program it even on those that don't explicitly list that
 723	 * workaround.
 724	 *
 725	 * Note that the programming of GEN12_FF_MODE2 is further modified
 726	 * according to the FF_MODE2 guidance given by Wa_1608008084.
 727	 * Wa_1608008084 tells us the FF_MODE2 register will return the wrong
 728	 * value when read from the CPU.
 729	 *
 730	 * The default value for this register is zero for all fields.
 731	 * So instead of doing a RMW we should just write the desired values
 732	 * for TDS and GS timers. Note that since the readback can't be trusted,
 733	 * the clear mask is just set to ~0 to make sure other bits are not
 734	 * inadvertently set. For the same reason read verification is ignored.
 735	 */
 736	wa_add(wal,
 737	       GEN12_FF_MODE2,
 738	       ~0,
 739	       FF_MODE2_TDS_TIMER_128 | FF_MODE2_GS_TIMER_224,
 740	       0, false);
 741
 742	if (!IS_DG1(i915)) {
 743		/* Wa_1806527549 */
 744		wa_masked_en(wal, HIZ_CHICKEN, HZ_DEPTH_TEST_LE_GE_OPT_DISABLE);
 745
 746		/* Wa_1606376872 */
 747		wa_masked_en(wal, COMMON_SLICE_CHICKEN4, DISABLE_TDC_LOAD_BALANCING_CALC);
 748	}
 749}
 750
 751static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine,
 752				     struct i915_wa_list *wal)
 753{
 754	gen12_ctx_workarounds_init(engine, wal);
 755
 756	/* Wa_1409044764 */
 757	wa_masked_dis(wal, GEN11_COMMON_SLICE_CHICKEN3,
 758		      DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN);
 759
 760	/* Wa_22010493298 */
 761	wa_masked_en(wal, HIZ_CHICKEN,
 762		     DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE);
 763}
 764
 765static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine,
 766				     struct i915_wa_list *wal)
 767{
 768	dg2_ctx_gt_tuning_init(engine, wal);
 769
 770	/* Wa_16013271637:dg2 */
 771	wa_mcr_masked_en(wal, XEHP_SLICE_COMMON_ECO_CHICKEN1,
 772			 MSC_MSAA_REODER_BUF_BYPASS_DISABLE);
 773
 774	/* Wa_14014947963:dg2 */
 775	wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000);
 776
 777	/* Wa_18018764978:dg2 */
 778	wa_mcr_masked_en(wal, XEHP_PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL);
 779
 780	/* Wa_18019271663:dg2 */
 781	wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE);
 782
 783	/* Wa_14019877138:dg2 */
 784	wa_mcr_masked_en(wal, XEHP_PSS_CHICKEN, FD_END_COLLECT);
 785}
 786
 787static void xelpg_ctx_gt_tuning_init(struct intel_engine_cs *engine,
 788				     struct i915_wa_list *wal)
 789{
 790	struct intel_gt *gt = engine->gt;
 791
 792	dg2_ctx_gt_tuning_init(engine, wal);
 793
 794	/*
 795	 * Due to Wa_16014892111, the DRAW_WATERMARK tuning must be done in
 796	 * gen12_emit_indirect_ctx_rcs() rather than here on some early
 797	 * steppings.
 798	 */
 799	if (!(IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
 800	      IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)))
 801		wa_add(wal, DRAW_WATERMARK, VERT_WM_VAL, 0x3FF, 0, false);
 802}
 803
 804static void xelpg_ctx_workarounds_init(struct intel_engine_cs *engine,
 805				       struct i915_wa_list *wal)
 806{
 807	struct intel_gt *gt = engine->gt;
 808
 809	xelpg_ctx_gt_tuning_init(engine, wal);
 810
 811	if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
 812	    IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) {
 813		/* Wa_14014947963 */
 814		wa_masked_field_set(wal, VF_PREEMPTION,
 815				    PREEMPTION_VERTEX_COUNT, 0x4000);
 816
 817		/* Wa_16013271637 */
 818		wa_mcr_masked_en(wal, XEHP_SLICE_COMMON_ECO_CHICKEN1,
 819				 MSC_MSAA_REODER_BUF_BYPASS_DISABLE);
 820
 821		/* Wa_18019627453 */
 822		wa_mcr_masked_en(wal, VFLSKPD, VF_PREFETCH_TLB_DIS);
 823
 824		/* Wa_18018764978 */
 825		wa_mcr_masked_en(wal, XEHP_PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL);
 826	}
 827
 828	/* Wa_18019271663 */
 829	wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE);
 830
 831	/* Wa_14019877138 */
 832	wa_mcr_masked_en(wal, XEHP_PSS_CHICKEN, FD_END_COLLECT);
 833}
 834
 835static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine,
 836					 struct i915_wa_list *wal)
 837{
 838	/*
 839	 * This is a "fake" workaround defined by software to ensure we
 840	 * maintain reliable, backward-compatible behavior for userspace with
 841	 * regards to how nested MI_BATCH_BUFFER_START commands are handled.
 842	 *
 843	 * The per-context setting of MI_MODE[12] determines whether the bits
 844	 * of a nested MI_BATCH_BUFFER_START instruction should be interpreted
 845	 * in the traditional manner or whether they should instead use a new
 846	 * tgl+ meaning that breaks backward compatibility, but allows nesting
 847	 * into 3rd-level batchbuffers.  When this new capability was first
 848	 * added in TGL, it remained off by default unless a context
 849	 * intentionally opted in to the new behavior.  However Xe_HPG now
 850	 * flips this on by default and requires that we explicitly opt out if
 851	 * we don't want the new behavior.
 852	 *
 853	 * From a SW perspective, we want to maintain the backward-compatible
 854	 * behavior for userspace, so we'll apply a fake workaround to set it
 855	 * back to the legacy behavior on platforms where the hardware default
 856	 * is to break compatibility.  At the moment there is no Linux
 857	 * userspace that utilizes third-level batchbuffers, so this will avoid
 858	 * userspace from needing to make any changes.  using the legacy
 859	 * meaning is the correct thing to do.  If/when we have userspace
 860	 * consumers that want to utilize third-level batch nesting, we can
 861	 * provide a context parameter to allow them to opt-in.
 862	 */
 863	wa_masked_dis(wal, RING_MI_MODE(engine->mmio_base), TGL_NESTED_BB_EN);
 864}
 865
 866static void gen12_ctx_gt_mocs_init(struct intel_engine_cs *engine,
 867				   struct i915_wa_list *wal)
 868{
 869	u8 mocs;
 870
 871	/*
 872	 * Some blitter commands do not have a field for MOCS, those
 873	 * commands will use MOCS index pointed by BLIT_CCTL.
 874	 * BLIT_CCTL registers are needed to be programmed to un-cached.
 875	 */
 876	if (engine->class == COPY_ENGINE_CLASS) {
 877		mocs = engine->gt->mocs.uc_index;
 878		wa_write_clr_set(wal,
 879				 BLIT_CCTL(engine->mmio_base),
 880				 BLIT_CCTL_MASK,
 881				 BLIT_CCTL_MOCS(mocs, mocs));
 882	}
 883}
 884
 885/*
 886 * gen12_ctx_gt_fake_wa_init() aren't programmingan official workaround
 887 * defined by the hardware team, but it programming general context registers.
 888 * Adding those context register programming in context workaround
 889 * allow us to use the wa framework for proper application and validation.
 890 */
 891static void
 892gen12_ctx_gt_fake_wa_init(struct intel_engine_cs *engine,
 893			  struct i915_wa_list *wal)
 894{
 895	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
 896		fakewa_disable_nestedbb_mode(engine, wal);
 897
 898	gen12_ctx_gt_mocs_init(engine, wal);
 899}
 900
 901static void
 902__intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
 903			   struct i915_wa_list *wal,
 904			   const char *name)
 905{
 906	struct drm_i915_private *i915 = engine->i915;
 907
 908	wa_init_start(wal, engine->gt, name, engine->name);
 909
 910	/* Applies to all engines */
 911	/*
 912	 * Fake workarounds are not the actual workaround but
 913	 * programming of context registers using workaround framework.
 914	 */
 915	if (GRAPHICS_VER(i915) >= 12)
 916		gen12_ctx_gt_fake_wa_init(engine, wal);
 917
 918	if (engine->class != RENDER_CLASS)
 919		goto done;
 920
 921	if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 74)))
 922		xelpg_ctx_workarounds_init(engine, wal);
 923	else if (IS_PONTEVECCHIO(i915))
 924		; /* noop; none at this time */
 925	else if (IS_DG2(i915))
 926		dg2_ctx_workarounds_init(engine, wal);
 927	else if (IS_XEHPSDV(i915))
 928		; /* noop; none at this time */
 929	else if (IS_DG1(i915))
 930		dg1_ctx_workarounds_init(engine, wal);
 931	else if (GRAPHICS_VER(i915) == 12)
 932		gen12_ctx_workarounds_init(engine, wal);
 933	else if (GRAPHICS_VER(i915) == 11)
 934		icl_ctx_workarounds_init(engine, wal);
 935	else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915))
 936		cfl_ctx_workarounds_init(engine, wal);
 937	else if (IS_GEMINILAKE(i915))
 938		glk_ctx_workarounds_init(engine, wal);
 939	else if (IS_KABYLAKE(i915))
 940		kbl_ctx_workarounds_init(engine, wal);
 941	else if (IS_BROXTON(i915))
 942		bxt_ctx_workarounds_init(engine, wal);
 943	else if (IS_SKYLAKE(i915))
 944		skl_ctx_workarounds_init(engine, wal);
 945	else if (IS_CHERRYVIEW(i915))
 946		chv_ctx_workarounds_init(engine, wal);
 947	else if (IS_BROADWELL(i915))
 948		bdw_ctx_workarounds_init(engine, wal);
 949	else if (GRAPHICS_VER(i915) == 7)
 950		gen7_ctx_workarounds_init(engine, wal);
 951	else if (GRAPHICS_VER(i915) == 6)
 952		gen6_ctx_workarounds_init(engine, wal);
 953	else if (GRAPHICS_VER(i915) < 8)
 954		;
 955	else
 956		MISSING_CASE(GRAPHICS_VER(i915));
 957
 958done:
 959	wa_init_finish(wal);
 960}
 961
 962void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
 963{
 964	__intel_engine_init_ctx_wa(engine, &engine->ctx_wa_list, "context");
 965}
 966
 967int intel_engine_emit_ctx_wa(struct i915_request *rq)
 968{
 969	struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
 970	struct intel_uncore *uncore = rq->engine->uncore;
 971	enum forcewake_domains fw;
 972	unsigned long flags;
 973	struct i915_wa *wa;
 974	unsigned int i;
 975	u32 *cs;
 976	int ret;
 977
 978	if (wal->count == 0)
 979		return 0;
 980
 981	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
 982	if (ret)
 983		return ret;
 984
 985	cs = intel_ring_begin(rq, (wal->count * 2 + 2));
 986	if (IS_ERR(cs))
 987		return PTR_ERR(cs);
 988
 989	fw = wal_get_fw_for_rmw(uncore, wal);
 990
 991	intel_gt_mcr_lock(wal->gt, &flags);
 992	spin_lock(&uncore->lock);
 993	intel_uncore_forcewake_get__locked(uncore, fw);
 994
 995	*cs++ = MI_LOAD_REGISTER_IMM(wal->count);
 996	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
 997		u32 val;
 998
 999		/* Skip reading the register if it's not really needed */
1000		if (wa->masked_reg || (wa->clr | wa->set) == U32_MAX) {
1001			val = wa->set;
1002		} else {
1003			val = wa->is_mcr ?
1004				intel_gt_mcr_read_any_fw(wal->gt, wa->mcr_reg) :
1005				intel_uncore_read_fw(uncore, wa->reg);
1006			val &= ~wa->clr;
1007			val |= wa->set;
1008		}
1009
1010		*cs++ = i915_mmio_reg_offset(wa->reg);
1011		*cs++ = val;
1012	}
1013	*cs++ = MI_NOOP;
1014
1015	intel_uncore_forcewake_put__locked(uncore, fw);
1016	spin_unlock(&uncore->lock);
1017	intel_gt_mcr_unlock(wal->gt, flags);
1018
1019	intel_ring_advance(rq, cs);
1020
1021	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
1022	if (ret)
1023		return ret;
1024
1025	return 0;
1026}
1027
1028static void
1029gen4_gt_workarounds_init(struct intel_gt *gt,
1030			 struct i915_wa_list *wal)
1031{
1032	/* WaDisable_RenderCache_OperationalFlush:gen4,ilk */
1033	wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
1034}
1035
1036static void
1037g4x_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1038{
1039	gen4_gt_workarounds_init(gt, wal);
1040
1041	/* WaDisableRenderCachePipelinedFlush:g4x,ilk */
1042	wa_masked_en(wal, CACHE_MODE_0, CM0_PIPELINED_RENDER_FLUSH_DISABLE);
1043}
1044
1045static void
1046ilk_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1047{
1048	g4x_gt_workarounds_init(gt, wal);
1049
1050	wa_masked_en(wal, _3D_CHICKEN2, _3D_CHICKEN2_WM_READ_PIPELINED);
1051}
1052
/*
 * GT workaround hook selected by gt_init_workarounds() when
 * GRAPHICS_VER == 6; it currently adds no entries to @wal.
 */
static void
snb_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
}
1057
1058static void
1059ivb_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1060{
1061	/* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
1062	wa_masked_dis(wal,
1063		      GEN7_COMMON_SLICE_CHICKEN1,
1064		      GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
1065
1066	/* WaApplyL3ControlAndL3ChickenMode:ivb */
1067	wa_write(wal, GEN7_L3CNTLREG1, GEN7_WA_FOR_GEN7_L3_CONTROL);
1068	wa_write(wal, GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE);
1069
1070	/* WaForceL3Serialization:ivb */
1071	wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);
1072}
1073
1074static void
1075vlv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1076{
1077	/* WaForceL3Serialization:vlv */
1078	wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);
1079
1080	/*
1081	 * WaIncreaseL3CreditsForVLVB0:vlv
1082	 * This is the hardware default actually.
1083	 */
1084	wa_write(wal, GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
1085}
1086
1087static void
1088hsw_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1089{
1090	/* L3 caching of data atomics doesn't work -- disable it. */
1091	wa_write(wal, HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
1092
1093	wa_add(wal,
1094	       HSW_ROW_CHICKEN3, 0,
1095	       _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE),
1096	       0 /* XXX does this reg exist? */, true);
1097
1098	/* WaVSRefCountFullforceMissDisable:hsw */
1099	wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME);
1100}
1101
1102static void
1103gen9_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
1104{
1105	const struct sseu_dev_info *sseu = &to_gt(i915)->info.sseu;
1106	unsigned int slice, subslice;
1107	u32 mcr, mcr_mask;
1108
1109	GEM_BUG_ON(GRAPHICS_VER(i915) != 9);
1110
1111	/*
1112	 * WaProgramMgsrForCorrectSliceSpecificMmioReads:gen9,glk,kbl,cml
1113	 * Before any MMIO read into slice/subslice specific registers, MCR
1114	 * packet control register needs to be programmed to point to any
1115	 * enabled s/ss pair. Otherwise, incorrect values will be returned.
1116	 * This means each subsequent MMIO read will be forwarded to an
1117	 * specific s/ss combination, but this is OK since these registers
1118	 * are consistent across s/ss in almost all cases. In the rare
1119	 * occasions, such as INSTDONE, where this value is dependent
1120	 * on s/ss combo, the read should be done with read_subslice_reg.
1121	 */
1122	slice = ffs(sseu->slice_mask) - 1;
1123	GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask.hsw));
1124	subslice = ffs(intel_sseu_get_hsw_subslices(sseu, slice));
1125	GEM_BUG_ON(!subslice);
1126	subslice--;
1127
1128	/*
1129	 * We use GEN8_MCR..() macros to calculate the |mcr| value for
1130	 * Gen9 to address WaProgramMgsrForCorrectSliceSpecificMmioReads
1131	 */
1132	mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
1133	mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
1134
1135	drm_dbg(&i915->drm, "MCR slice:%d/subslice:%d = %x\n", slice, subslice, mcr);
1136
1137	wa_write_clr_set(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr);
1138}
1139
1140static void
1141gen9_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1142{
1143	struct drm_i915_private *i915 = gt->i915;
1144
1145	/* WaProgramMgsrForCorrectSliceSpecificMmioReads:glk,kbl,cml,gen9 */
1146	gen9_wa_init_mcr(i915, wal);
1147
1148	/* WaDisableKillLogic:bxt,skl,kbl */
1149	if (!IS_COFFEELAKE(i915) && !IS_COMETLAKE(i915))
1150		wa_write_or(wal,
1151			    GAM_ECOCHK,
1152			    ECOCHK_DIS_TLB);
1153
1154	if (HAS_LLC(i915)) {
1155		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
1156		 *
1157		 * Must match Display Engine. See
1158		 * WaCompressedResourceDisplayNewHashMode.
1159		 */
1160		wa_write_or(wal,
1161			    MMCD_MISC_CTRL,
1162			    MMCD_PCLA | MMCD_HOTSPOT_EN);
1163	}
1164
1165	/* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
1166	wa_write_or(wal,
1167		    GAM_ECOCHK,
1168		    BDW_DISABLE_HDC_INVALIDATION);
1169}
1170
1171static void
1172skl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1173{
1174	gen9_gt_workarounds_init(gt, wal);
1175
1176	/* WaDisableGafsUnitClkGating:skl */
1177	wa_write_or(wal,
1178		    GEN7_UCGCTL4,
1179		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
1180
1181	/* WaInPlaceDecompressionHang:skl */
1182	if (IS_SKYLAKE(gt->i915) && IS_GRAPHICS_STEP(gt->i915, STEP_A0, STEP_H0))
1183		wa_write_or(wal,
1184			    GEN9_GAMT_ECO_REG_RW_IA,
1185			    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
1186}
1187
1188static void
1189kbl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1190{
1191	gen9_gt_workarounds_init(gt, wal);
1192
1193	/* WaDisableDynamicCreditSharing:kbl */
1194	if (IS_KABYLAKE(gt->i915) && IS_GRAPHICS_STEP(gt->i915, 0, STEP_C0))
1195		wa_write_or(wal,
1196			    GAMT_CHKN_BIT_REG,
1197			    GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
1198
1199	/* WaDisableGafsUnitClkGating:kbl */
1200	wa_write_or(wal,
1201		    GEN7_UCGCTL4,
1202		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
1203
1204	/* WaInPlaceDecompressionHang:kbl */
1205	wa_write_or(wal,
1206		    GEN9_GAMT_ECO_REG_RW_IA,
1207		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
1208}
1209
/* GT workarounds for glk: nothing beyond the common gen9 list. */
static void glk_gt_workarounds_init(struct intel_gt *gt,
				    struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(gt, wal);
}
1215
1216static void
1217cfl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1218{
1219	gen9_gt_workarounds_init(gt, wal);
1220
1221	/* WaDisableGafsUnitClkGating:cfl */
1222	wa_write_or(wal,
1223		    GEN7_UCGCTL4,
1224		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
1225
1226	/* WaInPlaceDecompressionHang:cfl */
1227	wa_write_or(wal,
1228		    GEN9_GAMT_ECO_REG_RW_IA,
1229		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
1230}
1231
1232static void __set_mcr_steering(struct i915_wa_list *wal,
1233			       i915_reg_t steering_reg,
1234			       unsigned int slice, unsigned int subslice)
1235{
1236	u32 mcr, mcr_mask;
1237
1238	mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
1239	mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
1240
1241	wa_write_clr_set(wal, steering_reg, mcr_mask, mcr);
1242}
1243
1244static void debug_dump_steering(struct intel_gt *gt)
1245{
1246	struct drm_printer p = drm_dbg_printer(&gt->i915->drm, DRM_UT_DRIVER,
1247					       "MCR Steering:");
1248
1249	if (drm_debug_enabled(DRM_UT_DRIVER))
1250		intel_gt_mcr_report_steering(&p, gt, false);
1251}
1252
/*
 * Program GEN8_MCR_SELECTOR (via @wal) to steer to @slice/@subslice and
 * record the same pair as the GT's default steering.
 */
static void __add_mcr_wa(struct intel_gt *gt, struct i915_wa_list *wal,
			 unsigned int slice, unsigned int subslice)
{
	__set_mcr_steering(wal, GEN8_MCR_SELECTOR, slice, subslice);

	/* Remember the chosen pair as the default group/instance IDs. */
	gt->default_steering.groupid = slice;
	gt->default_steering.instanceid = subslice;

	/* Dump only after the defaults above have been updated. */
	debug_dump_steering(gt);
}
1263
1264static void
1265icl_wa_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
1266{
1267	const struct sseu_dev_info *sseu = &gt->info.sseu;
1268	unsigned int subslice;
1269
1270	GEM_BUG_ON(GRAPHICS_VER(gt->i915) < 11);
1271	GEM_BUG_ON(hweight8(sseu->slice_mask) > 1);
1272
1273	/*
1274	 * Although a platform may have subslices, we need to always steer
1275	 * reads to the lowest instance that isn't fused off.  When Render
1276	 * Power Gating is enabled, grabbing forcewake will only power up a
1277	 * single subslice (the "minconfig") if there isn't a real workload
1278	 * that needs to be run; this means that if we steer register reads to
1279	 * one of the higher subslices, we run the risk of reading back 0's or
1280	 * random garbage.
1281	 */
1282	subslice = __ffs(intel_sseu_get_hsw_subslices(sseu, 0));
1283
1284	/*
1285	 * If the subslice we picked above also steers us to a valid L3 bank,
1286	 * then we can just rely on the default steering and won't need to
1287	 * worry about explicitly re-steering L3BANK reads later.
1288	 */
1289	if (gt->info.l3bank_mask & BIT(subslice))
1290		gt->steering_table[L3BANK] = NULL;
1291
1292	__add_mcr_wa(gt, wal, 0, subslice);
1293}
1294
1295static void
1296xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
1297{
1298	const struct sseu_dev_info *sseu = &gt->info.sseu;
1299	unsigned long slice, subslice = 0, slice_mask = 0;
1300	u32 lncf_mask = 0;
1301	int i;
1302
1303	/*
1304	 * On Xe_HP the steering increases in complexity. There are now several
1305	 * more units that require steering and we're not guaranteed to be able
1306	 * to find a common setting for all of them. These are:
1307	 * - GSLICE (fusable)
1308	 * - DSS (sub-unit within gslice; fusable)
1309	 * - L3 Bank (fusable)
1310	 * - MSLICE (fusable)
1311	 * - LNCF (sub-unit within mslice; always present if mslice is present)
1312	 *
1313	 * We'll do our default/implicit steering based on GSLICE (in the
1314	 * sliceid field) and DSS (in the subsliceid field).  If we can
1315	 * find overlap between the valid MSLICE and/or LNCF values with
1316	 * a suitable GSLICE, then we can just re-use the default value and
1317	 * skip and explicit steering at runtime.
1318	 *
1319	 * We only need to look for overlap between GSLICE/MSLICE/LNCF to find
1320	 * a valid sliceid value.  DSS steering is the only type of steering
1321	 * that utilizes the 'subsliceid' bits.
1322	 *
1323	 * Also note that, even though the steering domain is called "GSlice"
1324	 * and it is encoded in the register using the gslice format, the spec
1325	 * says that the combined (geometry | compute) fuse should be used to
1326	 * select the steering.
1327	 */
1328
1329	/* Find the potential gslice candidates */
1330	slice_mask = intel_slicemask_from_xehp_dssmask(sseu->subslice_mask,
1331						       GEN_DSS_PER_GSLICE);
1332
1333	/*
1334	 * Find the potential LNCF candidates.  Either LNCF within a valid
1335	 * mslice is fine.
1336	 */
1337	for_each_set_bit(i, &gt->info.mslice_mask, GEN12_MAX_MSLICES)
1338		lncf_mask |= (0x3 << (i * 2));
1339
1340	/*
1341	 * Are there any sliceid values that work for both GSLICE and LNCF
1342	 * steering?
1343	 */
1344	if (slice_mask & lncf_mask) {
1345		slice_mask &= lncf_mask;
1346		gt->steering_table[LNCF] = NULL;
1347	}
1348
1349	/* How about sliceid values that also work for MSLICE steering? */
1350	if (slice_mask & gt->info.mslice_mask) {
1351		slice_mask &= gt->info.mslice_mask;
1352		gt->steering_table[MSLICE] = NULL;
1353	}
1354
1355	if (IS_XEHPSDV(gt->i915) && slice_mask & BIT(0))
1356		gt->steering_table[GAM] = NULL;
1357
1358	slice = __ffs(slice_mask);
1359	subslice = intel_sseu_find_first_xehp_dss(sseu, GEN_DSS_PER_GSLICE, slice) %
1360		GEN_DSS_PER_GSLICE;
1361
1362	__add_mcr_wa(gt, wal, slice, subslice);
1363
1364	/*
1365	 * SQIDI ranges are special because they use different steering
1366	 * registers than everything else we work with.  On XeHP SDV and
1367	 * DG2-G10, any value in the steering registers will work fine since
1368	 * all instances are present, but DG2-G11 only has SQIDI instances at
1369	 * ID's 2 and 3, so we need to steer to one of those.  For simplicity
1370	 * we'll just steer to a hardcoded "2" since that value will work
1371	 * everywhere.
1372	 */
1373	__set_mcr_steering(wal, MCFG_MCR_SELECTOR, 0, 2);
1374	__set_mcr_steering(wal, SF_MCR_SELECTOR, 0, 2);
1375
1376	/*
1377	 * On DG2, GAM registers have a dedicated steering control register
1378	 * and must always be programmed to a hardcoded groupid of "1."
1379	 */
1380	if (IS_DG2(gt->i915))
1381		__set_mcr_steering(wal, GAM_MCR_SELECTOR, 1, 0);
1382}
1383
1384static void
1385pvc_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
1386{
1387	unsigned int dss;
1388
1389	/*
1390	 * Setup implicit steering for COMPUTE and DSS ranges to the first
1391	 * non-fused-off DSS.  All other types of MCR registers will be
1392	 * explicitly steered.
1393	 */
1394	dss = intel_sseu_find_first_xehp_dss(&gt->info.sseu, 0, 0);
1395	__add_mcr_wa(gt, wal, dss / GEN_DSS_PER_CSLICE, dss % GEN_DSS_PER_CSLICE);
1396}
1397
1398static void
1399icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1400{
1401	struct drm_i915_private *i915 = gt->i915;
1402
1403	icl_wa_init_mcr(gt, wal);
1404
1405	/* WaModifyGamTlbPartitioning:icl */
1406	wa_write_clr_set(wal,
1407			 GEN11_GACB_PERF_CTRL,
1408			 GEN11_HASH_CTRL_MASK,
1409			 GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);
1410
1411	/* Wa_1405766107:icl
1412	 * Formerly known as WaCL2SFHalfMaxAlloc
1413	 */
1414	wa_write_or(wal,
1415		    GEN11_LSN_UNSLCVC,
1416		    GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
1417		    GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);
1418
1419	/* Wa_220166154:icl
1420	 * Formerly known as WaDisCtxReload
1421	 */
1422	wa_write_or(wal,
1423		    GEN8_GAMW_ECO_DEV_RW_IA,
1424		    GAMW_ECO_DEV_CTX_RELOAD_DISABLE);
1425
1426	/* Wa_1406463099:icl
1427	 * Formerly known as WaGamTlbPendError
1428	 */
1429	wa_write_or(wal,
1430		    GAMT_CHKN_BIT_REG,
1431		    GAMT_CHKN_DISABLE_L3_COH_PIPE);
1432
1433	/*
1434	 * Wa_1408615072:icl,ehl  (vsunit)
1435	 * Wa_1407596294:icl,ehl  (hsunit)
1436	 */
1437	wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
1438		    VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS);
1439
1440	/* Wa_1407352427:icl,ehl */
1441	wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
1442		    PSDUNIT_CLKGATE_DIS);
1443
1444	/* Wa_1406680159:icl,ehl */
1445	wa_mcr_write_or(wal,
1446			GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE,
1447			GWUNIT_CLKGATE_DIS);
1448
1449	/* Wa_1607087056:icl,ehl,jsl */
1450	if (IS_ICELAKE(i915) ||
1451		((IS_JASPERLAKE(i915) || IS_ELKHARTLAKE(i915)) &&
1452		IS_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)))
1453		wa_write_or(wal,
1454			    GEN11_SLICE_UNIT_LEVEL_CLKGATE,
1455			    L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
1456
1457	/*
1458	 * This is not a documented workaround, but rather an optimization
1459	 * to reduce sampler power.
1460	 */
1461	wa_mcr_write_clr(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
1462}
1463
1464/*
1465 * Though there are per-engine instances of these registers,
1466 * they retain their value through engine resets and should
1467 * only be provided on the GT workaround list rather than
1468 * the engine-specific workaround list.
1469 */
1470static void
1471wa_14011060649(struct intel_gt *gt, struct i915_wa_list *wal)
1472{
1473	struct intel_engine_cs *engine;
1474	int id;
1475
1476	for_each_engine(engine, gt, id) {
1477		if (engine->class != VIDEO_DECODE_CLASS ||
1478		    (engine->instance % 2))
1479			continue;
1480
1481		wa_write_or(wal, VDBOX_CGCTL3F10(engine->mmio_base),
1482			    IECPUNIT_CLKGATE_DIS);
1483	}
1484}
1485
1486static void
1487gen12_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1488{
1489	icl_wa_init_mcr(gt, wal);
1490
1491	/* Wa_14011060649:tgl,rkl,dg1,adl-s,adl-p */
1492	wa_14011060649(gt, wal);
1493
1494	/* Wa_14011059788:tgl,rkl,adl-s,dg1,adl-p */
1495	wa_mcr_write_or(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
1496
1497	/*
1498	 * Wa_14015795083
1499	 *
1500	 * Firmware on some gen12 platforms locks the MISCCPCTL register,
1501	 * preventing i915 from modifying it for this workaround.  Skip the
1502	 * readback verification for this workaround on debug builds; if the
1503	 * workaround doesn't stick due to firmware behavior, it's not an error
1504	 * that we want CI to flag.
1505	 */
1506	wa_add(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE,
1507	       0, 0, false);
1508}
1509
1510static void
1511dg1_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1512{
1513	gen12_gt_workarounds_init(gt, wal);
1514
1515	/* Wa_1409420604:dg1 */
1516	wa_mcr_write_or(wal, SUBSLICE_UNIT_LEVEL_CLKGATE2,
1517			CPSSUNIT_CLKGATE_DIS);
1518
1519	/* Wa_1408615072:dg1 */
1520	/* Empirical testing shows this register is unaffected by engine reset. */
1521	wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2, VSUNIT_CLKGATE_DIS_TGL);
1522}
1523
1524static void
1525xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1526{
1527	struct drm_i915_private *i915 = gt->i915;
1528
1529	xehp_init_mcr(gt, wal);
1530
1531	/* Wa_1409757795:xehpsdv */
1532	wa_mcr_write_or(wal, SCCGCTL94DC, CG3DDISURB);
1533
1534	/* Wa_18011725039:xehpsdv */
1535	if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_B0)) {
1536		wa_mcr_masked_dis(wal, MLTICTXCTL, TDONRENDER);
1537		wa_mcr_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH);
1538	}
1539
1540	/* Wa_16011155590:xehpsdv */
1541	if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
1542		wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
1543			    TSGUNIT_CLKGATE_DIS);
1544
1545	/* Wa_14011780169:xehpsdv */
1546	if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_B0, STEP_FOREVER)) {
1547		wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS |
1548			    GAMTLBVDBOX7_CLKGATE_DIS |
1549			    GAMTLBVDBOX6_CLKGATE_DIS |
1550			    GAMTLBVDBOX5_CLKGATE_DIS |
1551			    GAMTLBVDBOX4_CLKGATE_DIS |
1552			    GAMTLBVDBOX3_CLKGATE_DIS |
1553			    GAMTLBVDBOX2_CLKGATE_DIS |
1554			    GAMTLBVDBOX1_CLKGATE_DIS |
1555			    GAMTLBVDBOX0_CLKGATE_DIS |
1556			    GAMTLBKCR_CLKGATE_DIS |
1557			    GAMTLBGUC_CLKGATE_DIS |
1558			    GAMTLBBLT_CLKGATE_DIS);
1559		wa_write_or(wal, UNSLCGCTL9444, GAMTLBGFXA0_CLKGATE_DIS |
1560			    GAMTLBGFXA1_CLKGATE_DIS |
1561			    GAMTLBCOMPA0_CLKGATE_DIS |
1562			    GAMTLBCOMPA1_CLKGATE_DIS |
1563			    GAMTLBCOMPB0_CLKGATE_DIS |
1564			    GAMTLBCOMPB1_CLKGATE_DIS |
1565			    GAMTLBCOMPC0_CLKGATE_DIS |
1566			    GAMTLBCOMPC1_CLKGATE_DIS |
1567			    GAMTLBCOMPD0_CLKGATE_DIS |
1568			    GAMTLBCOMPD1_CLKGATE_DIS |
1569			    GAMTLBMERT_CLKGATE_DIS   |
1570			    GAMTLBVEBOX3_CLKGATE_DIS |
1571			    GAMTLBVEBOX2_CLKGATE_DIS |
1572			    GAMTLBVEBOX1_CLKGATE_DIS |
1573			    GAMTLBVEBOX0_CLKGATE_DIS);
1574	}
1575
1576	/* Wa_16012725990:xehpsdv */
1577	if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_FOREVER))
1578		wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, VFUNIT_CLKGATE_DIS);
1579
1580	/* Wa_14011060649:xehpsdv */
1581	wa_14011060649(gt, wal);
1582
1583	/* Wa_14012362059:xehpsdv */
1584	wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
1585
1586	/* Wa_14014368820:xehpsdv */
1587	wa_mcr_write_or(wal, XEHP_GAMCNTRL_CTRL,
1588			INVALIDATION_BROADCAST_MODE_DIS | GLOBAL_INVALIDATION_MODE);
1589
1590	/* Wa_14010670810:xehpsdv */
1591	wa_mcr_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
1592}
1593
1594static void
1595dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1596{
1597	xehp_init_mcr(gt, wal);
1598
1599	/* Wa_14011060649:dg2 */
1600	wa_14011060649(gt, wal);
1601
1602	if (IS_DG2_G10(gt->i915)) {
1603		/* Wa_22010523718:dg2 */
1604		wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
1605			    CG3DDISCFEG_CLKGATE_DIS);
1606
1607		/* Wa_14011006942:dg2 */
1608		wa_mcr_write_or(wal, GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE,
1609				DSS_ROUTER_CLKGATE_DIS);
1610	}
1611
1612	/* Wa_14014830051:dg2 */
1613	wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);
1614
1615	/*
1616	 * Wa_14015795083
1617	 * Skip verification for possibly locked register.
1618	 */
1619	wa_add(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE,
1620	       0, 0, false);
1621
1622	/* Wa_18018781329 */
1623	wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
1624	wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
1625	wa_mcr_write_or(wal, XEHP_VDBX_MOD_CTRL, FORCE_MISS_FTLB);
1626	wa_mcr_write_or(wal, XEHP_VEBX_MOD_CTRL, FORCE_MISS_FTLB);
1627
1628	/* Wa_1509235366:dg2 */
1629	wa_mcr_write_or(wal, XEHP_GAMCNTRL_CTRL,
1630			INVALIDATION_BROADCAST_MODE_DIS | GLOBAL_INVALIDATION_MODE);
1631
1632	/* Wa_14010648519:dg2 */
1633	wa_mcr_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
1634}
1635
1636static void
1637pvc_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1638{
1639	pvc_init_mcr(gt, wal);
1640
1641	/* Wa_14015795083 */
1642	wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
1643
1644	/* Wa_18018781329 */
1645	wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
1646	wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
1647	wa_mcr_write_or(wal, XEHP_VDBX_MOD_CTRL, FORCE_MISS_FTLB);
1648	wa_mcr_write_or(wal, XEHP_VEBX_MOD_CTRL, FORCE_MISS_FTLB);
1649
1650	/* Wa_16016694945 */
1651	wa_mcr_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_OVRLSCCC);
1652}
1653
1654static void
1655xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1656{
1657	/* Wa_14018575942 / Wa_18018781329 */
1658	wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
1659	wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
1660
1661	/* Wa_22016670082 */
1662	wa_write_or(wal, GEN12_SQCNT1, GEN12_STRICT_RAR_ENABLE);
1663
1664	if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
1665	    IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) {
1666		/* Wa_14014830051 */
1667		wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);
1668
1669		/* Wa_14015795083 */
1670		wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
1671	}
1672
1673	/*
1674	 * Unlike older platforms, we no longer setup implicit steering here;
1675	 * all MCR accesses are explicitly steered.
1676	 */
1677	debug_dump_steering(gt);
1678}
1679
1680static void
1681wa_16021867713(struct intel_gt *gt, struct i915_wa_list *wal)
1682{
1683	struct intel_engine_cs *engine;
1684	int id;
1685
1686	for_each_engine(engine, gt, id)
1687		if (engine->class == VIDEO_DECODE_CLASS)
1688			wa_write_or(wal, VDBOX_CGCTL3F1C(engine->mmio_base),
1689				    MFXPIPE_CLKGATE_DIS);
1690}
1691
1692static void
1693xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1694{
1695	wa_16021867713(gt, wal);
1696
1697	/*
1698	 * Wa_14018778641
1699	 * Wa_18018781329
1700	 *
1701	 * Note that although these registers are MCR on the primary
1702	 * GT, the media GT's versions are regular singleton registers.
1703	 */
1704	wa_write_or(wal, XELPMP_GSC_MOD_CTRL, FORCE_MISS_FTLB);
1705
1706	/* Wa_22016670082 */
1707	wa_write_or(wal, GEN12_SQCNT1, GEN12_STRICT_RAR_ENABLE);
1708
1709	debug_dump_steering(gt);
1710}
1711
1712/*
1713 * The bspec performance guide has recommended MMIO tuning settings.  These
1714 * aren't truly "workarounds" but we want to program them through the
1715 * workaround infrastructure to make sure they're (re)applied at the proper
1716 * times.
1717 *
1718 * The programming in this function is for settings that persist through
1719 * engine resets and also are not part of any engine's register state context.
1720 * I.e., settings that only need to be re-applied in the event of a full GT
1721 * reset.
1722 */
1723static void gt_tuning_settings(struct intel_gt *gt, struct i915_wa_list *wal)
1724{
1725	if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) {
1726		wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
1727		wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS);
1728	}
1729
1730	if (IS_PONTEVECCHIO(gt->i915)) {
1731		wa_mcr_write(wal, XEHPC_L3SCRUB,
1732			     SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK);
1733		wa_mcr_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_HOSTCACHEEN);
1734	}
1735
1736	if (IS_DG2(gt->i915)) {
1737		wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
1738		wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS);
1739	}
1740}
1741
1742static void
1743gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal)
1744{
1745	struct drm_i915_private *i915 = gt->i915;
1746
1747	gt_tuning_settings(gt, wal);
1748
1749	if (gt->type == GT_MEDIA) {
1750		if (MEDIA_VER_FULL(i915) == IP_VER(13, 0))
1751			xelpmp_gt_workarounds_init(gt, wal);
1752		else
1753			MISSING_CASE(MEDIA_VER_FULL(i915));
1754
1755		return;
1756	}
1757
1758	if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)))
1759		xelpg_gt_workarounds_init(gt, wal);
1760	else if (IS_PONTEVECCHIO(i915))
1761		pvc_gt_workarounds_init(gt, wal);
1762	else if (IS_DG2(i915))
1763		dg2_gt_workarounds_init(gt, wal);
1764	else if (IS_XEHPSDV(i915))
1765		xehpsdv_gt_workarounds_init(gt, wal);
1766	else if (IS_DG1(i915))
1767		dg1_gt_workarounds_init(gt, wal);
1768	else if (GRAPHICS_VER(i915) == 12)
1769		gen12_gt_workarounds_init(gt, wal);
1770	else if (GRAPHICS_VER(i915) == 11)
1771		icl_gt_workarounds_init(gt, wal);
1772	else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915))
1773		cfl_gt_workarounds_init(gt, wal);
1774	else if (IS_GEMINILAKE(i915))
1775		glk_gt_workarounds_init(gt, wal);
1776	else if (IS_KABYLAKE(i915))
1777		kbl_gt_workarounds_init(gt, wal);
1778	else if (IS_BROXTON(i915))
1779		gen9_gt_workarounds_init(gt, wal);
1780	else if (IS_SKYLAKE(i915))
1781		skl_gt_workarounds_init(gt, wal);
1782	else if (IS_HASWELL(i915))
1783		hsw_gt_workarounds_init(gt, wal);
1784	else if (IS_VALLEYVIEW(i915))
1785		vlv_gt_workarounds_init(gt, wal);
1786	else if (IS_IVYBRIDGE(i915))
1787		ivb_gt_workarounds_init(gt, wal);
1788	else if (GRAPHICS_VER(i915) == 6)
1789		snb_gt_workarounds_init(gt, wal);
1790	else if (GRAPHICS_VER(i915) == 5)
1791		ilk_gt_workarounds_init(gt, wal);
1792	else if (IS_G4X(i915))
1793		g4x_gt_workarounds_init(gt, wal);
1794	else if (GRAPHICS_VER(i915) == 4)
1795		gen4_gt_workarounds_init(gt, wal);
1796	else if (GRAPHICS_VER(i915) <= 8)
1797		;
1798	else
1799		MISSING_CASE(GRAPHICS_VER(i915));
1800}
1801
1802void intel_gt_init_workarounds(struct intel_gt *gt)
1803{
1804	struct i915_wa_list *wal = &gt->wa_list;
1805
1806	wa_init_start(wal, gt, "GT", "global");
1807	gt_init_workarounds(gt, wal);
1808	wa_init_finish(wal);
1809}
1810
1811static bool
1812wa_verify(struct intel_gt *gt, const struct i915_wa *wa, u32 cur,
1813	  const char *name, const char *from)
1814{
1815	if ((cur ^ wa->set) & wa->read) {
1816		gt_err(gt,
1817		       "%s workaround lost on %s! (reg[%x]=0x%x, relevant bits were 0x%x vs expected 0x%x)\n",
1818		       name, from, i915_mmio_reg_offset(wa->reg),
1819		       cur, cur & wa->read, wa->set & wa->read);
1820
1821		return false;
1822	}
1823
1824	return true;
1825}
1826
1827static void wa_list_apply(const struct i915_wa_list *wal)
1828{
1829	struct intel_gt *gt = wal->gt;
1830	struct intel_uncore *uncore = gt->uncore;
1831	enum forcewake_domains fw;
1832	unsigned long flags;
1833	struct i915_wa *wa;
1834	unsigned int i;
1835
1836	if (!wal->count)
1837		return;
1838
1839	fw = wal_get_fw_for_rmw(uncore, wal);
1840
1841	intel_gt_mcr_lock(gt, &flags);
1842	spin_lock(&uncore->lock);
1843	intel_uncore_forcewake_get__locked(uncore, fw);
1844
1845	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
1846		u32 val, old = 0;
1847
1848		/* open-coded rmw due to steering */
1849		if (wa->clr)
1850			old = wa->is_mcr ?
1851				intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) :
1852				intel_uncore_read_fw(uncore, wa->reg);
1853		val = (old & ~wa->clr) | wa->set;
1854		if (val != old || !wa->clr) {
1855			if (wa->is_mcr)
1856				intel_gt_mcr_multicast_write_fw(gt, wa->mcr_reg, val);
1857			else
1858				intel_uncore_write_fw(uncore, wa->reg, val);
1859		}
1860
1861		if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) {
1862			u32 val = wa->is_mcr ?
1863				intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) :
1864				intel_uncore_read_fw(uncore, wa->reg);
1865
1866			wa_verify(gt, wa, val, wal->name, "application");
1867		}
1868	}
1869
1870	intel_uncore_forcewake_put__locked(uncore, fw);
1871	spin_unlock(&uncore->lock);
1872	intel_gt_mcr_unlock(gt, flags);
1873}
1874
1875void intel_gt_apply_workarounds(struct intel_gt *gt)
1876{
1877	wa_list_apply(&gt->wa_list);
1878}
1879
1880static bool wa_list_verify(struct intel_gt *gt,
1881			   const struct i915_wa_list *wal,
1882			   const char *from)
1883{
1884	struct intel_uncore *uncore = gt->uncore;
1885	struct i915_wa *wa;
1886	enum forcewake_domains fw;
1887	unsigned long flags;
1888	unsigned int i;
1889	bool ok = true;
1890
1891	fw = wal_get_fw_for_rmw(uncore, wal);
1892
1893	intel_gt_mcr_lock(gt, &flags);
1894	spin_lock(&uncore->lock);
1895	intel_uncore_forcewake_get__locked(uncore, fw);
1896
1897	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1898		ok &= wa_verify(wal->gt, wa, wa->is_mcr ?
1899				intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) :
1900				intel_uncore_read_fw(uncore, wa->reg),
1901				wal->name, from);
1902
1903	intel_uncore_forcewake_put__locked(uncore, fw);
1904	spin_unlock(&uncore->lock);
1905	intel_gt_mcr_unlock(gt, flags);
1906
1907	return ok;
1908}
1909
1910bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from)
1911{
1912	return wa_list_verify(gt, &gt->wa_list, from);
1913}
1914
1915__maybe_unused
1916static bool is_nonpriv_flags_valid(u32 flags)
1917{
1918	/* Check only valid flag bits are set */
1919	if (flags & ~RING_FORCE_TO_NONPRIV_MASK_VALID)
1920		return false;
1921
1922	/* NB: Only 3 out of 4 enum values are valid for access field */
1923	if ((flags & RING_FORCE_TO_NONPRIV_ACCESS_MASK) ==
1924	    RING_FORCE_TO_NONPRIV_ACCESS_INVALID)
1925		return false;
1926
1927	return true;
1928}
1929
1930static void
1931whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
1932{
1933	struct i915_wa wa = {
1934		.reg = reg
1935	};
1936
1937	if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
1938		return;
1939
1940	if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags)))
1941		return;
1942
1943	wa.reg.reg |= flags;
1944	_wa_add(wal, &wa);
1945}
1946
1947static void
1948whitelist_mcr_reg_ext(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 flags)
1949{
1950	struct i915_wa wa = {
1951		.mcr_reg = reg,
1952		.is_mcr = 1,
1953	};
1954
1955	if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
1956		return;
1957
1958	if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags)))
1959		return;
1960
1961	wa.mcr_reg.reg |= flags;
1962	_wa_add(wal, &wa);
1963}
1964
1965static void
1966whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
1967{
1968	whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
1969}
1970
1971static void
1972whitelist_mcr_reg(struct i915_wa_list *wal, i915_mcr_reg_t reg)
1973{
1974	whitelist_mcr_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
1975}
1976
1977static void gen9_whitelist_build(struct i915_wa_list *w)
1978{
1979	/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
1980	whitelist_reg(w, GEN9_CTX_PREEMPT_REG);
1981
1982	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
1983	whitelist_reg(w, GEN8_CS_CHICKEN1);
1984
1985	/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
1986	whitelist_reg(w, GEN8_HDC_CHICKEN1);
1987
1988	/* WaSendPushConstantsFromMMIO:skl,bxt */
1989	whitelist_reg(w, COMMON_SLICE_CHICKEN2);
1990}
1991
1992static void skl_whitelist_build(struct intel_engine_cs *engine)
1993{
1994	struct i915_wa_list *w = &engine->whitelist;
1995
1996	if (engine->class != RENDER_CLASS)
1997		return;
1998
1999	gen9_whitelist_build(w);
2000
2001	/* WaDisableLSQCROPERFforOCL:skl */
2002	whitelist_mcr_reg(w, GEN8_L3SQCREG4);
2003}
2004
2005static void bxt_whitelist_build(struct intel_engine_cs *engine)
2006{
2007	if (engine->class != RENDER_CLASS)
2008		return;
2009
2010	gen9_whitelist_build(&engine->whitelist);
2011}
2012
2013static void kbl_whitelist_build(struct intel_engine_cs *engine)
2014{
2015	struct i915_wa_list *w = &engine->whitelist;
2016
2017	if (engine->class != RENDER_CLASS)
2018		return;
2019
2020	gen9_whitelist_build(w);
2021
2022	/* WaDisableLSQCROPERFforOCL:kbl */
2023	whitelist_mcr_reg(w, GEN8_L3SQCREG4);
2024}
2025
2026static void glk_whitelist_build(struct intel_engine_cs *engine)
2027{
2028	struct i915_wa_list *w = &engine->whitelist;
2029
2030	if (engine->class != RENDER_CLASS)
2031		return;
2032
2033	gen9_whitelist_build(w);
2034
2035	/* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
2036	whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
2037}
2038
2039static void cfl_whitelist_build(struct intel_engine_cs *engine)
2040{
2041	struct i915_wa_list *w = &engine->whitelist;
2042
2043	if (engine->class != RENDER_CLASS)
2044		return;
2045
2046	gen9_whitelist_build(w);
2047
2048	/*
2049	 * WaAllowPMDepthAndInvocationCountAccessFromUMD:cfl,whl,cml,aml
2050	 *
2051	 * This covers 4 register which are next to one another :
2052	 *   - PS_INVOCATION_COUNT
2053	 *   - PS_INVOCATION_COUNT_UDW
2054	 *   - PS_DEPTH_COUNT
2055	 *   - PS_DEPTH_COUNT_UDW
2056	 */
2057	whitelist_reg_ext(w, PS_INVOCATION_COUNT,
2058			  RING_FORCE_TO_NONPRIV_ACCESS_RD |
2059			  RING_FORCE_TO_NONPRIV_RANGE_4);
2060}
2061
2062static void allow_read_ctx_timestamp(struct intel_engine_cs *engine)
2063{
2064	struct i915_wa_list *w = &engine->whitelist;
2065
2066	if (engine->class != RENDER_CLASS)
2067		whitelist_reg_ext(w,
2068				  RING_CTX_TIMESTAMP(engine->mmio_base),
2069				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
2070}
2071
/*
 * Cometlake whitelist: the context timestamp read entry (added by
 * allow_read_ctx_timestamp()) followed by everything Coffeelake uses.
 */
static void cml_whitelist_build(struct intel_engine_cs *engine)
{
	/* Context timestamp first, then the Coffeelake entries. */
	allow_read_ctx_timestamp(engine);
	cfl_whitelist_build(engine);
}
2078
2079static void icl_whitelist_build(struct intel_engine_cs *engine)
2080{
2081	struct i915_wa_list *w = &engine->whitelist;
2082
2083	allow_read_ctx_timestamp(engine);
2084
2085	switch (engine->class) {
2086	case RENDER_CLASS:
2087		/* WaAllowUMDToModifyHalfSliceChicken7:icl */
2088		whitelist_mcr_reg(w, GEN9_HALF_SLICE_CHICKEN7);
2089
2090		/* WaAllowUMDToModifySamplerMode:icl */
2091		whitelist_mcr_reg(w, GEN10_SAMPLER_MODE);
2092
2093		/* WaEnableStateCacheRedirectToCS:icl */
2094		whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
2095
2096		/*
2097		 * WaAllowPMDepthAndInvocationCountAccessFromUMD:icl
2098		 *
2099		 * This covers 4 register which are next to one another :
2100		 *   - PS_INVOCATION_COUNT
2101		 *   - PS_INVOCATION_COUNT_UDW
2102		 *   - PS_DEPTH_COUNT
2103		 *   - PS_DEPTH_COUNT_UDW
2104		 */
2105		whitelist_reg_ext(w, PS_INVOCATION_COUNT,
2106				  RING_FORCE_TO_NONPRIV_ACCESS_RD |
2107				  RING_FORCE_TO_NONPRIV_RANGE_4);
2108		break;
2109
2110	case VIDEO_DECODE_CLASS:
2111		/* hucStatusRegOffset */
2112		whitelist_reg_ext(w, _MMIO(0x2000 + engine->mmio_base),
2113				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
2114		/* hucUKernelHdrInfoRegOffset */
2115		whitelist_reg_ext(w, _MMIO(0x2014 + engine->mmio_base),
2116				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
2117		/* hucStatus2RegOffset */
2118		whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base),
2119				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
2120		break;
2121
2122	default:
2123		break;
2124	}
2125}
2126
2127static void tgl_whitelist_build(struct intel_engine_cs *engine)
2128{
2129	struct i915_wa_list *w = &engine->whitelist;
2130
2131	allow_read_ctx_timestamp(engine);
2132
2133	switch (engine->class) {
2134	case RENDER_CLASS:
2135		/*
2136		 * WaAllowPMDepthAndInvocationCountAccessFromUMD:tgl
2137		 * Wa_1408556865:tgl
2138		 *
2139		 * This covers 4 registers which are next to one another :
2140		 *   - PS_INVOCATION_COUNT
2141		 *   - PS_INVOCATION_COUNT_UDW
2142		 *   - PS_DEPTH_COUNT
2143		 *   - PS_DEPTH_COUNT_UDW
2144		 */
2145		whitelist_reg_ext(w, PS_INVOCATION_COUNT,
2146				  RING_FORCE_TO_NONPRIV_ACCESS_RD |
2147				  RING_FORCE_TO_NONPRIV_RANGE_4);
2148
2149		/*
2150		 * Wa_1808121037:tgl
2151		 * Wa_14012131227:dg1
2152		 * Wa_1508744258:tgl,rkl,dg1,adl-s,adl-p
2153		 */
2154		whitelist_reg(w, GEN7_COMMON_SLICE_CHICKEN1);
2155
2156		/* Wa_1806527549:tgl */
2157		whitelist_reg(w, HIZ_CHICKEN);
2158
2159		/* Required by recommended tuning setting (not a workaround) */
2160		whitelist_reg(w, GEN11_COMMON_SLICE_CHICKEN3);
2161
2162		break;
2163	default:
2164		break;
2165	}
2166}
2167
2168static void dg2_whitelist_build(struct intel_engine_cs *engine)
2169{
2170	struct i915_wa_list *w = &engine->whitelist;
2171
2172	switch (engine->class) {
2173	case RENDER_CLASS:
2174		/* Required by recommended tuning setting (not a workaround) */
2175		whitelist_mcr_reg(w, XEHP_COMMON_SLICE_CHICKEN3);
2176
2177		break;
2178	default:
2179		break;
2180	}
2181}
2182
2183static void blacklist_trtt(struct intel_engine_cs *engine)
2184{
2185	struct i915_wa_list *w = &engine->whitelist;
2186
2187	/*
2188	 * Prevent read/write access to [0x4400, 0x4600) which covers
2189	 * the TRTT range across all engines. Note that normally userspace
2190	 * cannot access the other engines' trtt control, but for simplicity
2191	 * we cover the entire range on each engine.
2192	 */
2193	whitelist_reg_ext(w, _MMIO(0x4400),
2194			  RING_FORCE_TO_NONPRIV_DENY |
2195			  RING_FORCE_TO_NONPRIV_RANGE_64);
2196	whitelist_reg_ext(w, _MMIO(0x4500),
2197			  RING_FORCE_TO_NONPRIV_DENY |
2198			  RING_FORCE_TO_NONPRIV_RANGE_64);
2199}
2200
/* Ponte Vecchio whitelist: only the TRTT deny entries (Wa_16014440446:pvc). */
static void pvc_whitelist_build(struct intel_engine_cs *engine)
{
	/* Wa_16014440446:pvc */
	blacklist_trtt(engine);
}
2206
2207static void xelpg_whitelist_build(struct intel_engine_cs *engine)
2208{
2209	struct i915_wa_list *w = &engine->whitelist;
2210
2211	switch (engine->class) {
2212	case RENDER_CLASS:
2213		/* Required by recommended tuning setting (not a workaround) */
2214		whitelist_mcr_reg(w, XEHP_COMMON_SLICE_CHICKEN3);
2215
2216		break;
2217	default:
2218		break;
2219	}
2220}
2221
2222void intel_engine_init_whitelist(struct intel_engine_cs *engine)
2223{
2224	struct drm_i915_private *i915 = engine->i915;
2225	struct i915_wa_list *w = &engine->whitelist;
2226
2227	wa_init_start(w, engine->gt, "whitelist", engine->name);
2228
2229	if (engine->gt->type == GT_MEDIA)
2230		; /* none yet */
2231	else if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 74)))
2232		xelpg_whitelist_build(engine);
2233	else if (IS_PONTEVECCHIO(i915))
2234		pvc_whitelist_build(engine);
2235	else if (IS_DG2(i915))
2236		dg2_whitelist_build(engine);
2237	else if (IS_XEHPSDV(i915))
2238		; /* none needed */
2239	else if (GRAPHICS_VER(i915) == 12)
2240		tgl_whitelist_build(engine);
2241	else if (GRAPHICS_VER(i915) == 11)
2242		icl_whitelist_build(engine);
2243	else if (IS_COMETLAKE(i915))
2244		cml_whitelist_build(engine);
2245	else if (IS_COFFEELAKE(i915))
2246		cfl_whitelist_build(engine);
2247	else if (IS_GEMINILAKE(i915))
2248		glk_whitelist_build(engine);
2249	else if (IS_KABYLAKE(i915))
2250		kbl_whitelist_build(engine);
2251	else if (IS_BROXTON(i915))
2252		bxt_whitelist_build(engine);
2253	else if (IS_SKYLAKE(i915))
2254		skl_whitelist_build(engine);
2255	else if (GRAPHICS_VER(i915) <= 8)
2256		;
2257	else
2258		MISSING_CASE(GRAPHICS_VER(i915));
2259
2260	wa_init_finish(w);
2261}
2262
2263void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
2264{
2265	const struct i915_wa_list *wal = &engine->whitelist;
2266	struct intel_uncore *uncore = engine->uncore;
2267	const u32 base = engine->mmio_base;
2268	struct i915_wa *wa;
2269	unsigned int i;
2270
2271	if (!wal->count)
2272		return;
2273
2274	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
2275		intel_uncore_write(uncore,
2276				   RING_FORCE_TO_NONPRIV(base, i),
2277				   i915_mmio_reg_offset(wa->reg));
2278
2279	/* And clear the rest just in case of garbage */
2280	for (; i < RING_MAX_NONPRIV_SLOTS; i++)
2281		intel_uncore_write(uncore,
2282				   RING_FORCE_TO_NONPRIV(base, i),
2283				   i915_mmio_reg_offset(RING_NOPID(base)));
2284}
2285
2286/*
2287 * engine_fake_wa_init(), a place holder to program the registers
2288 * which are not part of an official workaround defined by the
2289 * hardware team.
2290 * Adding programming of those register inside workaround will
2291 * allow utilizing wa framework to proper application and verification.
2292 */
2293static void
2294engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
2295{
2296	u8 mocs_w, mocs_r;
2297
2298	/*
2299	 * RING_CMD_CCTL specifies the default MOCS entry that will be used
2300	 * by the command streamer when executing commands that don't have
2301	 * a way to explicitly specify a MOCS setting.  The default should
2302	 * usually reference whichever MOCS entry corresponds to uncached
2303	 * behavior, although use of a WB cached entry is recommended by the
2304	 * spec in certain circumstances on specific platforms.
2305	 */
2306	if (GRAPHICS_VER(engine->i915) >= 12) {
2307		mocs_r = engine->gt->mocs.uc_index;
2308		mocs_w = engine->gt->mocs.uc_index;
2309
2310		if (HAS_L3_CCS_READ(engine->i915) &&
2311		    engine->class == COMPUTE_CLASS) {
2312			mocs_r = engine->gt->mocs.wb_index;
2313
2314			/*
2315			 * Even on the few platforms where MOCS 0 is a
2316			 * legitimate table entry, it's never the correct
2317			 * setting to use here; we can assume the MOCS init
2318			 * just forgot to initialize wb_index.
2319			 */
2320			drm_WARN_ON(&engine->i915->drm, mocs_r == 0);
2321		}
2322
2323		wa_masked_field_set(wal,
2324				    RING_CMD_CCTL(engine->mmio_base),
2325				    CMD_CCTL_MOCS_MASK,
2326				    CMD_CCTL_MOCS_OVERRIDE(mocs_w, mocs_r));
2327	}
2328}
2329
2330static void
2331rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
2332{
2333	struct drm_i915_private *i915 = engine->i915;
2334	struct intel_gt *gt = engine->gt;
2335
2336	if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
2337	    IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) {
2338		/* Wa_22014600077 */
2339		wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS,
2340				 ENABLE_EU_COUNT_FOR_TDL_FLUSH);
2341	}
2342
2343	if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
2344	    IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
2345	    IS_DG2(i915)) {
2346		/* Wa_1509727124 */
2347		wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
2348				 SC_DISABLE_POWER_OPTIMIZATION_EBB);
2349	}
2350
2351	if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
2352	    IS_DG2(i915)) {
2353		/* Wa_22012856258 */
2354		wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
2355				 GEN12_DISABLE_READ_SUPPRESSION);
2356	}
2357
2358	if (IS_DG2(i915)) {
2359		/*
2360		 * Wa_22010960976:dg2
2361		 * Wa_14013347512:dg2
2362		 */
2363		wa_mcr_masked_dis(wal, XEHP_HDC_CHICKEN0,
2364				  LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK);
2365	}
2366
2367	if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) ||
2368	    IS_DG2(i915)) {
2369		/* Wa_14015150844 */
2370		wa_mcr_add(wal, XEHP_HDC_CHICKEN0, 0,
2371			   _MASKED_BIT_ENABLE(DIS_ATOMIC_CHAINING_TYPED_WRITES),
2372			   0, true);
2373	}
2374
2375	if (IS_DG2(i915) || IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) ||
2376	    IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
2377		/*
2378		 * Wa_1606700617:tgl,dg1,adl-p
2379		 * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p
2380		 * Wa_14010826681:tgl,dg1,rkl,adl-p
2381		 * Wa_18019627453:dg2
2382		 */
2383		wa_masked_en(wal,
2384			     GEN9_CS_DEBUG_MODE1,
2385			     FF_DOP_CLOCK_GATE_DISABLE);
2386	}
2387
2388	if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) ||
2389	    IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
2390		/* Wa_1606931601:tgl,rkl,dg1,adl-s,adl-p */
2391		wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ);
2392
2393		/*
2394		 * Wa_1407928979:tgl A*
2395		 * Wa_18011464164:tgl[B0+],dg1[B0+]
2396		 * Wa_22010931296:tgl[B0+],dg1[B0+]
2397		 * Wa_14010919138:rkl,dg1,adl-s,adl-p
2398		 */
2399		wa_write_or(wal, GEN7_FF_THREAD_MODE,
2400			    GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
2401
2402		/* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */
2403		wa_mcr_masked_en(wal,
2404				 GEN10_SAMPLER_MODE,
2405				 ENABLE_SMALLPL);
2406	}
2407
2408	if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) ||
2409	    IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
2410		/* Wa_1409804808 */
2411		wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
2412				 GEN12_PUSH_CONST_DEREF_HOLD_DIS);
2413
2414		/* Wa_14010229206 */
2415		wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
2416	}
2417
2418	if (IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) || IS_ALDERLAKE_P(i915)) {
2419		/*
2420		 * Wa_1607297627
2421		 *
2422		 * On TGL and RKL there are multiple entries for this WA in the
2423		 * BSpec; some indicate this is an A0-only WA, others indicate
2424		 * it applies to all steppings so we trust the "all steppings."
2425		 */
2426		wa_masked_en(wal,
2427			     RING_PSMI_CTL(RENDER_RING_BASE),
2428			     GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
2429			     GEN8_RC_SEMA_IDLE_MSG_DISABLE);
2430	}
2431
2432	if (GRAPHICS_VER(i915) == 11) {
2433		/* This is not an Wa. Enable for better image quality */
2434		wa_masked_en(wal,
2435			     _3D_CHICKEN3,
2436			     _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);
2437
2438		/*
2439		 * Wa_1405543622:icl
2440		 * Formerly known as WaGAPZPriorityScheme
2441		 */
2442		wa_write_or(wal,
2443			    GEN8_GARBCNTL,
2444			    GEN11_ARBITRATION_PRIO_ORDER_MASK);
2445
2446		/*
2447		 * Wa_1604223664:icl
2448		 * Formerly known as WaL3BankAddressHashing
2449		 */
2450		wa_write_clr_set(wal,
2451				 GEN8_GARBCNTL,
2452				 GEN11_HASH_CTRL_EXCL_MASK,
2453				 GEN11_HASH_CTRL_EXCL_BIT0);
2454		wa_write_clr_set(wal,
2455				 GEN11_GLBLINVL,
2456				 GEN11_BANK_HASH_ADDR_EXCL_MASK,
2457				 GEN11_BANK_HASH_ADDR_EXCL_BIT0);
2458
2459		/*
2460		 * Wa_1405733216:icl
2461		 * Formerly known as WaDisableCleanEvicts
2462		 */
2463		wa_mcr_write_or(wal,
2464				GEN8_L3SQCREG4,
2465				GEN11_LQSC_CLEAN_EVICT_DISABLE);
2466
2467		/* Wa_1606682166:icl */
2468		wa_write_or(wal,
2469			    GEN7_SARCHKMD,
2470			    GEN7_DISABLE_SAMPLER_PREFETCH);
2471
2472		/* Wa_1409178092:icl */
2473		wa_mcr_write_clr_set(wal,
2474				     GEN11_SCRATCH2,
2475				     GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE,
2476				     0);
2477
2478		/* WaEnable32PlaneMode:icl */
2479		wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS,
2480			     GEN11_ENABLE_32_PLANE_MODE);
2481
2482		/*
2483		 * Wa_1408767742:icl[a2..forever],ehl[all]
2484		 * Wa_1605460711:icl[a0..c0]
2485		 */
2486		wa_write_or(wal,
2487			    GEN7_FF_THREAD_MODE,
2488			    GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
2489
2490		/* Wa_22010271021 */
2491		wa_masked_en(wal,
2492			     GEN9_CS_DEBUG_MODE1,
2493			     FF_DOP_CLOCK_GATE_DISABLE);
2494	}
2495
2496	/*
2497	 * Intel platforms that support fine-grained preemption (i.e., gen9 and
2498	 * beyond) allow the kernel-mode driver to choose between two different
2499	 * options for controlling preemption granularity and behavior.
2500	 *
2501	 * Option 1 (hardware default):
2502	 *   Preemption settings are controlled in a global manner via
2503	 *   kernel-only register CS_DEBUG_MODE1 (0x20EC).  Any granularity
2504	 *   and settings chosen by the kernel-mode driver will apply to all
2505	 *   userspace clients.
2506	 *
2507	 * Option 2:
2508	 *   Preemption settings are controlled on a per-context basis via
2509	 *   register CS_CHICKEN1 (0x2580).  CS_CHICKEN1 is saved/restored on
2510	 *   context switch and is writable by userspace (e.g., via
2511	 *   MI_LOAD_REGISTER_IMMEDIATE instructions placed in a batch buffer)
2512	 *   which allows different userspace drivers/clients to select
2513	 *   different settings, or to change those settings on the fly in
2514	 *   response to runtime needs.  This option was known by name
2515	 *   "FtrPerCtxtPreemptionGranularityControl" at one time, although
2516	 *   that name is somewhat misleading as other non-granularity
2517	 *   preemption settings are also impacted by this decision.
2518	 *
2519	 * On Linux, our policy has always been to let userspace drivers
2520	 * control preemption granularity/settings (Option 2).  This was
2521	 * originally mandatory on gen9 to prevent ABI breakage (old gen9
2522	 * userspace developed before object-level preemption was enabled would
2523	 * not behave well if i915 were to go with Option 1 and enable that
2524	 * preemption in a global manner).  On gen9 each context would have
2525	 * object-level preemption disabled by default (see
2526	 * WaDisable3DMidCmdPreemption in gen9_ctx_workarounds_init), but
2527	 * userspace drivers could opt-in to object-level preemption as they
2528	 * saw fit.  For post-gen9 platforms, we continue to utilize Option 2;
2529	 * even though it is no longer necessary for ABI compatibility when
2530	 * enabling a new platform, it does ensure that userspace will be able
2531	 * to implement any workarounds that show up requiring temporary
2532	 * adjustments to preemption behavior at runtime.
2533	 *
2534	 * Notes/Workarounds:
2535	 *  - Wa_14015141709:  On DG2 and early steppings of MTL,
2536	 *      CS_CHICKEN1[0] does not disable object-level preemption as
2537	 *      it is supposed to (nor does CS_DEBUG_MODE1[0] if we had been
2538	 *      using Option 1).  Effectively this means userspace is unable
2539	 *      to disable object-level preemption on these platforms/steppings
2540	 *      despite the setting here.
2541	 *
2542	 *  - Wa_16013994831:  May require that userspace program
2543	 *      CS_CHICKEN1[10] when certain runtime conditions are true.
2544	 *      Userspace requires Option 2 to be in effect for their update of
2545	 *      CS_CHICKEN1[10] to be effective.
2546	 *
2547	 * Other workarounds may appear in the future that will also require
2548	 * Option 2 behavior to allow proper userspace implementation.
2549	 */
2550	if (GRAPHICS_VER(i915) >= 9)
2551		wa_masked_en(wal,
2552			     GEN7_FF_SLICE_CS_CHICKEN1,
2553			     GEN9_FFSC_PERCTX_PREEMPT_CTRL);
2554
2555	if (IS_SKYLAKE(i915) ||
2556	    IS_KABYLAKE(i915) ||
2557	    IS_COFFEELAKE(i915) ||
2558	    IS_COMETLAKE(i915)) {
2559		/* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
2560		wa_write_or(wal,
2561			    GEN8_GARBCNTL,
2562			    GEN9_GAPS_TSV_CREDIT_DISABLE);
2563	}
2564
2565	if (IS_BROXTON(i915)) {
2566		/* WaDisablePooledEuLoadBalancingFix:bxt */
2567		wa_masked_en(wal,
2568			     FF_SLICE_CS_CHICKEN2,
2569			     GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
2570	}
2571
2572	if (GRAPHICS_VER(i915) == 9) {
2573		/* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
2574		wa_masked_en(wal,
2575			     GEN9_CSFE_CHICKEN1_RCS,
2576			     GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);
2577
2578		/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
2579		wa_mcr_write_or(wal,
2580				BDW_SCRATCH1,
2581				GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
2582
2583		/* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
2584		if (IS_GEN9_LP(i915))
2585			wa_mcr_write_clr_set(wal,
2586					     GEN8_L3SQCREG1,
2587					     L3_PRIO_CREDITS_MASK,
2588					     L3_GENERAL_PRIO_CREDITS(62) |
2589					     L3_HIGH_PRIO_CREDITS(2));
2590
2591		/* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
2592		wa_mcr_write_or(wal,
2593				GEN8_L3SQCREG4,
2594				GEN8_LQSC_FLUSH_COHERENT_LINES);
2595
2596		/* Disable atomics in L3 to prevent unrecoverable hangs */
2597		wa_write_clr_set(wal, GEN9_SCRATCH_LNCF1,
2598				 GEN9_LNCF_NONIA_COHERENT_ATOMICS_ENABLE, 0);
2599		wa_mcr_write_clr_set(wal, GEN8_L3SQCREG4,
2600				     GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE, 0);
2601		wa_mcr_write_clr_set(wal, GEN9_SCRATCH1,
2602				     EVICTION_PERF_FIX_ENABLE, 0);
2603	}
2604
2605	if (IS_HASWELL(i915)) {
2606		/* WaSampleCChickenBitEnable:hsw */
2607		wa_masked_en(wal,
2608			     HSW_HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE);
2609
2610		wa_masked_dis(wal,
2611			      CACHE_MODE_0_GEN7,
2612			      /* enable HiZ Raw Stall Optimization */
2613			      HIZ_RAW_STALL_OPT_DISABLE);
2614	}
2615
2616	if (IS_VALLEYVIEW(i915)) {
2617		/* WaDisableEarlyCull:vlv */
2618		wa_masked_en(wal,
2619			     _3D_CHICKEN3,
2620			     _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);
2621
2622		/*
2623		 * WaVSThreadDispatchOverride:ivb,vlv
2624		 *
2625		 * This actually overrides the dispatch
2626		 * mode for all thread types.
2627		 */
2628		wa_write_clr_set(wal,
2629				 GEN7_FF_THREAD_MODE,
2630				 GEN7_FF_SCHED_MASK,
2631				 GEN7_FF_TS_SCHED_HW |
2632				 GEN7_FF_VS_SCHED_HW |
2633				 GEN7_FF_DS_SCHED_HW);
2634
2635		/* WaPsdDispatchEnable:vlv */
2636		/* WaDisablePSDDualDispatchEnable:vlv */
2637		wa_masked_en(wal,
2638			     GEN7_HALF_SLICE_CHICKEN1,
2639			     GEN7_MAX_PS_THREAD_DEP |
2640			     GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
2641	}
2642
2643	if (IS_IVYBRIDGE(i915)) {
2644		/* WaDisableEarlyCull:ivb */
2645		wa_masked_en(wal,
2646			     _3D_CHICKEN3,
2647			     _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);
2648
2649		if (0) { /* causes HiZ corruption on ivb:gt1 */
2650			/* enable HiZ Raw Stall Optimization */
2651			wa_masked_dis(wal,
2652				      CACHE_MODE_0_GEN7,
2653				      HIZ_RAW_STALL_OPT_DISABLE);
2654		}
2655
2656		/*
2657		 * WaVSThreadDispatchOverride:ivb,vlv
2658		 *
2659		 * This actually overrides the dispatch
2660		 * mode for all thread types.
2661		 */
2662		wa_write_clr_set(wal,
2663				 GEN7_FF_THREAD_MODE,
2664				 GEN7_FF_SCHED_MASK,
2665				 GEN7_FF_TS_SCHED_HW |
2666				 GEN7_FF_VS_SCHED_HW |
2667				 GEN7_FF_DS_SCHED_HW);
2668
2669		/* WaDisablePSDDualDispatchEnable:ivb */
2670		if (IS_IVB_GT1(i915))
2671			wa_masked_en(wal,
2672				     GEN7_HALF_SLICE_CHICKEN1,
2673				     GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
2674	}
2675
2676	if (GRAPHICS_VER(i915) == 7) {
2677		/* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
2678		wa_masked_en(wal,
2679			     RING_MODE_GEN7(RENDER_RING_BASE),
2680			     GFX_TLB_INVALIDATE_EXPLICIT | GFX_REPLAY_MODE);
2681
2682		/* WaDisable_RenderCache_OperationalFlush:ivb,vlv,hsw */
2683		wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE);
2684
2685		/*
2686		 * BSpec says this must be set, even though
2687		 * WaDisable4x2SubspanOptimization:ivb,hsw
2688		 * WaDisable4x2SubspanOptimization isn't listed for VLV.
2689		 */
2690		wa_masked_en(wal,
2691			     CACHE_MODE_1,
2692			     PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
2693
2694		/*
2695		 * BSpec recommends 8x4 when MSAA is used,
2696		 * however in practice 16x4 seems fastest.
2697		 *
2698		 * Note that PS/WM thread counts depend on the WIZ hashing
2699		 * disable bit, which we don't touch here, but it's good
2700		 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
2701		 */
2702		wa_masked_field_set(wal,
2703				    GEN7_GT_MODE,
2704				    GEN6_WIZ_HASHING_MASK,
2705				    GEN6_WIZ_HASHING_16x4);
2706	}
2707
2708	if (IS_GRAPHICS_VER(i915, 6, 7))
2709		/*
2710		 * We need to disable the AsyncFlip performance optimisations in
2711		 * order to use MI_WAIT_FOR_EVENT within the CS. It should
2712		 * already be programmed to '1' on all products.
2713		 *
2714		 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
2715		 */
2716		wa_masked_en(wal,
2717			     RING_MI_MODE(RENDER_RING_BASE),
2718			     ASYNC_FLIP_PERF_DISABLE);
2719
2720	if (GRAPHICS_VER(i915) == 6) {
2721		/*
2722		 * Required for the hardware to program scanline values for
2723		 * waiting
2724		 * WaEnableFlushTlbInvalidationMode:snb
2725		 */
2726		wa_masked_en(wal,
2727			     GFX_MODE,
2728			     GFX_TLB_INVALIDATE_EXPLICIT);
2729
2730		/* WaDisableHiZPlanesWhenMSAAEnabled:snb */
2731		wa_masked_en(wal,
2732			     _3D_CHICKEN,
2733			     _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB);
2734
2735		wa_masked_en(wal,
2736			     _3D_CHICKEN3,
2737			     /* WaStripsFansDisableFastClipPerformanceFix:snb */
2738			     _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL |
2739			     /*
2740			      * Bspec says:
2741			      * "This bit must be set if 3DSTATE_CLIP clip mode is set
2742			      * to normal and 3DSTATE_SF number of SF output attributes
2743			      * is more than 16."
2744			      */
2745			     _3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH);
2746
2747		/*
2748		 * BSpec recommends 8x4 when MSAA is used,
2749		 * however in practice 16x4 seems fastest.
2750		 *
2751		 * Note that PS/WM thread counts depend on the WIZ hashing
2752		 * disable bit, which we don't touch here, but it's good
2753		 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
2754		 */
2755		wa_masked_field_set(wal,
2756				    GEN6_GT_MODE,
2757				    GEN6_WIZ_HASHING_MASK,
2758				    GEN6_WIZ_HASHING_16x4);
2759
2760		/* WaDisable_RenderCache_OperationalFlush:snb */
2761		wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
2762
2763		/*
2764		 * From the Sandybridge PRM, volume 1 part 3, page 24:
2765		 * "If this bit is set, STCunit will have LRA as replacement
2766		 *  policy. [...] This bit must be reset. LRA replacement
2767		 *  policy is not supported."
2768		 */
2769		wa_masked_dis(wal,
2770			      CACHE_MODE_0,
2771			      CM0_STC_EVICT_DISABLE_LRA_SNB);
2772	}
2773
2774	if (IS_GRAPHICS_VER(i915, 4, 6))
2775		/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
2776		wa_add(wal, RING_MI_MODE(RENDER_RING_BASE),
2777		       0, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH),
2778		       /* XXX bit doesn't stick on Broadwater */
2779		       IS_I965G(i915) ? 0 : VS_TIMER_DISPATCH, true);
2780
2781	if (GRAPHICS_VER(i915) == 4)
2782		/*
2783		 * Disable CONSTANT_BUFFER before it is loaded from the context
2784		 * image. For as it is loaded, it is executed and the stored
2785		 * address may no longer be valid, leading to a GPU hang.
2786		 *
2787		 * This imposes the requirement that userspace reload their
2788		 * CONSTANT_BUFFER on every batch, fortunately a requirement
2789		 * they are already accustomed to from before contexts were
2790		 * enabled.
2791		 */
2792		wa_add(wal, ECOSKPD(RENDER_RING_BASE),
2793		       0, _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE),
2794		       0 /* XXX bit doesn't stick on Broadwater */,
2795		       true);
2796}
2797
2798static void
2799xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
2800{
2801	struct drm_i915_private *i915 = engine->i915;
2802
2803	/* WaKBLVECSSemaphoreWaitPoll:kbl */
2804	if (IS_KABYLAKE(i915) && IS_GRAPHICS_STEP(i915, STEP_A0, STEP_F0)) {
2805		wa_write(wal,
2806			 RING_SEMA_WAIT_POLL(engine->mmio_base),
2807			 1);
2808	}
2809	/* Wa_16018031267, Wa_16018063123 */
2810	if (NEEDS_FASTCOLOR_BLT_WABB(engine))
2811		wa_masked_field_set(wal, ECOSKPD(engine->mmio_base),
2812				    XEHP_BLITTER_SCHEDULING_MODE_MASK,
2813				    XEHP_BLITTER_ROUND_ROBIN_MODE);
2814}
2815
2816static void
2817ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
2818{
2819	if (IS_PVC_CT_STEP(engine->i915, STEP_A0, STEP_C0)) {
2820		/* Wa_14014999345:pvc */
2821		wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS, DISABLE_ECC);
2822	}
2823}
2824
2825/*
2826 * The bspec performance guide has recommended MMIO tuning settings.  These
2827 * aren't truly "workarounds" but we want to program them with the same
2828 * workaround infrastructure to ensure that they're automatically added to
2829 * the GuC save/restore lists, re-applied at the right times, and checked for
2830 * any conflicting programming requested by real workarounds.
2831 *
2832 * Programming settings should be added here only if their registers are not
2833 * part of an engine's register state context.  If a register is part of a
2834 * context, then any tuning settings should be programmed in an appropriate
2835 * function invoked by __intel_engine_init_ctx_wa().
2836 */
2837static void
2838add_render_compute_tuning_settings(struct intel_gt *gt,
2839				   struct i915_wa_list *wal)
2840{
2841	struct drm_i915_private *i915 = gt->i915;
2842
2843	if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) || IS_DG2(i915))
2844		wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512);
2845
2846	/*
2847	 * This tuning setting proves beneficial only on ATS-M designs; the
2848	 * default "age based" setting is optimal on regular DG2 and other
2849	 * platforms.
2850	 */
2851	if (INTEL_INFO(i915)->tuning_thread_rr_after_dep)
2852		wa_mcr_masked_field_set(wal, GEN9_ROW_CHICKEN4, THREAD_EX_ARB_MODE,
2853					THREAD_EX_ARB_MODE_RR_AFTER_DEP);
2854
2855	if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
2856		wa_write_clr(wal, GEN8_GARBCNTL, GEN12_BUS_HASH_CTL_BIT_EXC);
2857}
2858
2859static void ccs_engine_wa_mode(struct intel_engine_cs *engine, struct i915_wa_list *wal)
2860{
2861	struct intel_gt *gt = engine->gt;
2862	u32 mode;
2863
2864	if (!IS_DG2(gt->i915))
2865		return;
2866
2867	/*
2868	 * Wa_14019159160: This workaround, along with others, leads to
2869	 * significant challenges in utilizing load balancing among the
2870	 * CCS slices. Consequently, an architectural decision has been
2871	 * made to completely disable automatic CCS load balancing.
2872	 */
2873	wa_masked_en(wal, GEN12_RCU_MODE, XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE);
2874
2875	/*
2876	 * After having disabled automatic load balancing we need to
2877	 * assign all slices to a single CCS. We will call it CCS mode 1
2878	 */
2879	mode = intel_gt_apply_ccs_mode(gt);
2880	wa_masked_en(wal, XEHP_CCS_MODE, mode);
2881}
2882
2883/*
2884 * The workarounds in this function apply to shared registers in
2885 * the general render reset domain that aren't tied to a
2886 * specific engine.  Since all render+compute engines get reset
2887 * together, and the contents of these registers are lost during
2888 * the shared render domain reset, we'll define such workarounds
2889 * here and then add them to just a single RCS or CCS engine's
2890 * workaround list (whichever engine has the XXXX flag).
2891 */
2892static void
2893general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
2894{
2895	struct drm_i915_private *i915 = engine->i915;
2896	struct intel_gt *gt = engine->gt;
2897
2898	add_render_compute_tuning_settings(gt, wal);
2899
2900	if (GRAPHICS_VER(i915) >= 11) {
2901		/* This is not a Wa (although referred to as
2902		 * WaSetInidrectStateOverride in places), this allows
2903		 * applications that reference sampler states through
2904		 * the BindlessSamplerStateBaseAddress to have their
2905		 * border color relative to DynamicStateBaseAddress
2906		 * rather than BindlessSamplerStateBaseAddress.
2907		 *
2908		 * Otherwise SAMPLER_STATE border colors have to be
2909		 * copied in multiple heaps (DynamicStateBaseAddress &
2910		 * BindlessSamplerStateBaseAddress)
2911		 *
2912		 * BSpec: 46052
2913		 */
2914		wa_mcr_masked_en(wal,
2915				 GEN10_SAMPLER_MODE,
2916				 GEN11_INDIRECT_STATE_BASE_ADDR_OVERRIDE);
2917	}
2918
2919	if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_B0, STEP_FOREVER) ||
2920	    IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER) ||
2921	    IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 74), IP_VER(12, 74)))
2922		/* Wa_14017856879 */
2923		wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN3, MTL_DISABLE_FIX_FOR_EOT_FLUSH);
2924
2925	if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
2926	    IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0))
2927		/*
2928		 * Wa_14017066071
2929		 * Wa_14017654203
2930		 */
2931		wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
2932				 MTL_DISABLE_SAMPLER_SC_OOO);
2933
2934	if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0))
2935		/* Wa_22015279794 */
2936		wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS,
2937				 DISABLE_PREFETCH_INTO_IC);
2938
2939	if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
2940	    IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
2941	    IS_DG2(i915)) {
2942		/* Wa_22013037850 */
2943		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW,
2944				DISABLE_128B_EVICTION_COMMAND_UDW);
2945
2946		/* Wa_18017747507 */
2947		wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE);
2948	}
2949
2950	if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
2951	    IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
2952	    IS_PONTEVECCHIO(i915) ||
2953	    IS_DG2(i915)) {
2954		/* Wa_22014226127 */
2955		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE);
2956	}
2957
2958	if (IS_PONTEVECCHIO(i915) || IS_DG2(i915)) {
2959		/* Wa_14015227452:dg2,pvc */
2960		wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE);
2961
2962		/* Wa_16015675438:dg2,pvc */
2963		wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE);
2964	}
2965
2966	if (IS_DG2(i915)) {
2967		/*
2968		 * Wa_16011620976:dg2_g11
2969		 * Wa_22015475538:dg2
2970		 */
2971		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);
2972
2973		/* Wa_18028616096 */
2974		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, UGM_FRAGMENT_THRESHOLD_TO_3);
2975	}
2976
2977	if (IS_DG2_G11(i915)) {
2978		/*
2979		 * Wa_22012826095:dg2
2980		 * Wa_22013059131:dg2
2981		 */
2982		wa_mcr_write_clr_set(wal, LSC_CHICKEN_BIT_0_UDW,
2983				     MAXREQS_PER_BANK,
2984				     REG_FIELD_PREP(MAXREQS_PER_BANK, 2));
2985
2986		/* Wa_22013059131:dg2 */
2987		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0,
2988				FORCE_1_SUB_MESSAGE_PER_FRAGMENT);
2989
2990		/*
2991		 * Wa_22012654132
2992		 *
2993		 * Note that register 0xE420 is write-only and cannot be read
2994		 * back for verification on DG2 (due to Wa_14012342262), so
2995		 * we need to explicitly skip the readback.
2996		 */
2997		wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
2998			   _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
2999			   0 /* write-only, so skip validation */,
3000			   true);
3001	}
3002
3003	if (IS_XEHPSDV(i915)) {
3004		/* Wa_1409954639 */
3005		wa_mcr_masked_en(wal,
3006				 GEN8_ROW_CHICKEN,
3007				 SYSTOLIC_DOP_CLOCK_GATING_DIS);
3008
3009		/* Wa_1607196519 */
3010		wa_mcr_masked_en(wal,
3011				 GEN9_ROW_CHICKEN4,
3012				 GEN12_DISABLE_GRF_CLEAR);
3013
3014		/* Wa_14010449647:xehpsdv */
3015		wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
3016				 GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
3017	}
3018}
3019
3020static void
3021engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
3022{
3023	if (GRAPHICS_VER(engine->i915) < 4)
3024		return;
3025
3026	engine_fake_wa_init(engine, wal);
3027
3028	/*
3029	 * These are common workarounds that just need to applied
3030	 * to a single RCS/CCS engine's workaround list since
3031	 * they're reset as part of the general render domain reset.
3032	 */
3033	if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) {
3034		general_render_compute_wa_init(engine, wal);
3035		ccs_engine_wa_mode(engine, wal);
3036	}
3037
3038	if (engine->class == COMPUTE_CLASS)
3039		ccs_engine_wa_init(engine, wal);
3040	else if (engine->class == RENDER_CLASS)
3041		rcs_engine_wa_init(engine, wal);
3042	else
3043		xcs_engine_wa_init(engine, wal);
3044}
3045
3046void intel_engine_init_workarounds(struct intel_engine_cs *engine)
3047{
3048	struct i915_wa_list *wal = &engine->wa_list;
3049
3050	wa_init_start(wal, engine->gt, "engine", engine->name);
3051	engine_init_workarounds(engine, wal);
3052	wa_init_finish(wal);
3053}
3054
3055void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
3056{
3057	wa_list_apply(&engine->wa_list);
3058}
3059
3060static const struct i915_range mcr_ranges_gen8[] = {
3061	{ .start = 0x5500, .end = 0x55ff },
3062	{ .start = 0x7000, .end = 0x7fff },
3063	{ .start = 0x9400, .end = 0x97ff },
3064	{ .start = 0xb000, .end = 0xb3ff },
3065	{ .start = 0xe000, .end = 0xe7ff },
3066	{},
3067};
3068
3069static const struct i915_range mcr_ranges_gen12[] = {
3070	{ .start =  0x8150, .end =  0x815f },
3071	{ .start =  0x9520, .end =  0x955f },
3072	{ .start =  0xb100, .end =  0xb3ff },
3073	{ .start =  0xde80, .end =  0xe8ff },
3074	{ .start = 0x24a00, .end = 0x24a7f },
3075	{},
3076};
3077
3078static const struct i915_range mcr_ranges_xehp[] = {
3079	{ .start =  0x4000, .end =  0x4aff },
3080	{ .start =  0x5200, .end =  0x52ff },
3081	{ .start =  0x5400, .end =  0x7fff },
3082	{ .start =  0x8140, .end =  0x815f },
3083	{ .start =  0x8c80, .end =  0x8dff },
3084	{ .start =  0x94d0, .end =  0x955f },
3085	{ .start =  0x9680, .end =  0x96ff },
3086	{ .start =  0xb000, .end =  0xb3ff },
3087	{ .start =  0xc800, .end =  0xcfff },
3088	{ .start =  0xd800, .end =  0xd8ff },
3089	{ .start =  0xdc00, .end =  0xffff },
3090	{ .start = 0x17000, .end = 0x17fff },
3091	{ .start = 0x24a00, .end = 0x24a7f },
3092	{},
3093};
3094
3095static bool mcr_range(struct drm_i915_private *i915, u32 offset)
3096{
3097	const struct i915_range *mcr_ranges;
3098	int i;
3099
3100	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
3101		mcr_ranges = mcr_ranges_xehp;
3102	else if (GRAPHICS_VER(i915) >= 12)
3103		mcr_ranges = mcr_ranges_gen12;
3104	else if (GRAPHICS_VER(i915) >= 8)
3105		mcr_ranges = mcr_ranges_gen8;
3106	else
3107		return false;
3108
3109	/*
3110	 * Registers in these ranges are affected by the MCR selector
3111	 * which only controls CPU initiated MMIO. Routing does not
3112	 * work for CS access so we cannot verify them on this path.
3113	 */
3114	for (i = 0; mcr_ranges[i].start; i++)
3115		if (offset >= mcr_ranges[i].start &&
3116		    offset <= mcr_ranges[i].end)
3117			return true;
3118
3119	return false;
3120}
3121
3122static int
3123wa_list_srm(struct i915_request *rq,
3124	    const struct i915_wa_list *wal,
3125	    struct i915_vma *vma)
3126{
3127	struct drm_i915_private *i915 = rq->i915;
3128	unsigned int i, count = 0;
3129	const struct i915_wa *wa;
3130	u32 srm, *cs;
3131
3132	srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
3133	if (GRAPHICS_VER(i915) >= 8)
3134		srm++;
3135
3136	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
3137		if (!mcr_range(i915, i915_mmio_reg_offset(wa->reg)))
3138			count++;
3139	}
3140
3141	cs = intel_ring_begin(rq, 4 * count);
3142	if (IS_ERR(cs))
3143		return PTR_ERR(cs);
3144
3145	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
3146		u32 offset = i915_mmio_reg_offset(wa->reg);
3147
3148		if (mcr_range(i915, offset))
3149			continue;
3150
3151		*cs++ = srm;
3152		*cs++ = offset;
3153		*cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
3154		*cs++ = 0;
3155	}
3156	intel_ring_advance(rq, cs);
3157
3158	return 0;
3159}
3160
3161static int engine_wa_list_verify(struct intel_context *ce,
3162				 const struct i915_wa_list * const wal,
3163				 const char *from)
3164{
3165	const struct i915_wa *wa;
3166	struct i915_request *rq;
3167	struct i915_vma *vma;
3168	struct i915_gem_ww_ctx ww;
3169	unsigned int i;
3170	u32 *results;
3171	int err;
3172
3173	if (!wal->count)
3174		return 0;
3175
3176	vma = __vm_create_scratch_for_read(&ce->engine->gt->ggtt->vm,
3177					   wal->count * sizeof(u32));
3178	if (IS_ERR(vma))
3179		return PTR_ERR(vma);
3180
3181	intel_engine_pm_get(ce->engine);
3182	i915_gem_ww_ctx_init(&ww, false);
3183retry:
3184	err = i915_gem_object_lock(vma->obj, &ww);
3185	if (err == 0)
3186		err = intel_context_pin_ww(ce, &ww);
3187	if (err)
3188		goto err_pm;
3189
3190	err = i915_vma_pin_ww(vma, &ww, 0, 0,
3191			   i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
3192	if (err)
3193		goto err_unpin;
3194
3195	rq = i915_request_create(ce);
3196	if (IS_ERR(rq)) {
3197		err = PTR_ERR(rq);
3198		goto err_vma;
3199	}
3200
3201	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
3202	if (err == 0)
3203		err = wa_list_srm(rq, wal, vma);
3204
3205	i915_request_get(rq);
3206	if (err)
3207		i915_request_set_error_once(rq, err);
3208	i915_request_add(rq);
3209
3210	if (err)
3211		goto err_rq;
3212
3213	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
3214		err = -ETIME;
3215		goto err_rq;
3216	}
3217
3218	results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
3219	if (IS_ERR(results)) {
3220		err = PTR_ERR(results);
3221		goto err_rq;
3222	}
3223
3224	err = 0;
3225	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
3226		if (mcr_range(rq->i915, i915_mmio_reg_offset(wa->reg)))
3227			continue;
3228
3229		if (!wa_verify(wal->gt, wa, results[i], wal->name, from))
3230			err = -ENXIO;
3231	}
3232
3233	i915_gem_object_unpin_map(vma->obj);
3234
3235err_rq:
3236	i915_request_put(rq);
3237err_vma:
3238	i915_vma_unpin(vma);
3239err_unpin:
3240	intel_context_unpin(ce);
3241err_pm:
3242	if (err == -EDEADLK) {
3243		err = i915_gem_ww_ctx_backoff(&ww);
3244		if (!err)
3245			goto retry;
3246	}
3247	i915_gem_ww_ctx_fini(&ww);
3248	intel_engine_pm_put(ce->engine);
3249	i915_vma_put(vma);
3250	return err;
3251}
3252
3253int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
3254				    const char *from)
3255{
3256	return engine_wa_list_verify(engine->kernel_context,
3257				     &engine->wa_list,
3258				     from);
3259}
3260
3261#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
3262#include "selftest_workarounds.c"
3263#endif