   1/*
   2 * SPDX-License-Identifier: MIT
   3 *
   4 * Copyright © 2014-2018 Intel Corporation
   5 */
   6
   7#include "i915_drv.h"
   8#include "intel_context.h"
   9#include "intel_gt.h"
  10#include "intel_workarounds.h"
  11
  12/**
  13 * DOC: Hardware workarounds
  14 *
  15 * This file is intended as a central place to implement most [1]_ of the
  16 * required workarounds for hardware to work as originally intended. They fall
  17 * in five basic categories depending on how/when they are applied:
  18 *
  19 * - Workarounds that touch registers that are saved/restored to/from the HW
  20 *   context image. The list is emitted (via Load Register Immediate commands)
  21 *   every time a new context is created.
  22 * - GT workarounds. The list of these WAs is applied whenever these registers
  23 *   revert to default values (on GPU reset, suspend/resume [2]_, etc.).
  24 * - Display workarounds. The list is applied during display clock-gating
  25 *   initialization.
  26 * - Workarounds that whitelist a privileged register, so that UMDs can manage
  27 *   them directly. This is just a special case of an MMIO workaround (as we
  28 *   write the list of these to-be-whitelisted registers to some special HW
  29 *   registers).
  30 * - Workaround batchbuffers, that get executed automatically by the hardware
  31 *   on every HW context restore.
  32 *
  33 * .. [1] Please notice that there are other WAs that, due to their nature,
  34 *    cannot be applied from a central place. Those are peppered around the rest
  35 *    of the code, as needed.
  36 *
  37 * .. [2] Technically, some registers are power-context saved & restored, so they
  38 *    survive a suspend/resume. In practice, writing them again is not too
  39 *    costly and simplifies things. We can revisit this in the future.
  40 *
  41 * Layout
  42 * ~~~~~~
  43 *
  44 * Keep things in this file ordered by WA type, as per the above (context, GT,
  45 * display, register whitelist, batchbuffer). Then, inside each type, keep the
  46 * following order:
  47 *
  48 * - Infrastructure functions and macros
  49 * - WAs per platform in standard gen/chrono order
  50 * - Public functions to init or apply the given workaround type.
  51 */
  52
  53static void wa_init_start(struct i915_wa_list *wal, const char *name, const char *engine_name)
  54{
  55	wal->name = name;
  56	wal->engine_name = engine_name;
  57}
  58
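/*
 * Workaround lists are allocated in chunks of WA_LIST_CHUNK entries;
 * wa_init_finish() trims the final allocation back down to the exact
 * number of entries used.
 */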
  59#define WA_LIST_CHUNK (1 << 4)
  60
  61static void wa_init_finish(struct i915_wa_list *wal)
  62{
  63	/* Trim unused entries. */
  64	if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
  65		struct i915_wa *list = kmemdup(wal->list,
  66					       wal->count * sizeof(*list),
  67					       GFP_KERNEL);
  68
  69		if (list) {
  70			kfree(wal->list);
  71			wal->list = list;
  72		}
  73	}
  74
  75	if (!wal->count)
  76		return;
  77
  78	DRM_DEBUG_DRIVER("Initialized %u %s workarounds on %s\n",
  79			 wal->wa_count, wal->name, wal->engine_name);
  80}
  81
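/*
 * Add @wa to @wal, keeping the list sorted by mmio offset. If an entry for
 * the same register already exists, it is merged with the new one: when the
 * new mask only touches bits the existing entry already claimed, the
 * overwritten value is reported before the val/mask/read fields are OR'ed
 * together.
 */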
  82static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
  83{
  84	unsigned int addr = i915_mmio_reg_offset(wa->reg);
  85	unsigned int start = 0, end = wal->count;
  86	const unsigned int grow = WA_LIST_CHUNK;
  87	struct i915_wa *wa_;
  88
  89	GEM_BUG_ON(!is_power_of_2(grow));
  90
  91	if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
  92		struct i915_wa *list;
  93
  94		list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
  95				     GFP_KERNEL);
  96		if (!list) {
  97			DRM_ERROR("No space for workaround init!\n");
  98			return;
  99		}
 100
 101		if (wal->list)
 102			memcpy(list, wal->list, sizeof(*wa) * wal->count);
 103
 104		wal->list = list;
 105	}
 106
 107	while (start < end) {
 108		unsigned int mid = start + (end - start) / 2;
 109
 110		if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) {
 111			start = mid + 1;
 112		} else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) {
 113			end = mid;
 114		} else {
 115			wa_ = &wal->list[mid];
 116
 117			if ((wa->mask & ~wa_->mask) == 0) {
 118				DRM_ERROR("Discarding overwritten w/a for reg %04x (mask: %08x, value: %08x)\n",
 119					  i915_mmio_reg_offset(wa_->reg),
 120					  wa_->mask, wa_->val);
 121
 122				wa_->val &= ~wa->mask;
 123			}
 124
 125			wal->wa_count++;
 126			wa_->val |= wa->val;
 127			wa_->mask |= wa->mask;
 128			wa_->read |= wa->read;
 129			return;
 130		}
 131	}
 132
 133	wal->wa_count++;
 134	wa_ = &wal->list[wal->count++];
 135	*wa_ = *wa;
 136
 137	while (wa_-- > wal->list) {
 138		GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) ==
 139			   i915_mmio_reg_offset(wa_[1].reg));
 140		if (i915_mmio_reg_offset(wa_[1].reg) >
 141		    i915_mmio_reg_offset(wa_[0].reg))
 142			break;
 143
 144		swap(wa_[1], wa_[0]);
 145	}
 146}
 147
 148static void
 149wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
 150		   u32 val)
 151{
 152	struct i915_wa wa = {
 153		.reg  = reg,
 154		.mask = mask,
 155		.val  = val,
 156		.read = mask,
 157	};
 158
 159	_wa_add(wal, &wa);
 160}
 161
 162static void
 163wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
 164{
 165	wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val));
 166}
 167
 168static void
 169wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
 170{
 171	wa_write_masked_or(wal, reg, ~0, val);
 172}
 173
 174static void
 175wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
 176{
 177	wa_write_masked_or(wal, reg, val, val);
 178}
 179
 180#define WA_SET_BIT_MASKED(addr, mask) \
 181	wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask))
 182
 183#define WA_CLR_BIT_MASKED(addr, mask) \
 184	wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_DISABLE(mask))
 185
 186#define WA_SET_FIELD_MASKED(addr, mask, value) \
 187	wa_write_masked_or(wal, (addr), (mask), _MASKED_FIELD((mask), (value)))
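
/*
 * The masked helpers above target registers whose upper 16 bits act as a
 * per-bit write enable: _MASKED_BIT_ENABLE()/_MASKED_FIELD() (from
 * i915_reg.h) put the mask in bits 31:16 and the value in bits 15:0, so a
 * single write updates only the requested bits. For example,
 * WA_SET_FIELD_MASKED(GEN7_GT_MODE, GEN6_WIZ_HASHING_MASK,
 * GEN6_WIZ_HASHING_16x4) results in a write of
 * (GEN6_WIZ_HASHING_MASK << 16) | GEN6_WIZ_HASHING_16x4.
 */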
 188
 189static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
 190				      struct i915_wa_list *wal)
 191{
 192	WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
 193
 194	/* WaDisableAsyncFlipPerfMode:bdw,chv */
 195	WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
 196
 197	/* WaDisablePartialInstShootdown:bdw,chv */
 198	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
 199			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
 200
 201	/* Use Force Non-Coherent whenever executing a 3D context. This is a
 202	 * workaround for a possible hang in the unlikely event a TLB
 203	 * invalidation occurs during a PSD flush.
 204	 */
 205	/* WaForceEnableNonCoherent:bdw,chv */
 206	/* WaHdcDisableFetchWhenMasked:bdw,chv */
 207	WA_SET_BIT_MASKED(HDC_CHICKEN0,
 208			  HDC_DONOT_FETCH_MEM_WHEN_MASKED |
 209			  HDC_FORCE_NON_COHERENT);
 210
 211	/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
 212	 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
 213	 *  polygons in the same 8x4 pixel/sample area to be processed without
 214	 *  stalling waiting for the earlier ones to write to Hierarchical Z
 215	 *  buffer."
 216	 *
 217	 * This optimization is off by default for BDW and CHV; turn it on.
 218	 */
 219	WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
 220
 221	/* Wa4x4STCOptimizationDisable:bdw,chv */
 222	WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
 223
 224	/*
 225	 * BSpec recommends 8x4 when MSAA is used,
 226	 * however in practice 16x4 seems fastest.
 227	 *
 228	 * Note that PS/WM thread counts depend on the WIZ hashing
 229	 * disable bit, which we don't touch here, but it's good
 230	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
 231	 */
 232	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
 233			    GEN6_WIZ_HASHING_MASK,
 234			    GEN6_WIZ_HASHING_16x4);
 235}
 236
 237static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
 238				     struct i915_wa_list *wal)
 239{
 240	struct drm_i915_private *i915 = engine->i915;
 241
 242	gen8_ctx_workarounds_init(engine, wal);
 243
 244	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
 245	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
 246
 247	/* WaDisableDopClockGating:bdw
 248	 *
 249	 * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
 250	 * to disable EUTC clock gating.
 251	 */
 252	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
 253			  DOP_CLOCK_GATING_DISABLE);
 254
 255	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
 256			  GEN8_SAMPLER_POWER_BYPASS_DIS);
 257
 258	WA_SET_BIT_MASKED(HDC_CHICKEN0,
 259			  /* WaForceContextSaveRestoreNonCoherent:bdw */
 260			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
 261			  /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
 262			  (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
 263}
 264
 265static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
 266				     struct i915_wa_list *wal)
 267{
 268	gen8_ctx_workarounds_init(engine, wal);
 269
 270	/* WaDisableThreadStallDopClockGating:chv */
 271	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
 272
 273	/* Improve HiZ throughput on CHV. */
 274	WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
 275}
 276
 277static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
 278				      struct i915_wa_list *wal)
 279{
 280	struct drm_i915_private *i915 = engine->i915;
 281
 282	if (HAS_LLC(i915)) {
 283		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
 284		 *
 285		 * Must match Display Engine. See
 286		 * WaCompressedResourceDisplayNewHashMode.
 287		 */
 288		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
 289				  GEN9_PBE_COMPRESSED_HASH_SELECTION);
 290		WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
 291				  GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
 292	}
 293
 294	/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
 295	/* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
 296	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
 297			  FLOW_CONTROL_ENABLE |
 298			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
 299
 300	/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
 301	/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
 302	WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
 303			  GEN9_ENABLE_YV12_BUGFIX |
 304			  GEN9_ENABLE_GPGPU_PREEMPTION);
 305
 306	/* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
 307	/* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
 308	WA_SET_BIT_MASKED(CACHE_MODE_1,
 309			  GEN8_4x4_STC_OPTIMIZATION_DISABLE |
 310			  GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);
 311
 312	/* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
 313	WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
 314			  GEN9_CCS_TLB_PREFETCH_ENABLE);
 315
 316	/* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
 317	WA_SET_BIT_MASKED(HDC_CHICKEN0,
 318			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
 319			  HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);
 320
 321	/* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
 322	 * both tied to WaForceContextSaveRestoreNonCoherent
 323	 * in some hsds for skl. We keep the tie for all gen9. The
 324	 * documentation is a bit hazy and so we want to get common behaviour,
 325	 * even though there is no clear evidence we would need both on kbl/bxt.
 326	 * This area has been a source of system hangs so we play it safe
 327	 * and mimic the skl regardless of what bspec says.
 328	 *
 329	 * Use Force Non-Coherent whenever executing a 3D context. This
 330	 * is a workaround for a possible hang in the unlikely event
 331	 * a TLB invalidation occurs during a PSD flush.
 332	 */
 333
 334	/* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
 335	WA_SET_BIT_MASKED(HDC_CHICKEN0,
 336			  HDC_FORCE_NON_COHERENT);
 337
 338	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
 339	if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915))
 340		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
 341				  GEN8_SAMPLER_POWER_BYPASS_DIS);
 342
 343	/* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
 344	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
 345
 346	/*
 347	 * Supporting preemption with fine-granularity requires changes in the
 348	 * batch buffer programming. Since we can't break old userspace, we
 349	 * need to set our default preemption level to safe value. Userspace is
 350	 * still able to use more fine-grained preemption levels, since in
 351	 * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
 352	 * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
 353	 * not real HW workarounds, but merely a way to start using preemption
 354	 * while maintaining old contract with userspace.
 355	 */
 356
 357	/* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
 358	WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
 359
 360	/* WaDisableGPGPUMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
 361	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
 362			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
 363			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
 364
 365	/* WaClearHIZ_WM_CHICKEN3:bxt,glk */
 366	if (IS_GEN9_LP(i915))
 367		WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
 368}
 369
 370static void skl_tune_iz_hashing(struct intel_engine_cs *engine,
 371				struct i915_wa_list *wal)
 372{
 373	struct drm_i915_private *i915 = engine->i915;
 374	u8 vals[3] = { 0, 0, 0 };
 375	unsigned int i;
 376
 377	for (i = 0; i < 3; i++) {
 378		u8 ss;
 379
 380		/*
 381		 * Only consider slices where one, and only one, subslice has 7
 382		 * EUs
 383		 */
 384		if (!is_power_of_2(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]))
 385			continue;
 386
 387		/*
 388		 * subslice_7eu[i] != 0 (because of the check above) and
 389		 * ss_max == 4 (maximum number of subslices possible per slice)
 390		 *
 391		 * ->    0 <= ss <= 3;
 392		 */
 393		ss = ffs(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]) - 1;
 394		vals[i] = 3 - ss;
 395	}
 396
 397	if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
 398		return;
 399
 400	/* Tune IZ hashing. See intel_device_info_runtime_init() */
 401	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
 402			    GEN9_IZ_HASHING_MASK(2) |
 403			    GEN9_IZ_HASHING_MASK(1) |
 404			    GEN9_IZ_HASHING_MASK(0),
 405			    GEN9_IZ_HASHING(2, vals[2]) |
 406			    GEN9_IZ_HASHING(1, vals[1]) |
 407			    GEN9_IZ_HASHING(0, vals[0]));
 408}
 409
 410static void skl_ctx_workarounds_init(struct intel_engine_cs *engine,
 411				     struct i915_wa_list *wal)
 412{
 413	gen9_ctx_workarounds_init(engine, wal);
 414	skl_tune_iz_hashing(engine, wal);
 415}
 416
 417static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine,
 418				     struct i915_wa_list *wal)
 419{
 420	gen9_ctx_workarounds_init(engine, wal);
 421
 422	/* WaDisableThreadStallDopClockGating:bxt */
 423	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
 424			  STALL_DOP_GATING_DISABLE);
 425
 426	/* WaToEnableHwFixForPushConstHWBug:bxt */
 427	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
 428			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
 429}
 430
 431static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
 432				     struct i915_wa_list *wal)
 433{
 434	struct drm_i915_private *i915 = engine->i915;
 435
 436	gen9_ctx_workarounds_init(engine, wal);
 437
 438	/* WaToEnableHwFixForPushConstHWBug:kbl */
 439	if (IS_KBL_REVID(i915, KBL_REVID_C0, REVID_FOREVER))
 440		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
 441				  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
 442
 443	/* WaDisableSbeCacheDispatchPortSharing:kbl */
 444	WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
 445			  GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
 446}
 447
 448static void glk_ctx_workarounds_init(struct intel_engine_cs *engine,
 449				     struct i915_wa_list *wal)
 450{
 451	gen9_ctx_workarounds_init(engine, wal);
 452
 453	/* WaToEnableHwFixForPushConstHWBug:glk */
 454	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
 455			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
 456}
 457
 458static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
 459				     struct i915_wa_list *wal)
 460{
 461	gen9_ctx_workarounds_init(engine, wal);
 462
 463	/* WaToEnableHwFixForPushConstHWBug:cfl */
 464	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
 465			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
 466
 467	/* WaDisableSbeCacheDispatchPortSharing:cfl */
 468	WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
 469			  GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
 470}
 471
 472static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine,
 473				     struct i915_wa_list *wal)
 474{
 475	struct drm_i915_private *i915 = engine->i915;
 476
 477	/* WaForceContextSaveRestoreNonCoherent:cnl */
 478	WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
 479			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);
 480
 481	/* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */
 482	if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
 483		WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);
 484
 485	/* WaDisableReplayBufferBankArbitrationOptimization:cnl */
 486	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
 487			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
 488
 489	/* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
 490	if (IS_CNL_REVID(i915, 0, CNL_REVID_B0))
 491		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
 492				  GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);
 493
 494	/* WaPushConstantDereferenceHoldDisable:cnl */
 495	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);
 496
 497	/* FtrEnableFastAnisoL1BankingFix:cnl */
 498	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);
 499
 500	/* WaDisable3DMidCmdPreemption:cnl */
 501	WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
 502
 503	/* WaDisableGPGPUMidCmdPreemption:cnl */
 504	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
 505			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
 506			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
 507
 508	/* WaDisableEarlyEOT:cnl */
 509	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
 510}
 511
 512static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
 513				     struct i915_wa_list *wal)
 514{
 515	struct drm_i915_private *i915 = engine->i915;
 516
 517	/* WaDisableBankHangMode:icl */
 518	wa_write(wal,
 519		 GEN8_L3CNTLREG,
 520		 intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) |
 521		 GEN8_ERRDETBCTRL);
 522
 523	/* Wa_1604370585:icl (pre-prod)
 524	 * Formerly known as WaPushConstantDereferenceHoldDisable
 525	 */
 526	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
 527		WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
 528				  PUSH_CONSTANT_DEREF_DISABLE);
 529
 530	/* WaForceEnableNonCoherent:icl
 531	 * This is not the same workaround as in early Gen9 platforms, where
 532	 * lacking this could cause system hangs, but coherency performance
 533	 * overhead is high and only a few compute workloads really need it
 534	 * (the register is whitelisted in hardware now, so UMDs can opt in
 535	 * for coherency if they have a good reason).
 536	 */
 537	WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);
 538
 539	/* Wa_2006611047:icl (pre-prod)
 540	 * Formerly known as WaDisableImprovedTdlClkGating
 541	 */
 542	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
 543		WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
 544				  GEN11_TDL_CLOCK_GATING_FIX_DISABLE);
 545
 546	/* Wa_2006665173:icl (pre-prod) */
 547	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
 548		WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
 549				  GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC);
 550
 551	/* WaEnableFloatBlendOptimization:icl */
 552	wa_write_masked_or(wal,
 553			   GEN10_CACHE_MODE_SS,
 554			   0, /* write-only, so skip validation */
 555			   _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE));
 556
 557	/* WaDisableGPGPUMidThreadPreemption:icl */
 558	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
 559			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
 560			    GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
 561
 562	/* allow headerless messages for preemptible GPGPU context */
 563	WA_SET_BIT_MASKED(GEN10_SAMPLER_MODE,
 564			  GEN11_SAMPLER_ENABLE_HEADLESS_MSG);
 565}
 566
 567static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine,
 568				     struct i915_wa_list *wal)
 569{
 570}
 571
 572static void
 573__intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
 574			   struct i915_wa_list *wal,
 575			   const char *name)
 576{
 577	struct drm_i915_private *i915 = engine->i915;
 578
 579	if (engine->class != RENDER_CLASS)
 580		return;
 581
 582	wa_init_start(wal, name, engine->name);
 583
 584	if (IS_GEN(i915, 12))
 585		tgl_ctx_workarounds_init(engine, wal);
 586	else if (IS_GEN(i915, 11))
 587		icl_ctx_workarounds_init(engine, wal);
 588	else if (IS_CANNONLAKE(i915))
 589		cnl_ctx_workarounds_init(engine, wal);
 590	else if (IS_COFFEELAKE(i915))
 591		cfl_ctx_workarounds_init(engine, wal);
 592	else if (IS_GEMINILAKE(i915))
 593		glk_ctx_workarounds_init(engine, wal);
 594	else if (IS_KABYLAKE(i915))
 595		kbl_ctx_workarounds_init(engine, wal);
 596	else if (IS_BROXTON(i915))
 597		bxt_ctx_workarounds_init(engine, wal);
 598	else if (IS_SKYLAKE(i915))
 599		skl_ctx_workarounds_init(engine, wal);
 600	else if (IS_CHERRYVIEW(i915))
 601		chv_ctx_workarounds_init(engine, wal);
 602	else if (IS_BROADWELL(i915))
 603		bdw_ctx_workarounds_init(engine, wal);
 604	else if (INTEL_GEN(i915) < 8)
 605		return;
 606	else
 607		MISSING_CASE(INTEL_GEN(i915));
 608
 609	wa_init_finish(wal);
 610}
 611
 612void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
 613{
 614	__intel_engine_init_ctx_wa(engine, &engine->ctx_wa_list, "context");
 615}
 616
 617int intel_engine_emit_ctx_wa(struct i915_request *rq)
 618{
 619	struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
 620	struct i915_wa *wa;
 621	unsigned int i;
 622	u32 *cs;
 623	int ret;
 624
 625	if (wal->count == 0)
 626		return 0;
 627
 628	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
 629	if (ret)
 630		return ret;
 631
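	/*
	 * One dword for the MI_LOAD_REGISTER_IMM header, two dwords
	 * (offset, value) per register, plus a trailing MI_NOOP to keep
	 * the emitted length an even number of dwords.
	 */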
 632	cs = intel_ring_begin(rq, (wal->count * 2 + 2));
 633	if (IS_ERR(cs))
 634		return PTR_ERR(cs);
 635
 636	*cs++ = MI_LOAD_REGISTER_IMM(wal->count);
 637	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
 638		*cs++ = i915_mmio_reg_offset(wa->reg);
 639		*cs++ = wa->val;
 640	}
 641	*cs++ = MI_NOOP;
 642
 643	intel_ring_advance(rq, cs);
 644
 645	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
 646	if (ret)
 647		return ret;
 648
 649	return 0;
 650}
 651
 652static void
 653gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
 654{
 655	/* WaDisableKillLogic:bxt,skl,kbl */
 656	if (!IS_COFFEELAKE(i915))
 657		wa_write_or(wal,
 658			    GAM_ECOCHK,
 659			    ECOCHK_DIS_TLB);
 660
 661	if (HAS_LLC(i915)) {
 662		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
 663		 *
 664		 * Must match Display Engine. See
 665		 * WaCompressedResourceDisplayNewHashMode.
 666		 */
 667		wa_write_or(wal,
 668			    MMCD_MISC_CTRL,
 669			    MMCD_PCLA | MMCD_HOTSPOT_EN);
 670	}
 671
 672	/* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
 673	wa_write_or(wal,
 674		    GAM_ECOCHK,
 675		    BDW_DISABLE_HDC_INVALIDATION);
 676}
 677
 678static void
 679skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
 680{
 681	gen9_gt_workarounds_init(i915, wal);
 682
 683	/* WaDisableGafsUnitClkGating:skl */
 684	wa_write_or(wal,
 685		    GEN7_UCGCTL4,
 686		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
 687
 688	/* WaInPlaceDecompressionHang:skl */
 689	if (IS_SKL_REVID(i915, SKL_REVID_H0, REVID_FOREVER))
 690		wa_write_or(wal,
 691			    GEN9_GAMT_ECO_REG_RW_IA,
 692			    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
 693}
 694
 695static void
 696bxt_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
 697{
 698	gen9_gt_workarounds_init(i915, wal);
 699
 700	/* WaInPlaceDecompressionHang:bxt */
 701	wa_write_or(wal,
 702		    GEN9_GAMT_ECO_REG_RW_IA,
 703		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
 704}
 705
 706static void
 707kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
 708{
 709	gen9_gt_workarounds_init(i915, wal);
 710
 711	/* WaDisableDynamicCreditSharing:kbl */
 712	if (IS_KBL_REVID(i915, 0, KBL_REVID_B0))
 713		wa_write_or(wal,
 714			    GAMT_CHKN_BIT_REG,
 715			    GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
 716
 717	/* WaDisableGafsUnitClkGating:kbl */
 718	wa_write_or(wal,
 719		    GEN7_UCGCTL4,
 720		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
 721
 722	/* WaInPlaceDecompressionHang:kbl */
 723	wa_write_or(wal,
 724		    GEN9_GAMT_ECO_REG_RW_IA,
 725		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
 726}
 727
 728static void
 729glk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
 730{
 731	gen9_gt_workarounds_init(i915, wal);
 732}
 733
 734static void
 735cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
 736{
 737	gen9_gt_workarounds_init(i915, wal);
 738
 739	/* WaDisableGafsUnitClkGating:cfl */
 740	wa_write_or(wal,
 741		    GEN7_UCGCTL4,
 742		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
 743
 744	/* WaInPlaceDecompressionHang:cfl */
 745	wa_write_or(wal,
 746		    GEN9_GAMT_ECO_REG_RW_IA,
 747		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
 748}
 749
 750static void
 751wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
 752{
 753	const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
 754	unsigned int slice, subslice;
 755	u32 l3_en, mcr, mcr_mask;
 756
 757	GEM_BUG_ON(INTEL_GEN(i915) < 10);
 758
 759	/*
 760	 * WaProgramMgsrForL3BankSpecificMmioReads:cnl,icl
 761	 * L3Banks could be fused off in single slice scenario. If that is
 762	 * the case, we might need to program MCR select to a valid L3Bank
 763	 * by default, to make sure we correctly read certain registers
 764	 * later on (in the range 0xB100 - 0xB3FF).
 765	 *
 766	 * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl
 767	 * Before any MMIO read into slice/subslice specific registers, MCR
 768	 * packet control register needs to be programmed to point to any
 769	 * enabled s/ss pair. Otherwise, incorrect values will be returned.
 770	 * This means each subsequent MMIO read will be forwarded to a
 771	 * specific s/ss combination, but this is OK since these registers
 772	 * are consistent across s/ss in almost all cases. On the rare
 773	 * occasions, such as INSTDONE, where this value is dependent
 774	 * on s/ss combo, the read should be done with read_subslice_reg.
 775	 *
 776	 * Since GEN8_MCR_SELECTOR contains dual-purpose bits which select both
 777	 * to which subslice, or to which L3 bank, the respective mmio reads
 778	 * will go, we have to find a common index which works for both
 779	 * accesses.
 780	 *
 781	 * The case where we cannot find a common index should fortunately not
 782	 * happen on production hardware, so we only emit a warning instead of
 783	 * implementing something more complex that would require checking the
 784	 * range of every MMIO read.
 785	 */
 786
 787	if (INTEL_GEN(i915) >= 10 && is_power_of_2(sseu->slice_mask)) {
 788		u32 l3_fuse =
 789			intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3) &
 790			GEN10_L3BANK_MASK;
 791
 792		DRM_DEBUG_DRIVER("L3 fuse = %x\n", l3_fuse);
 793		l3_en = ~(l3_fuse << GEN10_L3BANK_PAIR_COUNT | l3_fuse);
 794	} else {
 795		l3_en = ~0;
 796	}
 797
 798	slice = fls(sseu->slice_mask) - 1;
 799	GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask));
 800	subslice = fls(l3_en & sseu->subslice_mask[slice]);
 801	if (!subslice) {
 802		DRM_WARN("No common index found between subslice mask %x and L3 bank mask %x!\n",
 803			 sseu->subslice_mask[slice], l3_en);
 804		subslice = fls(l3_en);
 805		WARN_ON(!subslice);
 806	}
 807	subslice--;
 808
 809	if (INTEL_GEN(i915) >= 11) {
 810		mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
 811		mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
 812	} else {
 813		mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
 814		mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
 815	}
 816
 817	DRM_DEBUG_DRIVER("MCR slice/subslice = %x\n", mcr);
 818
 819	wa_write_masked_or(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr);
 820}
 821
 822static void
 823cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
 824{
 825	wa_init_mcr(i915, wal);
 826
 827	/* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
 828	if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
 829		wa_write_or(wal,
 830			    GAMT_CHKN_BIT_REG,
 831			    GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);
 832
 833	/* WaInPlaceDecompressionHang:cnl */
 834	wa_write_or(wal,
 835		    GEN9_GAMT_ECO_REG_RW_IA,
 836		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
 837}
 838
 839static void
 840icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
 841{
 842	wa_init_mcr(i915, wal);
 843
 844	/* WaInPlaceDecompressionHang:icl */
 845	wa_write_or(wal,
 846		    GEN9_GAMT_ECO_REG_RW_IA,
 847		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
 848
 849	/* WaModifyGamTlbPartitioning:icl */
 850	wa_write_masked_or(wal,
 851			   GEN11_GACB_PERF_CTRL,
 852			   GEN11_HASH_CTRL_MASK,
 853			   GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);
 854
 855	/* Wa_1405766107:icl
 856	 * Formerly known as WaCL2SFHalfMaxAlloc
 857	 */
 858	wa_write_or(wal,
 859		    GEN11_LSN_UNSLCVC,
 860		    GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
 861		    GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);
 862
 863	/* Wa_220166154:icl
 864	 * Formerly known as WaDisCtxReload
 865	 */
 866	wa_write_or(wal,
 867		    GEN8_GAMW_ECO_DEV_RW_IA,
 868		    GAMW_ECO_DEV_CTX_RELOAD_DISABLE);
 869
 870	/* Wa_1405779004:icl (pre-prod) */
 871	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
 872		wa_write_or(wal,
 873			    SLICE_UNIT_LEVEL_CLKGATE,
 874			    MSCUNIT_CLKGATE_DIS);
 875
 876	/* Wa_1406680159:icl */
 877	wa_write_or(wal,
 878		    SUBSLICE_UNIT_LEVEL_CLKGATE,
 879		    GWUNIT_CLKGATE_DIS);
 880
 881	/* Wa_1406838659:icl (pre-prod) */
 882	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
 883		wa_write_or(wal,
 884			    INF_UNIT_LEVEL_CLKGATE,
 885			    CGPSF_CLKGATE_DIS);
 886
 887	/* Wa_1406463099:icl
 888	 * Formerly known as WaGamTlbPendError
 889	 */
 890	wa_write_or(wal,
 891		    GAMT_CHKN_BIT_REG,
 892		    GAMT_CHKN_DISABLE_L3_COH_PIPE);
 893}
 894
 895static void
 896tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
 897{
 898}
 899
 900static void
 901gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
 902{
 903	if (IS_GEN(i915, 12))
 904		tgl_gt_workarounds_init(i915, wal);
 905	else if (IS_GEN(i915, 11))
 906		icl_gt_workarounds_init(i915, wal);
 907	else if (IS_CANNONLAKE(i915))
 908		cnl_gt_workarounds_init(i915, wal);
 909	else if (IS_COFFEELAKE(i915))
 910		cfl_gt_workarounds_init(i915, wal);
 911	else if (IS_GEMINILAKE(i915))
 912		glk_gt_workarounds_init(i915, wal);
 913	else if (IS_KABYLAKE(i915))
 914		kbl_gt_workarounds_init(i915, wal);
 915	else if (IS_BROXTON(i915))
 916		bxt_gt_workarounds_init(i915, wal);
 917	else if (IS_SKYLAKE(i915))
 918		skl_gt_workarounds_init(i915, wal);
 919	else if (INTEL_GEN(i915) <= 8)
 920		return;
 921	else
 922		MISSING_CASE(INTEL_GEN(i915));
 923}
 924
 925void intel_gt_init_workarounds(struct drm_i915_private *i915)
 926{
 927	struct i915_wa_list *wal = &i915->gt_wa_list;
 928
 929	wa_init_start(wal, "GT", "global");
 930	gt_init_workarounds(i915, wal);
 931	wa_init_finish(wal);
 932}
 933
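/*
 * Compute the union of forcewake domains required to read and write every
 * register on the list, so that wa_list_apply() can take them once for the
 * whole read-modify-write pass.
 */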
 934static enum forcewake_domains
 935wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
 936{
 937	enum forcewake_domains fw = 0;
 938	struct i915_wa *wa;
 939	unsigned int i;
 940
 941	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
 942		fw |= intel_uncore_forcewake_for_reg(uncore,
 943						     wa->reg,
 944						     FW_REG_READ |
 945						     FW_REG_WRITE);
 946
 947	return fw;
 948}
 949
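/*
 * Verify a single workaround: only the bits in wa->read are expected to be
 * readable back, so compare those against the value we asked for.
 */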
 950static bool
 951wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
 952{
 953	if ((cur ^ wa->val) & wa->read) {
 954		DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n",
 955			  name, from, i915_mmio_reg_offset(wa->reg),
 956			  cur, cur & wa->read,
 957			  wa->val, wa->mask);
 958
 959		return false;
 960	}
 961
 962	return true;
 963}
 964
 965static void
 966wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal)
 967{
 968	enum forcewake_domains fw;
 969	unsigned long flags;
 970	struct i915_wa *wa;
 971	unsigned int i;
 972
 973	if (!wal->count)
 974		return;
 975
 976	fw = wal_get_fw_for_rmw(uncore, wal);
 977
 978	spin_lock_irqsave(&uncore->lock, flags);
 979	intel_uncore_forcewake_get__locked(uncore, fw);
 980
 981	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
 982		intel_uncore_rmw_fw(uncore, wa->reg, wa->mask, wa->val);
 983		if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
 984			wa_verify(wa,
 985				  intel_uncore_read_fw(uncore, wa->reg),
 986				  wal->name, "application");
 987	}
 988
 989	intel_uncore_forcewake_put__locked(uncore, fw);
 990	spin_unlock_irqrestore(&uncore->lock, flags);
 991}
 992
 993void intel_gt_apply_workarounds(struct intel_gt *gt)
 994{
 995	wa_list_apply(gt->uncore, &gt->i915->gt_wa_list);
 996}
 997
 998static bool wa_list_verify(struct intel_uncore *uncore,
 999			   const struct i915_wa_list *wal,
1000			   const char *from)
1001{
1002	struct i915_wa *wa;
1003	unsigned int i;
1004	bool ok = true;
1005
1006	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1007		ok &= wa_verify(wa,
1008				intel_uncore_read(uncore, wa->reg),
1009				wal->name, from);
1010
1011	return ok;
1012}
1013
1014bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from)
1015{
1016	return wa_list_verify(gt->uncore, &gt->i915->gt_wa_list, from);
1017}
1018
1019static inline bool is_nonpriv_flags_valid(u32 flags)
1020{
1021	/* Check only valid flag bits are set */
1022	if (flags & ~RING_FORCE_TO_NONPRIV_MASK_VALID)
1023		return false;
1024
1025	/* NB: Only 3 out of 4 enum values are valid for access field */
1026	if ((flags & RING_FORCE_TO_NONPRIV_ACCESS_MASK) ==
1027	    RING_FORCE_TO_NONPRIV_ACCESS_INVALID)
1028		return false;
1029
1030	return true;
1031}
1032
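/*
 * Add a register to the engine's whitelist. The access flags are OR'ed into
 * the low bits of the register offset, matching the encoding consumed by the
 * RING_FORCE_TO_NONPRIV slots; only RING_MAX_NONPRIV_SLOTS registers can be
 * whitelisted per engine.
 */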
1033static void
1034whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
1035{
1036	struct i915_wa wa = {
1037		.reg = reg
1038	};
1039
1040	if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
1041		return;
1042
1043	if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags)))
1044		return;
1045
1046	wa.reg.reg |= flags;
1047	_wa_add(wal, &wa);
1048}
1049
1050static void
1051whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
1052{
1053	whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
1054}
1055
1056static void gen9_whitelist_build(struct i915_wa_list *w)
1057{
1058	/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
1059	whitelist_reg(w, GEN9_CTX_PREEMPT_REG);
1060
1061	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
1062	whitelist_reg(w, GEN8_CS_CHICKEN1);
1063
1064	/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
1065	whitelist_reg(w, GEN8_HDC_CHICKEN1);
1066
1067	/* WaSendPushConstantsFromMMIO:skl,bxt */
1068	whitelist_reg(w, COMMON_SLICE_CHICKEN2);
1069}
1070
1071static void skl_whitelist_build(struct intel_engine_cs *engine)
1072{
1073	struct i915_wa_list *w = &engine->whitelist;
1074
1075	if (engine->class != RENDER_CLASS)
1076		return;
1077
1078	gen9_whitelist_build(w);
1079
1080	/* WaDisableLSQCROPERFforOCL:skl */
1081	whitelist_reg(w, GEN8_L3SQCREG4);
1082}
1083
1084static void bxt_whitelist_build(struct intel_engine_cs *engine)
1085{
1086	if (engine->class != RENDER_CLASS)
1087		return;
1088
1089	gen9_whitelist_build(&engine->whitelist);
1090}
1091
1092static void kbl_whitelist_build(struct intel_engine_cs *engine)
1093{
1094	struct i915_wa_list *w = &engine->whitelist;
1095
1096	if (engine->class != RENDER_CLASS)
1097		return;
1098
1099	gen9_whitelist_build(w);
1100
1101	/* WaDisableLSQCROPERFforOCL:kbl */
1102	whitelist_reg(w, GEN8_L3SQCREG4);
1103}
1104
1105static void glk_whitelist_build(struct intel_engine_cs *engine)
1106{
1107	struct i915_wa_list *w = &engine->whitelist;
1108
1109	if (engine->class != RENDER_CLASS)
1110		return;
1111
1112	gen9_whitelist_build(w);
1113
1114	/* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
1115	whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
1116}
1117
1118static void cfl_whitelist_build(struct intel_engine_cs *engine)
1119{
1120	struct i915_wa_list *w = &engine->whitelist;
1121
1122	if (engine->class != RENDER_CLASS)
1123		return;
1124
1125	gen9_whitelist_build(w);
1126
1127	/*
1128	 * WaAllowPMDepthAndInvocationCountAccessFromUMD:cfl,whl,cml,aml
1129	 *
1130	 * This covers 4 registers which are next to one another:
1131	 *   - PS_INVOCATION_COUNT
1132	 *   - PS_INVOCATION_COUNT_UDW
1133	 *   - PS_DEPTH_COUNT
1134	 *   - PS_DEPTH_COUNT_UDW
1135	 */
1136	whitelist_reg_ext(w, PS_INVOCATION_COUNT,
1137			  RING_FORCE_TO_NONPRIV_ACCESS_RD |
1138			  RING_FORCE_TO_NONPRIV_RANGE_4);
1139}
1140
1141static void cnl_whitelist_build(struct intel_engine_cs *engine)
1142{
1143	struct i915_wa_list *w = &engine->whitelist;
1144
1145	if (engine->class != RENDER_CLASS)
1146		return;
1147
1148	/* WaEnablePreemptionGranularityControlByUMD:cnl */
1149	whitelist_reg(w, GEN8_CS_CHICKEN1);
1150}
1151
1152static void icl_whitelist_build(struct intel_engine_cs *engine)
1153{
1154	struct i915_wa_list *w = &engine->whitelist;
1155
1156	switch (engine->class) {
1157	case RENDER_CLASS:
1158		/* WaAllowUMDToModifyHalfSliceChicken7:icl */
1159		whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7);
1160
1161		/* WaAllowUMDToModifySamplerMode:icl */
1162		whitelist_reg(w, GEN10_SAMPLER_MODE);
1163
1164		/* WaEnableStateCacheRedirectToCS:icl */
1165		whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
1166
1167		/*
1168		 * WaAllowPMDepthAndInvocationCountAccessFromUMD:icl
1169		 *
1170		 * This covers 4 registers which are next to one another:
1171		 *   - PS_INVOCATION_COUNT
1172		 *   - PS_INVOCATION_COUNT_UDW
1173		 *   - PS_DEPTH_COUNT
1174		 *   - PS_DEPTH_COUNT_UDW
1175		 */
1176		whitelist_reg_ext(w, PS_INVOCATION_COUNT,
1177				  RING_FORCE_TO_NONPRIV_ACCESS_RD |
1178				  RING_FORCE_TO_NONPRIV_RANGE_4);
1179		break;
1180
1181	case VIDEO_DECODE_CLASS:
1182		/* hucStatusRegOffset */
1183		whitelist_reg_ext(w, _MMIO(0x2000 + engine->mmio_base),
1184				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
1185		/* hucUKernelHdrInfoRegOffset */
1186		whitelist_reg_ext(w, _MMIO(0x2014 + engine->mmio_base),
1187				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
1188		/* hucStatus2RegOffset */
1189		whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base),
1190				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
1191		break;
1192
1193	default:
1194		break;
1195	}
1196}
1197
1198static void tgl_whitelist_build(struct intel_engine_cs *engine)
1199{
1200}
1201
1202void intel_engine_init_whitelist(struct intel_engine_cs *engine)
1203{
1204	struct drm_i915_private *i915 = engine->i915;
1205	struct i915_wa_list *w = &engine->whitelist;
1206
1207	wa_init_start(w, "whitelist", engine->name);
1208
1209	if (IS_GEN(i915, 12))
1210		tgl_whitelist_build(engine);
1211	else if (IS_GEN(i915, 11))
1212		icl_whitelist_build(engine);
1213	else if (IS_CANNONLAKE(i915))
1214		cnl_whitelist_build(engine);
1215	else if (IS_COFFEELAKE(i915))
1216		cfl_whitelist_build(engine);
1217	else if (IS_GEMINILAKE(i915))
1218		glk_whitelist_build(engine);
1219	else if (IS_KABYLAKE(i915))
1220		kbl_whitelist_build(engine);
1221	else if (IS_BROXTON(i915))
1222		bxt_whitelist_build(engine);
1223	else if (IS_SKYLAKE(i915))
1224		skl_whitelist_build(engine);
1225	else if (INTEL_GEN(i915) <= 8)
1226		return;
1227	else
1228		MISSING_CASE(INTEL_GEN(i915));
1229
1230	wa_init_finish(w);
1231}
1232
1233void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
1234{
1235	const struct i915_wa_list *wal = &engine->whitelist;
1236	struct intel_uncore *uncore = engine->uncore;
1237	const u32 base = engine->mmio_base;
1238	struct i915_wa *wa;
1239	unsigned int i;
1240
1241	if (!wal->count)
1242		return;
1243
1244	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1245		intel_uncore_write(uncore,
1246				   RING_FORCE_TO_NONPRIV(base, i),
1247				   i915_mmio_reg_offset(wa->reg));
1248
1249	/* And clear the rest just in case of garbage */
1250	for (; i < RING_MAX_NONPRIV_SLOTS; i++)
1251		intel_uncore_write(uncore,
1252				   RING_FORCE_TO_NONPRIV(base, i),
1253				   i915_mmio_reg_offset(RING_NOPID(base)));
1254}
1255
1256static void
1257rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
1258{
1259	struct drm_i915_private *i915 = engine->i915;
1260
1261	if (IS_GEN(i915, 11)) {
1262		/* This is not a Wa. Enable for better image quality */
1263		wa_masked_en(wal,
1264			     _3D_CHICKEN3,
1265			     _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);
1266
1267		/* WaPipelineFlushCoherentLines:icl */
1268		wa_write_or(wal,
1269			    GEN8_L3SQCREG4,
1270			    GEN8_LQSC_FLUSH_COHERENT_LINES);
1271
1272		/*
1273		 * Wa_1405543622:icl
1274		 * Formerly known as WaGAPZPriorityScheme
1275		 */
1276		wa_write_or(wal,
1277			    GEN8_GARBCNTL,
1278			    GEN11_ARBITRATION_PRIO_ORDER_MASK);
1279
1280		/*
1281		 * Wa_1604223664:icl
1282		 * Formerly known as WaL3BankAddressHashing
1283		 */
1284		wa_write_masked_or(wal,
1285				   GEN8_GARBCNTL,
1286				   GEN11_HASH_CTRL_EXCL_MASK,
1287				   GEN11_HASH_CTRL_EXCL_BIT0);
1288		wa_write_masked_or(wal,
1289				   GEN11_GLBLINVL,
1290				   GEN11_BANK_HASH_ADDR_EXCL_MASK,
1291				   GEN11_BANK_HASH_ADDR_EXCL_BIT0);
1292
1293		/*
1294		 * Wa_1405733216:icl
1295		 * Formerly known as WaDisableCleanEvicts
1296		 */
1297		wa_write_or(wal,
1298			    GEN8_L3SQCREG4,
1299			    GEN11_LQSC_CLEAN_EVICT_DISABLE);
1300
1301		/* WaForwardProgressSoftReset:icl */
1302		wa_write_or(wal,
1303			    GEN10_SCRATCH_LNCF2,
1304			    PMFLUSHDONE_LNICRSDROP |
1305			    PMFLUSH_GAPL3UNBLOCK |
1306			    PMFLUSHDONE_LNEBLK);
1307
1308		/* Wa_1406609255:icl (pre-prod) */
1309		if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
1310			wa_write_or(wal,
1311				    GEN7_SARCHKMD,
1312				    GEN7_DISABLE_DEMAND_PREFETCH);
1313
1314		/* Wa_1606682166:icl */
1315		wa_write_or(wal,
1316			    GEN7_SARCHKMD,
1317			    GEN7_DISABLE_SAMPLER_PREFETCH);
1318
1319		/* Wa_1409178092:icl */
1320		wa_write_masked_or(wal,
1321				   GEN11_SCRATCH2,
1322				   GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE,
1323				   0);
1324	}
1325
1326	if (IS_GEN_RANGE(i915, 9, 11)) {
1327		/* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl */
1328		wa_masked_en(wal,
1329			     GEN7_FF_SLICE_CS_CHICKEN1,
1330			     GEN9_FFSC_PERCTX_PREEMPT_CTRL);
1331	}
1332
1333	if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915)) {
1334		/* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
1335		wa_write_or(wal,
1336			    GEN8_GARBCNTL,
1337			    GEN9_GAPS_TSV_CREDIT_DISABLE);
1338	}
1339
1340	if (IS_BROXTON(i915)) {
1341		/* WaDisablePooledEuLoadBalancingFix:bxt */
1342		wa_masked_en(wal,
1343			     FF_SLICE_CS_CHICKEN2,
1344			     GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
1345	}
1346
1347	if (IS_GEN(i915, 9)) {
1348		/* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
1349		wa_masked_en(wal,
1350			     GEN9_CSFE_CHICKEN1_RCS,
1351			     GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);
1352
1353		/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
1354		wa_write_or(wal,
1355			    BDW_SCRATCH1,
1356			    GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
1357
1358		/* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
1359		if (IS_GEN9_LP(i915))
1360			wa_write_masked_or(wal,
1361					   GEN8_L3SQCREG1,
1362					   L3_PRIO_CREDITS_MASK,
1363					   L3_GENERAL_PRIO_CREDITS(62) |
1364					   L3_HIGH_PRIO_CREDITS(2));
1365
1366		/* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
1367		wa_write_or(wal,
1368			    GEN8_L3SQCREG4,
1369			    GEN8_LQSC_FLUSH_COHERENT_LINES);
1370	}
1371}
1372
1373static void
1374xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
1375{
1376	struct drm_i915_private *i915 = engine->i915;
1377
1378	/* WaKBLVECSSemaphoreWaitPoll:kbl */
1379	if (IS_KBL_REVID(i915, KBL_REVID_A0, KBL_REVID_E0)) {
1380		wa_write(wal,
1381			 RING_SEMA_WAIT_POLL(engine->mmio_base),
1382			 1);
1383	}
1384}
1385
1386static void
1387engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
1388{
1389	if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 8))
1390		return;
1391
1392	if (engine->class == RENDER_CLASS)
1393		rcs_engine_wa_init(engine, wal);
1394	else
1395		xcs_engine_wa_init(engine, wal);
1396}
1397
1398void intel_engine_init_workarounds(struct intel_engine_cs *engine)
1399{
1400	struct i915_wa_list *wal = &engine->wa_list;
1401
1402	if (INTEL_GEN(engine->i915) < 8)
1403		return;
1404
1405	wa_init_start(wal, "engine", engine->name);
1406	engine_init_workarounds(engine, wal);
1407	wa_init_finish(wal);
1408}
1409
1410void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
1411{
1412	wa_list_apply(engine->uncore, &engine->wa_list);
1413}
1414
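/*
 * Allocate and pin a scratch buffer with room for @count dwords, used by
 * engine_wa_list_verify() as the destination for the register reads emitted
 * in wa_list_srm().
 */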
1415static struct i915_vma *
1416create_scratch(struct i915_address_space *vm, int count)
1417{
1418	struct drm_i915_gem_object *obj;
1419	struct i915_vma *vma;
1420	unsigned int size;
1421	int err;
1422
1423	size = round_up(count * sizeof(u32), PAGE_SIZE);
1424	obj = i915_gem_object_create_internal(vm->i915, size);
1425	if (IS_ERR(obj))
1426		return ERR_CAST(obj);
1427
1428	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
1429
1430	vma = i915_vma_instance(obj, vm, NULL);
1431	if (IS_ERR(vma)) {
1432		err = PTR_ERR(vma);
1433		goto err_obj;
1434	}
1435
1436	err = i915_vma_pin(vma, 0, 0,
1437			   i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
1438	if (err)
1439		goto err_obj;
1440
1441	return vma;
1442
1443err_obj:
1444	i915_gem_object_put(obj);
1445	return ERR_PTR(err);
1446}
1447
1448static bool mcr_range(struct drm_i915_private *i915, u32 offset)
1449{
1450	/*
1451	 * Registers in this range are affected by the MCR selector
1452	 * which only controls CPU-initiated MMIO. Routing does not
1453	 * work for CS access, so we cannot verify them on this path.
1454	 */
1455	if (INTEL_GEN(i915) >= 8 && (offset >= 0xb100 && offset <= 0xb3ff))
1456		return true;
1457
1458	return false;
1459}
1460
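/*
 * Emit one MI_STORE_REGISTER_MEM per (non-MCR) workaround register, storing
 * its current value into the scratch @vma at the entry's index. On gen8+ the
 * command carries a 64-bit address, hence the bumped length field (srm++)
 * and the four dwords reserved per register.
 */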
1461static int
1462wa_list_srm(struct i915_request *rq,
1463	    const struct i915_wa_list *wal,
1464	    struct i915_vma *vma)
1465{
1466	struct drm_i915_private *i915 = rq->i915;
1467	unsigned int i, count = 0;
1468	const struct i915_wa *wa;
1469	u32 srm, *cs;
1470
1471	srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
1472	if (INTEL_GEN(i915) >= 8)
1473		srm++;
1474
1475	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
1476		if (!mcr_range(i915, i915_mmio_reg_offset(wa->reg)))
1477			count++;
1478	}
1479
1480	cs = intel_ring_begin(rq, 4 * count);
1481	if (IS_ERR(cs))
1482		return PTR_ERR(cs);
1483
1484	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
1485		u32 offset = i915_mmio_reg_offset(wa->reg);
1486
1487		if (mcr_range(i915, offset))
1488			continue;
1489
1490		*cs++ = srm;
1491		*cs++ = offset;
1492		*cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
1493		*cs++ = 0;
1494	}
1495	intel_ring_advance(rq, cs);
1496
1497	return 0;
1498}
1499
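/*
 * Read every workaround register back from the CS (via the SRM commands
 * above) and check that the values the hardware sees match what was applied.
 */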
1500static int engine_wa_list_verify(struct intel_context *ce,
1501				 const struct i915_wa_list * const wal,
1502				 const char *from)
1503{
1504	const struct i915_wa *wa;
1505	struct i915_request *rq;
1506	struct i915_vma *vma;
1507	unsigned int i;
1508	u32 *results;
1509	int err;
1510
1511	if (!wal->count)
1512		return 0;
1513
1514	vma = create_scratch(&ce->engine->gt->ggtt->vm, wal->count);
1515	if (IS_ERR(vma))
1516		return PTR_ERR(vma);
1517
1518	rq = intel_context_create_request(ce);
1519	if (IS_ERR(rq)) {
1520		err = PTR_ERR(rq);
1521		goto err_vma;
1522	}
1523
1524	err = wa_list_srm(rq, wal, vma);
1525	if (err)
1526		goto err_vma;
1527
1528	i915_request_add(rq);
1529	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
1530		err = -ETIME;
1531		goto err_vma;
1532	}
1533
1534	results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
1535	if (IS_ERR(results)) {
1536		err = PTR_ERR(results);
1537		goto err_vma;
1538	}
1539
1540	err = 0;
1541	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
1542		if (mcr_range(rq->i915, i915_mmio_reg_offset(wa->reg)))
1543			continue;
1544
1545		if (!wa_verify(wa, results[i], wal->name, from))
1546			err = -ENXIO;
1547	}
1548
1549	i915_gem_object_unpin_map(vma->obj);
1550
1551err_vma:
1552	i915_vma_unpin(vma);
1553	i915_vma_put(vma);
1554	return err;
1555}
1556
1557int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
1558				    const char *from)
1559{
1560	return engine_wa_list_verify(engine->kernel_context,
1561				     &engine->wa_list,
1562				     from);
1563}
1564
1565#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1566#include "selftest_workarounds.c"
1567#endif