   1// SPDX-License-Identifier: MIT
   2/*
   3 * Copyright © 2021 Intel Corporation
   4 */
   5
   6#include "xe_hw_engine.h"
   7
   8#include <linux/nospec.h>
   9
  10#include <drm/drm_managed.h>
  11#include <uapi/drm/xe_drm.h>
  12
  13#include "regs/xe_engine_regs.h"
  14#include "regs/xe_gt_regs.h"
  15#include "regs/xe_irq_regs.h"
  16#include "xe_assert.h"
  17#include "xe_bo.h"
  18#include "xe_device.h"
  19#include "xe_execlist.h"
  20#include "xe_force_wake.h"
  21#include "xe_gsc.h"
  22#include "xe_gt.h"
  23#include "xe_gt_ccs_mode.h"
  24#include "xe_gt_printk.h"
  25#include "xe_gt_mcr.h"
  26#include "xe_gt_topology.h"
  27#include "xe_guc_capture.h"
  28#include "xe_hw_engine_group.h"
  29#include "xe_hw_fence.h"
  30#include "xe_irq.h"
  31#include "xe_lrc.h"
  32#include "xe_macros.h"
  33#include "xe_mmio.h"
  34#include "xe_reg_sr.h"
  35#include "xe_reg_whitelist.h"
  36#include "xe_rtp.h"
  37#include "xe_sched_job.h"
  38#include "xe_sriov.h"
  39#include "xe_tuning.h"
  40#include "xe_uc_fw.h"
  41#include "xe_wa.h"
  42
  43#define MAX_MMIO_BASES 3
  44struct engine_info {
  45	const char *name;
  46	unsigned int class : 8;
  47	unsigned int instance : 8;
  48	unsigned int irq_offset : 8;
  49	enum xe_force_wake_domains domain;
  50	u32 mmio_base;
  51};
  52
  53static const struct engine_info engine_infos[] = {
  54	[XE_HW_ENGINE_RCS0] = {
  55		.name = "rcs0",
  56		.class = XE_ENGINE_CLASS_RENDER,
  57		.instance = 0,
  58		.irq_offset = ilog2(INTR_RCS0),
  59		.domain = XE_FW_RENDER,
  60		.mmio_base = RENDER_RING_BASE,
  61	},
  62	[XE_HW_ENGINE_BCS0] = {
  63		.name = "bcs0",
  64		.class = XE_ENGINE_CLASS_COPY,
  65		.instance = 0,
  66		.irq_offset = ilog2(INTR_BCS(0)),
  67		.domain = XE_FW_RENDER,
  68		.mmio_base = BLT_RING_BASE,
  69	},
  70	[XE_HW_ENGINE_BCS1] = {
  71		.name = "bcs1",
  72		.class = XE_ENGINE_CLASS_COPY,
  73		.instance = 1,
  74		.irq_offset = ilog2(INTR_BCS(1)),
  75		.domain = XE_FW_RENDER,
  76		.mmio_base = XEHPC_BCS1_RING_BASE,
  77	},
  78	[XE_HW_ENGINE_BCS2] = {
  79		.name = "bcs2",
  80		.class = XE_ENGINE_CLASS_COPY,
  81		.instance = 2,
  82		.irq_offset = ilog2(INTR_BCS(2)),
  83		.domain = XE_FW_RENDER,
  84		.mmio_base = XEHPC_BCS2_RING_BASE,
  85	},
  86	[XE_HW_ENGINE_BCS3] = {
  87		.name = "bcs3",
  88		.class = XE_ENGINE_CLASS_COPY,
  89		.instance = 3,
  90		.irq_offset = ilog2(INTR_BCS(3)),
  91		.domain = XE_FW_RENDER,
  92		.mmio_base = XEHPC_BCS3_RING_BASE,
  93	},
  94	[XE_HW_ENGINE_BCS4] = {
  95		.name = "bcs4",
  96		.class = XE_ENGINE_CLASS_COPY,
  97		.instance = 4,
  98		.irq_offset = ilog2(INTR_BCS(4)),
  99		.domain = XE_FW_RENDER,
 100		.mmio_base = XEHPC_BCS4_RING_BASE,
 101	},
 102	[XE_HW_ENGINE_BCS5] = {
 103		.name = "bcs5",
 104		.class = XE_ENGINE_CLASS_COPY,
 105		.instance = 5,
 106		.irq_offset = ilog2(INTR_BCS(5)),
 107		.domain = XE_FW_RENDER,
 108		.mmio_base = XEHPC_BCS5_RING_BASE,
 109	},
 110	[XE_HW_ENGINE_BCS6] = {
 111		.name = "bcs6",
 112		.class = XE_ENGINE_CLASS_COPY,
 113		.instance = 6,
 114		.irq_offset = ilog2(INTR_BCS(6)),
 115		.domain = XE_FW_RENDER,
 116		.mmio_base = XEHPC_BCS6_RING_BASE,
 117	},
 118	[XE_HW_ENGINE_BCS7] = {
 119		.name = "bcs7",
 120		.class = XE_ENGINE_CLASS_COPY,
  121		.instance = 7,
  122		.irq_offset = ilog2(INTR_BCS(7)),
 123		.domain = XE_FW_RENDER,
 124		.mmio_base = XEHPC_BCS7_RING_BASE,
 125	},
 126	[XE_HW_ENGINE_BCS8] = {
 127		.name = "bcs8",
 128		.class = XE_ENGINE_CLASS_COPY,
 129		.instance = 8,
 130		.irq_offset = ilog2(INTR_BCS8),
 131		.domain = XE_FW_RENDER,
 132		.mmio_base = XEHPC_BCS8_RING_BASE,
 133	},
 134
 135	[XE_HW_ENGINE_VCS0] = {
 136		.name = "vcs0",
 137		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
 138		.instance = 0,
 139		.irq_offset = 32 + ilog2(INTR_VCS(0)),
 140		.domain = XE_FW_MEDIA_VDBOX0,
 141		.mmio_base = BSD_RING_BASE,
 142	},
 143	[XE_HW_ENGINE_VCS1] = {
 144		.name = "vcs1",
 145		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
 146		.instance = 1,
 147		.irq_offset = 32 + ilog2(INTR_VCS(1)),
 148		.domain = XE_FW_MEDIA_VDBOX1,
 149		.mmio_base = BSD2_RING_BASE,
 150	},
 151	[XE_HW_ENGINE_VCS2] = {
 152		.name = "vcs2",
 153		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
 154		.instance = 2,
 155		.irq_offset = 32 + ilog2(INTR_VCS(2)),
 156		.domain = XE_FW_MEDIA_VDBOX2,
 157		.mmio_base = BSD3_RING_BASE,
 158	},
 159	[XE_HW_ENGINE_VCS3] = {
 160		.name = "vcs3",
 161		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
 162		.instance = 3,
 163		.irq_offset = 32 + ilog2(INTR_VCS(3)),
 164		.domain = XE_FW_MEDIA_VDBOX3,
 165		.mmio_base = BSD4_RING_BASE,
 166	},
 167	[XE_HW_ENGINE_VCS4] = {
 168		.name = "vcs4",
 169		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
 170		.instance = 4,
 171		.irq_offset = 32 + ilog2(INTR_VCS(4)),
 172		.domain = XE_FW_MEDIA_VDBOX4,
 173		.mmio_base = XEHP_BSD5_RING_BASE,
 174	},
 175	[XE_HW_ENGINE_VCS5] = {
 176		.name = "vcs5",
 177		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
 178		.instance = 5,
 179		.irq_offset = 32 + ilog2(INTR_VCS(5)),
 180		.domain = XE_FW_MEDIA_VDBOX5,
 181		.mmio_base = XEHP_BSD6_RING_BASE,
 182	},
 183	[XE_HW_ENGINE_VCS6] = {
 184		.name = "vcs6",
 185		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
 186		.instance = 6,
 187		.irq_offset = 32 + ilog2(INTR_VCS(6)),
 188		.domain = XE_FW_MEDIA_VDBOX6,
 189		.mmio_base = XEHP_BSD7_RING_BASE,
 190	},
 191	[XE_HW_ENGINE_VCS7] = {
 192		.name = "vcs7",
 193		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
 194		.instance = 7,
 195		.irq_offset = 32 + ilog2(INTR_VCS(7)),
 196		.domain = XE_FW_MEDIA_VDBOX7,
 197		.mmio_base = XEHP_BSD8_RING_BASE,
 198	},
 199	[XE_HW_ENGINE_VECS0] = {
 200		.name = "vecs0",
 201		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
 202		.instance = 0,
 203		.irq_offset = 32 + ilog2(INTR_VECS(0)),
 204		.domain = XE_FW_MEDIA_VEBOX0,
 205		.mmio_base = VEBOX_RING_BASE,
 206	},
 207	[XE_HW_ENGINE_VECS1] = {
 208		.name = "vecs1",
 209		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
 210		.instance = 1,
 211		.irq_offset = 32 + ilog2(INTR_VECS(1)),
 212		.domain = XE_FW_MEDIA_VEBOX1,
 213		.mmio_base = VEBOX2_RING_BASE,
 214	},
 215	[XE_HW_ENGINE_VECS2] = {
 216		.name = "vecs2",
 217		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
 218		.instance = 2,
 219		.irq_offset = 32 + ilog2(INTR_VECS(2)),
 220		.domain = XE_FW_MEDIA_VEBOX2,
 221		.mmio_base = XEHP_VEBOX3_RING_BASE,
 222	},
 223	[XE_HW_ENGINE_VECS3] = {
 224		.name = "vecs3",
 225		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
 226		.instance = 3,
 227		.irq_offset = 32 + ilog2(INTR_VECS(3)),
 228		.domain = XE_FW_MEDIA_VEBOX3,
 229		.mmio_base = XEHP_VEBOX4_RING_BASE,
 230	},
 231	[XE_HW_ENGINE_CCS0] = {
 232		.name = "ccs0",
 233		.class = XE_ENGINE_CLASS_COMPUTE,
 234		.instance = 0,
 235		.irq_offset = ilog2(INTR_CCS(0)),
 236		.domain = XE_FW_RENDER,
 237		.mmio_base = COMPUTE0_RING_BASE,
 238	},
 239	[XE_HW_ENGINE_CCS1] = {
 240		.name = "ccs1",
 241		.class = XE_ENGINE_CLASS_COMPUTE,
 242		.instance = 1,
 243		.irq_offset = ilog2(INTR_CCS(1)),
 244		.domain = XE_FW_RENDER,
 245		.mmio_base = COMPUTE1_RING_BASE,
 246	},
 247	[XE_HW_ENGINE_CCS2] = {
 248		.name = "ccs2",
 249		.class = XE_ENGINE_CLASS_COMPUTE,
 250		.instance = 2,
 251		.irq_offset = ilog2(INTR_CCS(2)),
 252		.domain = XE_FW_RENDER,
 253		.mmio_base = COMPUTE2_RING_BASE,
 254	},
 255	[XE_HW_ENGINE_CCS3] = {
 256		.name = "ccs3",
 257		.class = XE_ENGINE_CLASS_COMPUTE,
 258		.instance = 3,
 259		.irq_offset = ilog2(INTR_CCS(3)),
 260		.domain = XE_FW_RENDER,
 261		.mmio_base = COMPUTE3_RING_BASE,
 262	},
 263	[XE_HW_ENGINE_GSCCS0] = {
 264		.name = "gsccs0",
 265		.class = XE_ENGINE_CLASS_OTHER,
 266		.instance = OTHER_GSC_INSTANCE,
 267		.domain = XE_FW_GSC,
 268		.mmio_base = GSCCS_RING_BASE,
 269	},
 270};
 271
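     /*
      * devm cleanup action: destroy the execlist port, if one was created,
      * and detach the engine from its GT.
      */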
 272static void hw_engine_fini(void *arg)
 273{
 274	struct xe_hw_engine *hwe = arg;
 275
 276	if (hwe->exl_port)
 277		xe_execlist_port_destroy(hwe->exl_port);
 278
 279	hwe->gt = NULL;
 280}
 281
 282/**
 283 * xe_hw_engine_mmio_write32() - Write engine register
 284 * @hwe: engine
 285 * @reg: register to write into
 286 * @val: desired 32-bit value to write
 287 *
  288 * This function will write val into an engine-specific register.
 289 * Forcewake must be held by the caller.
 290 *
 291 */
 292void xe_hw_engine_mmio_write32(struct xe_hw_engine *hwe,
 293			       struct xe_reg reg, u32 val)
 294{
 295	xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base));
 296	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);
 297
 298	reg.addr += hwe->mmio_base;
 299
 300	xe_mmio_write32(&hwe->gt->mmio, reg, val);
 301}
 302
 303/**
 304 * xe_hw_engine_mmio_read32() - Read engine register
 305 * @hwe: engine
 306 * @reg: register to read from
 307 *
  308 * This function will read from an engine-specific register.
 309 * Forcewake must be held by the caller.
 310 *
 311 * Return: value of the 32-bit register.
 312 */
 313u32 xe_hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg)
 314{
 315	xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base));
 316	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);
 317
 318	reg.addr += hwe->mmio_base;
 319
 320	return xe_mmio_read32(&hwe->gt->mmio, reg);
 321}
 322
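     /*
      * Enable the engine ring for command submission: program RING_HWSTAM,
      * point RING_HWS_PGA at the engine's HWSP in GGTT, disable the legacy
      * ring-buffer mode and clear STOP_RING. Compute engines additionally
      * have their CCS units enabled via RCU_MODE. The final RING_MI_MODE
      * read acts as a posting read for the preceding writes.
      */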
 323void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
 324{
 325	u32 ccs_mask =
 326		xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE);
 327
 328	if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask)
 329		xe_mmio_write32(&hwe->gt->mmio, RCU_MODE,
 330				_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));
 331
 332	xe_hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0);
 333	xe_hw_engine_mmio_write32(hwe, RING_HWS_PGA(0),
 334				  xe_bo_ggtt_addr(hwe->hwsp));
 335	xe_hw_engine_mmio_write32(hwe, RING_MODE(0),
 336				  _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));
 337	xe_hw_engine_mmio_write32(hwe, RING_MI_MODE(0),
 338				  _MASKED_BIT_DISABLE(STOP_RING));
 339	xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
 340}
 341
 342static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_gt *gt,
 343						 const struct xe_hw_engine *hwe)
 344{
 345	return xe_gt_ccs_mode_enabled(gt) &&
 346	       xe_rtp_match_first_render_or_compute(gt, hwe);
 347}
 348
 349static bool xe_rtp_cfeg_wmtp_disabled(const struct xe_gt *gt,
 350				      const struct xe_hw_engine *hwe)
 351{
 352	if (GRAPHICS_VER(gt_to_xe(gt)) < 20)
 353		return false;
 354
 355	if (hwe->class != XE_ENGINE_CLASS_COMPUTE &&
 356	    hwe->class != XE_ENGINE_CLASS_RENDER)
 357		return false;
 358
 359	return xe_mmio_read32(&hwe->gt->mmio, XEHP_FUSE4) & CFEG_WMTP_DISABLE;
 360}
 361
 362void
 363xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe)
 364{
 365	struct xe_gt *gt = hwe->gt;
 366	const u8 mocs_write_idx = gt->mocs.uc_index;
 367	const u8 mocs_read_idx = gt->mocs.uc_index;
 368	u32 blit_cctl_val = REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, mocs_write_idx) |
 369			    REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, mocs_read_idx);
 370	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
 371	const struct xe_rtp_entry_sr lrc_setup[] = {
 372		/*
  373		 * Some blitter commands do not have a field for MOCS; those
  374		 * commands use the MOCS index pointed to by BLIT_CCTL, so the
  375		 * BLIT_CCTL registers need to be programmed as un-cached.
 376		 */
 377		{ XE_RTP_NAME("BLIT_CCTL_default_MOCS"),
 378		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED),
 379			       ENGINE_CLASS(COPY)),
 380		  XE_RTP_ACTIONS(FIELD_SET(BLIT_CCTL(0),
 381				 BLIT_CCTL_DST_MOCS_MASK |
 382				 BLIT_CCTL_SRC_MOCS_MASK,
 383				 blit_cctl_val,
 384				 XE_RTP_ACTION_FLAG(ENGINE_BASE)))
 385		},
 386		/* Use Fixed slice CCS mode */
 387		{ XE_RTP_NAME("RCU_MODE_FIXED_SLICE_CCS_MODE"),
 388		  XE_RTP_RULES(FUNC(xe_hw_engine_match_fixed_cslice_mode)),
 389		  XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE,
 390					   RCU_MODE_FIXED_SLICE_CCS_MODE))
 391		},
 392		/* Disable WMTP if HW doesn't support it */
 393		{ XE_RTP_NAME("DISABLE_WMTP_ON_UNSUPPORTED_HW"),
 394		  XE_RTP_RULES(FUNC(xe_rtp_cfeg_wmtp_disabled)),
 395		  XE_RTP_ACTIONS(FIELD_SET(CS_CHICKEN1(0),
 396					   PREEMPT_GPGPU_LEVEL_MASK,
 397					   PREEMPT_GPGPU_THREAD_GROUP_LEVEL)),
 398		  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE)
 399		},
 400		{}
 401	};
 402
 403	xe_rtp_process_to_sr(&ctx, lrc_setup, &hwe->reg_lrc);
 404}
 405
 406static void
 407hw_engine_setup_default_state(struct xe_hw_engine *hwe)
 408{
 409	struct xe_gt *gt = hwe->gt;
 410	struct xe_device *xe = gt_to_xe(gt);
 411	/*
 412	 * RING_CMD_CCTL specifies the default MOCS entry that will be
 413	 * used by the command streamer when executing commands that
 414	 * don't have a way to explicitly specify a MOCS setting.
 415	 * The default should usually reference whichever MOCS entry
 416	 * corresponds to uncached behavior, although use of a WB cached
 417	 * entry is recommended by the spec in certain circumstances on
 418	 * specific platforms.
 419	 * Bspec: 72161
 420	 */
 421	const u8 mocs_write_idx = gt->mocs.uc_index;
 422	const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE && IS_DGFX(xe) &&
 423				 (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC) ?
 424				 gt->mocs.wb_index : gt->mocs.uc_index;
 425	u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) |
 426				REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, mocs_read_idx);
 427	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
 428	const struct xe_rtp_entry_sr engine_entries[] = {
 429		{ XE_RTP_NAME("RING_CMD_CCTL_default_MOCS"),
 430		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED)),
 431		  XE_RTP_ACTIONS(FIELD_SET(RING_CMD_CCTL(0),
 432					   CMD_CCTL_WRITE_OVERRIDE_MASK |
 433					   CMD_CCTL_READ_OVERRIDE_MASK,
 434					   ring_cmd_cctl_val,
 435					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
 436		},
 437		/*
 438		 * To allow the GSC engine to go idle on MTL we need to enable
 439		 * idle messaging and set the hysteresis value (we use 0xA=5us
 440		 * as recommended in spec). On platforms after MTL this is
 441		 * enabled by default.
 442		 */
 443		{ XE_RTP_NAME("MTL GSCCS IDLE MSG enable"),
 444		  XE_RTP_RULES(MEDIA_VERSION(1300), ENGINE_CLASS(OTHER)),
 445		  XE_RTP_ACTIONS(CLR(RING_PSMI_CTL(0),
 446				     IDLE_MSG_DISABLE,
 447				     XE_RTP_ACTION_FLAG(ENGINE_BASE)),
 448				 FIELD_SET(RING_PWRCTX_MAXCNT(0),
 449					   IDLE_WAIT_TIME,
 450					   0xA,
 451					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
 452		},
 453		/* Enable Priority Mem Read */
 454		{ XE_RTP_NAME("Priority_Mem_Read"),
 455		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
 456		  XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), CS_PRIORITY_MEM_READ,
 457				     XE_RTP_ACTION_FLAG(ENGINE_BASE)))
 458		},
 459		{}
 460	};
 461
 462	xe_rtp_process_to_sr(&ctx, engine_entries, &hwe->reg_sr);
 463}
 464
 465static const struct engine_info *find_engine_info(enum xe_engine_class class, int instance)
 466{
 467	const struct engine_info *info;
 468	enum xe_hw_engine_id id;
 469
 470	for (id = 0; id < XE_NUM_HW_ENGINES; ++id) {
 471		info = &engine_infos[id];
 472		if (info->class == class && info->instance == instance)
 473			return info;
 474	}
 475
 476	return NULL;
 477}
 478
 479static u16 get_msix_irq_offset(struct xe_gt *gt, enum xe_engine_class class)
 480{
  481	/* For MSI-X, HW engines report at the IRQ offset of engine instance zero */
 482	const struct engine_info *info = find_engine_info(class, 0);
 483
 484	xe_gt_assert(gt, info);
 485
 486	return info ? info->irq_offset : 0;
 487}
 488
 489static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
 490				 enum xe_hw_engine_id id)
 491{
 492	const struct engine_info *info;
 493
 494	if (WARN_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name))
 495		return;
 496
 497	if (!(gt->info.engine_mask & BIT(id)))
 498		return;
 499
 500	info = &engine_infos[id];
 501
 502	xe_gt_assert(gt, !hwe->gt);
 503
 504	hwe->gt = gt;
 505	hwe->class = info->class;
 506	hwe->instance = info->instance;
 507	hwe->mmio_base = info->mmio_base;
 508	hwe->irq_offset = xe_device_has_msix(gt_to_xe(gt)) ?
 509		get_msix_irq_offset(gt, info->class) :
 510		info->irq_offset;
 511	hwe->domain = info->domain;
 512	hwe->name = info->name;
 513	hwe->fence_irq = &gt->fence_irq[info->class];
 514	hwe->engine_id = id;
 515
 516	hwe->eclass = &gt->eclass[hwe->class];
 517	if (!hwe->eclass->sched_props.job_timeout_ms) {
 518		hwe->eclass->sched_props.job_timeout_ms = 5 * 1000;
 519		hwe->eclass->sched_props.job_timeout_min = XE_HW_ENGINE_JOB_TIMEOUT_MIN;
 520		hwe->eclass->sched_props.job_timeout_max = XE_HW_ENGINE_JOB_TIMEOUT_MAX;
 521		hwe->eclass->sched_props.timeslice_us = 1 * 1000;
 522		hwe->eclass->sched_props.timeslice_min = XE_HW_ENGINE_TIMESLICE_MIN;
 523		hwe->eclass->sched_props.timeslice_max = XE_HW_ENGINE_TIMESLICE_MAX;
 524		hwe->eclass->sched_props.preempt_timeout_us = XE_HW_ENGINE_PREEMPT_TIMEOUT;
 525		hwe->eclass->sched_props.preempt_timeout_min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN;
 526		hwe->eclass->sched_props.preempt_timeout_max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX;
 527
 528		/*
 529		 * The GSC engine can accept submissions while the GSC shim is
 530		 * being reset, during which time the submission is stalled. In
 531		 * the worst case, the shim reset can take up to the maximum GSC
 532		 * command execution time (250ms), so the request start can be
 533		 * delayed by that much; the request itself can take that long
 534		 * without being preemptible, which means worst case it can
 535		 * theoretically take up to 500ms for a preemption to go through
 536		 * on the GSC engine. Adding to that an extra 100ms as a safety
 537		 * margin, we get a minimum recommended timeout of 600ms.
 538		 * The preempt_timeout value can't be tuned for OTHER_CLASS
 539		 * because the class is reserved for kernel usage, so we just
 540		 * need to make sure that the starting value is above that
 541		 * threshold; since our default value (640ms) is greater than
 542		 * 600ms, the only way we can go below is via a kconfig setting.
 543		 * If that happens, log it in dmesg and update the value.
 544		 */
 545		if (hwe->class == XE_ENGINE_CLASS_OTHER) {
 546			const u32 min_preempt_timeout = 600 * 1000;
 547			if (hwe->eclass->sched_props.preempt_timeout_us < min_preempt_timeout) {
 548				hwe->eclass->sched_props.preempt_timeout_us = min_preempt_timeout;
 549				xe_gt_notice(gt, "Increasing preempt_timeout for GSC to 600ms\n");
 550			}
 551		}
 552
 553		/* Record default props */
 554		hwe->eclass->defaults = hwe->eclass->sched_props;
 555	}
 556
 557	xe_reg_sr_init(&hwe->reg_sr, hwe->name, gt_to_xe(gt));
 558	xe_tuning_process_engine(hwe);
 559	xe_wa_process_engine(hwe);
 560	hw_engine_setup_default_state(hwe);
 561
 562	xe_reg_sr_init(&hwe->reg_whitelist, hwe->name, gt_to_xe(gt));
 563	xe_reg_whitelist_process_engine(hwe);
 564}
 565
 566static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
 567			  enum xe_hw_engine_id id)
 568{
 569	struct xe_device *xe = gt_to_xe(gt);
 570	struct xe_tile *tile = gt_to_tile(gt);
 571	int err;
 572
 573	xe_gt_assert(gt, id < ARRAY_SIZE(engine_infos) && engine_infos[id].name);
 574	xe_gt_assert(gt, gt->info.engine_mask & BIT(id));
 575
 576	xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
 577	xe_reg_sr_apply_whitelist(hwe);
 578
 579	hwe->hwsp = xe_managed_bo_create_pin_map(xe, tile, SZ_4K,
 580						 XE_BO_FLAG_VRAM_IF_DGFX(tile) |
 581						 XE_BO_FLAG_GGTT |
 582						 XE_BO_FLAG_GGTT_INVALIDATE);
 583	if (IS_ERR(hwe->hwsp)) {
 584		err = PTR_ERR(hwe->hwsp);
 585		goto err_name;
 586	}
 587
 588	if (!xe_device_uc_enabled(xe)) {
 589		hwe->exl_port = xe_execlist_port_create(xe, hwe);
 590		if (IS_ERR(hwe->exl_port)) {
 591			err = PTR_ERR(hwe->exl_port);
 592			goto err_hwsp;
 593		}
 594	} else {
 595		/* GSCCS has a special interrupt for reset */
 596		if (hwe->class == XE_ENGINE_CLASS_OTHER)
 597			hwe->irq_handler = xe_gsc_hwe_irq_handler;
 598
 599		if (!IS_SRIOV_VF(xe))
 600			xe_hw_engine_enable_ring(hwe);
 601	}
 602
 603	/* We reserve the highest BCS instance for USM */
 604	if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY)
 605		gt->usm.reserved_bcs_instance = hwe->instance;
 606
 607	return devm_add_action_or_reset(xe->drm.dev, hw_engine_fini, hwe);
 608
 609err_hwsp:
 610	xe_bo_unpin_map_no_vm(hwe->hwsp);
 611err_name:
 612	hwe->name = NULL;
 613
 614	return err;
 615}
 616
 617static void hw_engine_setup_logical_mapping(struct xe_gt *gt)
 618{
 619	int class;
 620
 621	/* FIXME: Doing a simple logical mapping that works for most hardware */
 622	for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
 623		struct xe_hw_engine *hwe;
 624		enum xe_hw_engine_id id;
 625		int logical_instance = 0;
 626
 627		for_each_hw_engine(hwe, gt, id)
 628			if (hwe->class == class)
 629				hwe->logical_instance = logical_instance++;
 630	}
 631}
 632
 633static void read_media_fuses(struct xe_gt *gt)
 634{
 635	struct xe_device *xe = gt_to_xe(gt);
 636	u32 media_fuse;
 637	u16 vdbox_mask;
 638	u16 vebox_mask;
 639	int i, j;
 640
 641	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
 642
 643	media_fuse = xe_mmio_read32(&gt->mmio, GT_VEBOX_VDBOX_DISABLE);
 644
 645	/*
 646	 * Pre-Xe_HP platforms had register bits representing absent engines,
 647	 * whereas Xe_HP and beyond have bits representing present engines.
 648	 * Invert the polarity on old platforms so that we can use common
 649	 * handling below.
 650	 */
 651	if (GRAPHICS_VERx100(xe) < 1250)
 652		media_fuse = ~media_fuse;
 653
 654	vdbox_mask = REG_FIELD_GET(GT_VDBOX_DISABLE_MASK, media_fuse);
 655	vebox_mask = REG_FIELD_GET(GT_VEBOX_DISABLE_MASK, media_fuse);
 656
 657	for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
 658		if (!(gt->info.engine_mask & BIT(i)))
 659			continue;
 660
 661		if (!(BIT(j) & vdbox_mask)) {
 662			gt->info.engine_mask &= ~BIT(i);
 663			drm_info(&xe->drm, "vcs%u fused off\n", j);
 664		}
 665	}
 666
 667	for (i = XE_HW_ENGINE_VECS0, j = 0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) {
 668		if (!(gt->info.engine_mask & BIT(i)))
 669			continue;
 670
 671		if (!(BIT(j) & vebox_mask)) {
 672			gt->info.engine_mask &= ~BIT(i);
 673			drm_info(&xe->drm, "vecs%u fused off\n", j);
 674		}
 675	}
 676}
 677
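     /*
      * Copy-engine fusing only exists on graphics version 12.60..12.6x
      * (Xe_HPC/PVC): each MEML3_EN fuse bit covers a pair of the BCS1-BCS8
      * copy engines, which are fused off together.
      */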
 678static void read_copy_fuses(struct xe_gt *gt)
 679{
 680	struct xe_device *xe = gt_to_xe(gt);
 681	u32 bcs_mask;
 682
 683	if (GRAPHICS_VERx100(xe) < 1260 || GRAPHICS_VERx100(xe) >= 1270)
 684		return;
 685
 686	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
 687
 688	bcs_mask = xe_mmio_read32(&gt->mmio, MIRROR_FUSE3);
 689	bcs_mask = REG_FIELD_GET(MEML3_EN_MASK, bcs_mask);
 690
 691	/* BCS0 is always present; only BCS1-BCS8 may be fused off */
 692	for (int i = XE_HW_ENGINE_BCS1, j = 0; i <= XE_HW_ENGINE_BCS8; ++i, ++j) {
 693		if (!(gt->info.engine_mask & BIT(i)))
 694			continue;
 695
 696		if (!(BIT(j / 2) & bcs_mask)) {
 697			gt->info.engine_mask &= ~BIT(i);
 698			drm_info(&xe->drm, "bcs%u fused off\n", j);
 699		}
 700	}
 701}
 702
 703static void read_compute_fuses_from_dss(struct xe_gt *gt)
 704{
 705	struct xe_device *xe = gt_to_xe(gt);
 706
 707	/*
 708	 * CCS fusing based on DSS masks only applies to platforms that can
 709	 * have more than one CCS.
 710	 */
 711	if (hweight64(gt->info.engine_mask &
 712		      GENMASK_ULL(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0)) <= 1)
 713		return;
 714
 715	/*
 716	 * CCS availability on Xe_HP is inferred from the presence of DSS in
 717	 * each quadrant.
 718	 */
 719	for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) {
 720		if (!(gt->info.engine_mask & BIT(i)))
 721			continue;
 722
 723		if (!xe_gt_topology_has_dss_in_quadrant(gt, j)) {
 724			gt->info.engine_mask &= ~BIT(i);
 725			drm_info(&xe->drm, "ccs%u fused off\n", j);
 726		}
 727	}
 728}
 729
 730static void read_compute_fuses_from_reg(struct xe_gt *gt)
 731{
 732	struct xe_device *xe = gt_to_xe(gt);
 733	u32 ccs_mask;
 734
 735	ccs_mask = xe_mmio_read32(&gt->mmio, XEHP_FUSE4);
 736	ccs_mask = REG_FIELD_GET(CCS_EN_MASK, ccs_mask);
 737
 738	for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) {
 739		if (!(gt->info.engine_mask & BIT(i)))
 740			continue;
 741
 742		if ((ccs_mask & BIT(j)) == 0) {
 743			gt->info.engine_mask &= ~BIT(i);
 744			drm_info(&xe->drm, "ccs%u fused off\n", j);
 745		}
 746	}
 747}
 748
 749static void read_compute_fuses(struct xe_gt *gt)
 750{
 751	if (GRAPHICS_VER(gt_to_xe(gt)) >= 20)
 752		read_compute_fuses_from_reg(gt);
 753	else
 754		read_compute_fuses_from_dss(gt);
 755}
 756
 757static void check_gsc_availability(struct xe_gt *gt)
 758{
 759	struct xe_device *xe = gt_to_xe(gt);
 760
 761	if (!(gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0)))
 762		return;
 763
 764	/*
 765	 * The GSCCS is only used to communicate with the GSC FW, so if we don't
  766	 * have the FW there is nothing we need the engine for, and we can
  767	 * therefore skip its initialization.
 768	 */
 769	if (!xe_uc_fw_is_available(&gt->uc.gsc.fw)) {
 770		gt->info.engine_mask &= ~BIT(XE_HW_ENGINE_GSCCS0);
 771
  772		/* interrupts were previously enabled, so turn them off */
 773		xe_mmio_write32(&gt->mmio, GUNIT_GSC_INTR_ENABLE, 0);
 774		xe_mmio_write32(&gt->mmio, GUNIT_GSC_INTR_MASK, ~0);
 775
 776		drm_info(&xe->drm, "gsccs disabled due to lack of FW\n");
 777	}
 778}
 779
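     /**
      * xe_hw_engines_init_early() - Early initialization of the GT's HW engines
      * @gt: GT structure
      *
      * Trim the GT's engine mask based on the media, copy and compute fuse
      * registers and on GSC firmware availability, then perform the early,
      * software-only setup of every remaining engine.
      *
      * Return: 0 on success.
      */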
 780int xe_hw_engines_init_early(struct xe_gt *gt)
 781{
 782	int i;
 783
 784	read_media_fuses(gt);
 785	read_copy_fuses(gt);
 786	read_compute_fuses(gt);
 787	check_gsc_availability(gt);
 788
 789	BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT < XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN);
 790	BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT > XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX);
 791
 792	for (i = 0; i < ARRAY_SIZE(gt->hw_engines); i++)
 793		hw_engine_init_early(gt, &gt->hw_engines[i], i);
 794
 795	return 0;
 796}
 797
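     /**
      * xe_hw_engines_init() - Initialize the GT's HW engines
      * @gt: GT structure
      *
      * Perform the MMIO part of engine initialization for every engine present
      * on @gt, set up the logical instance mapping and create the HW engine
      * groups.
      *
      * Return: 0 on success, negative error code on failure.
      */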
 798int xe_hw_engines_init(struct xe_gt *gt)
 799{
 800	int err;
 801	struct xe_hw_engine *hwe;
 802	enum xe_hw_engine_id id;
 803
 804	for_each_hw_engine(hwe, gt, id) {
 805		err = hw_engine_init(gt, hwe, id);
 806		if (err)
 807			return err;
 808	}
 809
 810	hw_engine_setup_logical_mapping(gt);
 811	err = xe_hw_engine_setup_groups(gt);
 812	if (err)
 813		return err;
 814
 815	return 0;
 816}
 817
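     /**
      * xe_hw_engine_handle_irq() - Handle an interrupt targeting a HW engine
      * @hwe: engine
      * @intr_vec: interrupt vector bits for this engine
      *
      * Wake up any user-fence waiters, invoke the engine-specific IRQ handler
      * if one is installed (e.g. for the GSCCS) and, on a user interrupt, run
      * the engine's fence IRQ handler to signal completed hardware fences.
      */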
 818void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec)
 819{
 820	wake_up_all(&gt_to_xe(hwe->gt)->ufence_wq);
 821
 822	if (hwe->irq_handler)
 823		hwe->irq_handler(hwe, intr_vec);
 824
 825	if (intr_vec & GT_RENDER_USER_INTERRUPT)
 826		xe_hw_fence_irq_run(hwe->fence_irq);
 827}
 828
 829/**
 830 * xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine.
 831 * @hwe: Xe HW Engine.
 832 * @job: The job object.
 833 *
  834 * This can be printed out at a later stage, for example during
  835 * dev_coredump analysis.
 836 *
 837 * Returns: a Xe HW Engine snapshot object that must be freed by the
 838 * caller, using `xe_hw_engine_snapshot_free`.
 839 */
 840struct xe_hw_engine_snapshot *
 841xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_sched_job *job)
 842{
 843	struct xe_hw_engine_snapshot *snapshot;
 844	struct __guc_capture_parsed_output *node;
 845
 846	if (!xe_hw_engine_is_valid(hwe))
 847		return NULL;
 848
 849	snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);
 850
 851	if (!snapshot)
 852		return NULL;
 853
 854	snapshot->name = kstrdup(hwe->name, GFP_ATOMIC);
 855	snapshot->hwe = hwe;
 856	snapshot->logical_instance = hwe->logical_instance;
 857	snapshot->forcewake.domain = hwe->domain;
 858	snapshot->forcewake.ref = xe_force_wake_ref(gt_to_fw(hwe->gt),
 859						    hwe->domain);
 860	snapshot->mmio_base = hwe->mmio_base;
 861	snapshot->kernel_reserved = xe_hw_engine_is_reserved(hwe);
 862
 863	/* no more VF accessible data below this point */
 864	if (IS_SRIOV_VF(gt_to_xe(hwe->gt)))
 865		return snapshot;
 866
 867	if (job) {
  868		/* If we got a GuC capture, set the source to GuC */
 869		node = xe_guc_capture_get_matching_and_lock(job);
 870		if (node) {
 871			struct xe_device *xe = gt_to_xe(hwe->gt);
 872			struct xe_devcoredump *coredump = &xe->devcoredump;
 873
 874			coredump->snapshot.matched_node = node;
 875			snapshot->source = XE_ENGINE_CAPTURE_SOURCE_GUC;
 876			xe_gt_dbg(hwe->gt, "Found and locked GuC-err-capture node");
 877			return snapshot;
 878		}
 879	}
 880
 881	/* otherwise, do manual capture */
 882	xe_engine_manual_capture(hwe, snapshot);
 883	snapshot->source = XE_ENGINE_CAPTURE_SOURCE_MANUAL;
 884	xe_gt_dbg(hwe->gt, "Proceeding with manual engine snapshot");
 885
 886	return snapshot;
 887}
 888
 889/**
 890 * xe_hw_engine_snapshot_free - Free all allocated objects for a given snapshot.
 891 * @snapshot: Xe HW Engine snapshot object.
 892 *
  893 * This function frees all the memory that was allocated at capture
  894 * time.
 895 */
 896void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot)
 897{
 898	struct xe_gt *gt;
 899	if (!snapshot)
 900		return;
 901
 902	gt = snapshot->hwe->gt;
 903	/*
 904	 * xe_guc_capture_put_matched_nodes is called here and from
 905	 * xe_devcoredump_snapshot_free, to cover the 2 calling paths
 906	 * of hw_engines - debugfs and devcoredump free.
 907	 */
 908	xe_guc_capture_put_matched_nodes(&gt->uc.guc);
 909
 910	kfree(snapshot->name);
 911	kfree(snapshot);
 912}
 913
 914/**
  915 * xe_hw_engine_print - Capture and print the HW Engine's state.
 916 * @hwe: Hardware Engine.
 917 * @p: drm_printer.
 918 *
  919 * This function quickly captures a snapshot and immediately prints it out.
 920 */
 921void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p)
 922{
 923	struct xe_hw_engine_snapshot *snapshot;
 924
 925	snapshot = xe_hw_engine_snapshot_capture(hwe, NULL);
 926	xe_engine_snapshot_print(snapshot, p);
 927	xe_hw_engine_snapshot_free(snapshot);
 928}
 929
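     /**
      * xe_hw_engine_mask_per_class() - Mask of present engines of a class
      * @gt: GT structure
      * @engine_class: engine class to query
      *
      * Return: bitmask of the instances of @engine_class present in @gt's
      * engine mask.
      */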
 930u32 xe_hw_engine_mask_per_class(struct xe_gt *gt,
 931				enum xe_engine_class engine_class)
 932{
 933	u32 mask = 0;
 934	enum xe_hw_engine_id id;
 935
 936	for (id = 0; id < XE_NUM_HW_ENGINES; ++id) {
 937		if (engine_infos[id].class == engine_class &&
 938		    gt->info.engine_mask & BIT(id))
 939			mask |= BIT(engine_infos[id].instance);
 940	}
 941	return mask;
 942}
 943
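     /**
      * xe_hw_engine_is_reserved() - Check if an engine is reserved for kernel use
      * @hwe: engine
      *
      * An engine is reserved if it belongs to the OTHER class (GSCCS), if it is
      * a compute engine disabled by the current ccs_mode, or if it is the copy
      * engine set aside for USM.
      *
      * Return: true if the engine is reserved, false otherwise.
      */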
 944bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe)
 945{
 946	struct xe_gt *gt = hwe->gt;
 947	struct xe_device *xe = gt_to_xe(gt);
 948
 949	if (hwe->class == XE_ENGINE_CLASS_OTHER)
 950		return true;
 951
 952	/* Check for engines disabled by ccs_mode setting */
 953	if (xe_gt_ccs_mode_enabled(gt) &&
 954	    hwe->class == XE_ENGINE_CLASS_COMPUTE &&
 955	    hwe->logical_instance >= gt->ccs_mode)
 956		return true;
 957
 958	return xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY &&
 959		hwe->instance == gt->usm.reserved_bcs_instance;
 960}
 961
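     /**
      * xe_hw_engine_class_to_str() - Convert an engine class to a string
      * @class: engine class
      *
      * Return: the name prefix used for engines of @class ("rcs", "bcs", "vcs",
      * "vecs", "ccs" or "other"), or NULL for an invalid class.
      */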
 962const char *xe_hw_engine_class_to_str(enum xe_engine_class class)
 963{
 964	switch (class) {
 965	case XE_ENGINE_CLASS_RENDER:
 966		return "rcs";
 967	case XE_ENGINE_CLASS_VIDEO_DECODE:
 968		return "vcs";
 969	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
 970		return "vecs";
 971	case XE_ENGINE_CLASS_COPY:
 972		return "bcs";
 973	case XE_ENGINE_CLASS_OTHER:
 974		return "other";
 975	case XE_ENGINE_CLASS_COMPUTE:
 976		return "ccs";
 977	case XE_ENGINE_CLASS_MAX:
 978		break;
 979	}
 980
 981	return NULL;
 982}
 983
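     /**
      * xe_hw_engine_read_timestamp() - Read the engine timestamp
      * @hwe: engine
      *
      * Return: the current value of the engine's 64-bit RING_TIMESTAMP
      * register, read as two 32-bit halves.
      */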
 984u64 xe_hw_engine_read_timestamp(struct xe_hw_engine *hwe)
 985{
 986	return xe_mmio_read64_2x32(&hwe->gt->mmio, RING_TIMESTAMP(hwe->mmio_base));
 987}
 988
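     /**
      * xe_hw_engine_to_fw_domain() - Forcewake domain of an engine
      * @hwe: engine
      *
      * Return: the forcewake domain that must be held to access @hwe's
      * registers.
      */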
 989enum xe_force_wake_domains xe_hw_engine_to_fw_domain(struct xe_hw_engine *hwe)
 990{
 991	return engine_infos[hwe->engine_id].domain;
 992}
 993
 994static const enum xe_engine_class user_to_xe_engine_class[] = {
 995	[DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER,
 996	[DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY,
 997	[DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE,
 998	[DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE,
 999	[DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE,
1000};
1001
1002/**
1003 * xe_hw_engine_lookup() - Lookup hardware engine for class:instance
1004 * @xe: xe device
1005 * @eci: engine class and instance
1006 *
 1007 * This function will find a hardware engine for the given engine
 1008 * class and instance.
 1009 *
 1010 * Return: the xe_hw_engine pointer if found, NULL otherwise.
1011 */
1012struct xe_hw_engine *
1013xe_hw_engine_lookup(struct xe_device *xe,
1014		    struct drm_xe_engine_class_instance eci)
1015{
1016	unsigned int idx;
1017
1018	if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class))
1019		return NULL;
1020
1021	if (eci.gt_id >= xe->info.gt_count)
1022		return NULL;
1023
1024	idx = array_index_nospec(eci.engine_class,
1025				 ARRAY_SIZE(user_to_xe_engine_class));
1026
1027	return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id),
1028			       user_to_xe_engine_class[idx],
1029			       eci.engine_instance, true);
1030}