// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_hw_engine.h"

#include <linux/nospec.h>

#include <drm/drm_managed.h>
#include <uapi/drm/xe_drm.h>

#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_irq_regs.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_execlist.h"
#include "xe_force_wake.h"
#include "xe_gsc.h"
#include "xe_gt.h"
#include "xe_gt_ccs_mode.h"
#include "xe_gt_printk.h"
#include "xe_gt_mcr.h"
#include "xe_gt_topology.h"
#include "xe_guc_capture.h"
#include "xe_hw_engine_group.h"
#include "xe_hw_fence.h"
#include "xe_irq.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_mmio.h"
#include "xe_reg_sr.h"
#include "xe_reg_whitelist.h"
#include "xe_rtp.h"
#include "xe_sched_job.h"
#include "xe_sriov.h"
#include "xe_tuning.h"
#include "xe_uc_fw.h"
#include "xe_wa.h"

#define MAX_MMIO_BASES 3
struct engine_info {
	const char *name;
	unsigned int class : 8;
	unsigned int instance : 8;
	unsigned int irq_offset : 8;
	enum xe_force_wake_domains domain;
	u32 mmio_base;
};

static const struct engine_info engine_infos[] = {
	[XE_HW_ENGINE_RCS0] = {
		.name = "rcs0",
		.class = XE_ENGINE_CLASS_RENDER,
		.instance = 0,
		.irq_offset = ilog2(INTR_RCS0),
		.domain = XE_FW_RENDER,
		.mmio_base = RENDER_RING_BASE,
	},
	[XE_HW_ENGINE_BCS0] = {
		.name = "bcs0",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 0,
		.irq_offset = ilog2(INTR_BCS(0)),
		.domain = XE_FW_RENDER,
		.mmio_base = BLT_RING_BASE,
	},
	[XE_HW_ENGINE_BCS1] = {
		.name = "bcs1",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 1,
		.irq_offset = ilog2(INTR_BCS(1)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS1_RING_BASE,
	},
	[XE_HW_ENGINE_BCS2] = {
		.name = "bcs2",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 2,
		.irq_offset = ilog2(INTR_BCS(2)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS2_RING_BASE,
	},
	[XE_HW_ENGINE_BCS3] = {
		.name = "bcs3",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 3,
		.irq_offset = ilog2(INTR_BCS(3)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS3_RING_BASE,
	},
	[XE_HW_ENGINE_BCS4] = {
		.name = "bcs4",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 4,
		.irq_offset = ilog2(INTR_BCS(4)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS4_RING_BASE,
	},
	[XE_HW_ENGINE_BCS5] = {
		.name = "bcs5",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 5,
		.irq_offset = ilog2(INTR_BCS(5)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS5_RING_BASE,
	},
	[XE_HW_ENGINE_BCS6] = {
		.name = "bcs6",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 6,
		.irq_offset = ilog2(INTR_BCS(6)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS6_RING_BASE,
	},
	[XE_HW_ENGINE_BCS7] = {
		.name = "bcs7",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 7,
		.irq_offset = ilog2(INTR_BCS(7)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS7_RING_BASE,
	},
	[XE_HW_ENGINE_BCS8] = {
		.name = "bcs8",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 8,
		.irq_offset = ilog2(INTR_BCS8),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS8_RING_BASE,
	},

	[XE_HW_ENGINE_VCS0] = {
		.name = "vcs0",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 0,
		.irq_offset = 32 + ilog2(INTR_VCS(0)),
		.domain = XE_FW_MEDIA_VDBOX0,
		.mmio_base = BSD_RING_BASE,
	},
	[XE_HW_ENGINE_VCS1] = {
		.name = "vcs1",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 1,
		.irq_offset = 32 + ilog2(INTR_VCS(1)),
		.domain = XE_FW_MEDIA_VDBOX1,
		.mmio_base = BSD2_RING_BASE,
	},
	[XE_HW_ENGINE_VCS2] = {
		.name = "vcs2",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 2,
		.irq_offset = 32 + ilog2(INTR_VCS(2)),
		.domain = XE_FW_MEDIA_VDBOX2,
		.mmio_base = BSD3_RING_BASE,
	},
	[XE_HW_ENGINE_VCS3] = {
		.name = "vcs3",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 3,
		.irq_offset = 32 + ilog2(INTR_VCS(3)),
		.domain = XE_FW_MEDIA_VDBOX3,
		.mmio_base = BSD4_RING_BASE,
	},
	[XE_HW_ENGINE_VCS4] = {
		.name = "vcs4",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 4,
		.irq_offset = 32 + ilog2(INTR_VCS(4)),
		.domain = XE_FW_MEDIA_VDBOX4,
		.mmio_base = XEHP_BSD5_RING_BASE,
	},
	[XE_HW_ENGINE_VCS5] = {
		.name = "vcs5",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 5,
		.irq_offset = 32 + ilog2(INTR_VCS(5)),
		.domain = XE_FW_MEDIA_VDBOX5,
		.mmio_base = XEHP_BSD6_RING_BASE,
	},
	[XE_HW_ENGINE_VCS6] = {
		.name = "vcs6",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 6,
		.irq_offset = 32 + ilog2(INTR_VCS(6)),
		.domain = XE_FW_MEDIA_VDBOX6,
		.mmio_base = XEHP_BSD7_RING_BASE,
	},
	[XE_HW_ENGINE_VCS7] = {
		.name = "vcs7",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 7,
		.irq_offset = 32 + ilog2(INTR_VCS(7)),
		.domain = XE_FW_MEDIA_VDBOX7,
		.mmio_base = XEHP_BSD8_RING_BASE,
	},
	[XE_HW_ENGINE_VECS0] = {
		.name = "vecs0",
		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
		.instance = 0,
		.irq_offset = 32 + ilog2(INTR_VECS(0)),
		.domain = XE_FW_MEDIA_VEBOX0,
		.mmio_base = VEBOX_RING_BASE,
	},
	[XE_HW_ENGINE_VECS1] = {
		.name = "vecs1",
		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
		.instance = 1,
		.irq_offset = 32 + ilog2(INTR_VECS(1)),
		.domain = XE_FW_MEDIA_VEBOX1,
		.mmio_base = VEBOX2_RING_BASE,
	},
	[XE_HW_ENGINE_VECS2] = {
		.name = "vecs2",
		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
		.instance = 2,
		.irq_offset = 32 + ilog2(INTR_VECS(2)),
		.domain = XE_FW_MEDIA_VEBOX2,
		.mmio_base = XEHP_VEBOX3_RING_BASE,
	},
	[XE_HW_ENGINE_VECS3] = {
		.name = "vecs3",
		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
		.instance = 3,
		.irq_offset = 32 + ilog2(INTR_VECS(3)),
		.domain = XE_FW_MEDIA_VEBOX3,
		.mmio_base = XEHP_VEBOX4_RING_BASE,
	},
	[XE_HW_ENGINE_CCS0] = {
		.name = "ccs0",
		.class = XE_ENGINE_CLASS_COMPUTE,
		.instance = 0,
		.irq_offset = ilog2(INTR_CCS(0)),
		.domain = XE_FW_RENDER,
		.mmio_base = COMPUTE0_RING_BASE,
	},
	[XE_HW_ENGINE_CCS1] = {
		.name = "ccs1",
		.class = XE_ENGINE_CLASS_COMPUTE,
		.instance = 1,
		.irq_offset = ilog2(INTR_CCS(1)),
		.domain = XE_FW_RENDER,
		.mmio_base = COMPUTE1_RING_BASE,
	},
	[XE_HW_ENGINE_CCS2] = {
		.name = "ccs2",
		.class = XE_ENGINE_CLASS_COMPUTE,
		.instance = 2,
		.irq_offset = ilog2(INTR_CCS(2)),
		.domain = XE_FW_RENDER,
		.mmio_base = COMPUTE2_RING_BASE,
	},
	[XE_HW_ENGINE_CCS3] = {
		.name = "ccs3",
		.class = XE_ENGINE_CLASS_COMPUTE,
		.instance = 3,
		.irq_offset = ilog2(INTR_CCS(3)),
		.domain = XE_FW_RENDER,
		.mmio_base = COMPUTE3_RING_BASE,
	},
	[XE_HW_ENGINE_GSCCS0] = {
		.name = "gsccs0",
		.class = XE_ENGINE_CLASS_OTHER,
		.instance = OTHER_GSC_INSTANCE,
		.domain = XE_FW_GSC,
		.mmio_base = GSCCS_RING_BASE,
	},
};

static void hw_engine_fini(void *arg)
{
	struct xe_hw_engine *hwe = arg;

	if (hwe->exl_port)
		xe_execlist_port_destroy(hwe->exl_port);

	hwe->gt = NULL;
}

/**
 * xe_hw_engine_mmio_write32() - Write engine register
 * @hwe: engine
 * @reg: register to write into
 * @val: desired 32-bit value to write
 *
 * This function will write val into an engine-specific register.
 * Forcewake must be held by the caller.
 */
void xe_hw_engine_mmio_write32(struct xe_hw_engine *hwe,
			       struct xe_reg reg, u32 val)
{
	xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base));
	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);

	reg.addr += hwe->mmio_base;

	xe_mmio_write32(&hwe->gt->mmio, reg, val);
}

/**
 * xe_hw_engine_mmio_read32() - Read engine register
 * @hwe: engine
 * @reg: register to read from
 *
 * This function will read from an engine-specific register.
 * Forcewake must be held by the caller.
 *
 * Return: value of the 32-bit register.
 */
u32 xe_hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg)
{
	xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base));
	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);

	reg.addr += hwe->mmio_base;

	return xe_mmio_read32(&hwe->gt->mmio, reg);
}
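
/*
 * Illustrative usage sketch, not part of the original source: callers are
 * expected to hold forcewake for the engine's domain around these accessors,
 * assuming the usual xe_force_wake_get()/xe_force_wake_put() pairing, e.g.:
 *
 *	fw_ref = xe_force_wake_get(gt_to_fw(hwe->gt), hwe->domain);
 *	ring_mode = xe_hw_engine_mmio_read32(hwe, RING_MODE(0));
 *	xe_force_wake_put(gt_to_fw(hwe->gt), fw_ref);
 *
 * Both helpers assert this via xe_force_wake_assert_held(), and both take a
 * register offset relative to the engine and add hwe->mmio_base themselves.
 */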

void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
{
	u32 ccs_mask =
		xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE);

	if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask)
		xe_mmio_write32(&hwe->gt->mmio, RCU_MODE,
				_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));

	xe_hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0);
	xe_hw_engine_mmio_write32(hwe, RING_HWS_PGA(0),
				  xe_bo_ggtt_addr(hwe->hwsp));
	xe_hw_engine_mmio_write32(hwe, RING_MODE(0),
				  _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));
	xe_hw_engine_mmio_write32(hwe, RING_MI_MODE(0),
				  _MASKED_BIT_DISABLE(STOP_RING));
	xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
}
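
/*
 * Illustrative note, not part of the original source: the registers written
 * with _MASKED_BIT_ENABLE()/_MASKED_BIT_DISABLE() above are "masked"
 * registers, where the upper 16 bits of the written value select which of
 * the lower 16 bits the write actually affects. Assuming the usual i915/xe
 * encoding of _MASKED_FIELD(mask, value) as (mask << 16) | value:
 *
 *	_MASKED_BIT_ENABLE(BIT(0))  == 0x00010001  (touch bit 0, set it)
 *	_MASKED_BIT_DISABLE(BIT(0)) == 0x00010000  (touch bit 0, clear it)
 *
 * so unrelated bits in RING_MODE/RING_MI_MODE are left untouched.
 */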

static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_gt *gt,
						 const struct xe_hw_engine *hwe)
{
	return xe_gt_ccs_mode_enabled(gt) &&
	       xe_rtp_match_first_render_or_compute(gt, hwe);
}

static bool xe_rtp_cfeg_wmtp_disabled(const struct xe_gt *gt,
				      const struct xe_hw_engine *hwe)
{
	if (GRAPHICS_VER(gt_to_xe(gt)) < 20)
		return false;

	if (hwe->class != XE_ENGINE_CLASS_COMPUTE &&
	    hwe->class != XE_ENGINE_CLASS_RENDER)
		return false;

	return xe_mmio_read32(&hwe->gt->mmio, XEHP_FUSE4) & CFEG_WMTP_DISABLE;
}

void
xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	const u8 mocs_write_idx = gt->mocs.uc_index;
	const u8 mocs_read_idx = gt->mocs.uc_index;
	u32 blit_cctl_val = REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, mocs_write_idx) |
			    REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, mocs_read_idx);
	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
	const struct xe_rtp_entry_sr lrc_setup[] = {
		/*
		 * Some blitter commands do not have a field for MOCS; those
		 * commands will use the MOCS index pointed to by BLIT_CCTL.
		 * The BLIT_CCTL registers need to be programmed to un-cached.
		 */
		{ XE_RTP_NAME("BLIT_CCTL_default_MOCS"),
		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED),
			       ENGINE_CLASS(COPY)),
		  XE_RTP_ACTIONS(FIELD_SET(BLIT_CCTL(0),
				 BLIT_CCTL_DST_MOCS_MASK |
				 BLIT_CCTL_SRC_MOCS_MASK,
				 blit_cctl_val,
				 XE_RTP_ACTION_FLAG(ENGINE_BASE)))
		},
		/* Use Fixed slice CCS mode */
		{ XE_RTP_NAME("RCU_MODE_FIXED_SLICE_CCS_MODE"),
		  XE_RTP_RULES(FUNC(xe_hw_engine_match_fixed_cslice_mode)),
		  XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE,
					   RCU_MODE_FIXED_SLICE_CCS_MODE))
		},
		/* Disable WMTP if HW doesn't support it */
		{ XE_RTP_NAME("DISABLE_WMTP_ON_UNSUPPORTED_HW"),
		  XE_RTP_RULES(FUNC(xe_rtp_cfeg_wmtp_disabled)),
		  XE_RTP_ACTIONS(FIELD_SET(CS_CHICKEN1(0),
					   PREEMPT_GPGPU_LEVEL_MASK,
					   PREEMPT_GPGPU_THREAD_GROUP_LEVEL)),
		  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE)
		},
		{}
	};

	xe_rtp_process_to_sr(&ctx, lrc_setup, &hwe->reg_lrc);
}

static void
hw_engine_setup_default_state(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	/*
	 * RING_CMD_CCTL specifies the default MOCS entry that will be
	 * used by the command streamer when executing commands that
	 * don't have a way to explicitly specify a MOCS setting.
	 * The default should usually reference whichever MOCS entry
	 * corresponds to uncached behavior, although use of a WB cached
	 * entry is recommended by the spec in certain circumstances on
	 * specific platforms.
	 * Bspec: 72161
	 */
	const u8 mocs_write_idx = gt->mocs.uc_index;
	const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE && IS_DGFX(xe) &&
				 (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC) ?
				 gt->mocs.wb_index : gt->mocs.uc_index;
	u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) |
				REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, mocs_read_idx);
	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
	const struct xe_rtp_entry_sr engine_entries[] = {
		{ XE_RTP_NAME("RING_CMD_CCTL_default_MOCS"),
		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED)),
		  XE_RTP_ACTIONS(FIELD_SET(RING_CMD_CCTL(0),
					   CMD_CCTL_WRITE_OVERRIDE_MASK |
					   CMD_CCTL_READ_OVERRIDE_MASK,
					   ring_cmd_cctl_val,
					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
		},
		/*
		 * To allow the GSC engine to go idle on MTL we need to enable
		 * idle messaging and set the hysteresis value (we use 0xA=5us
		 * as recommended in spec). On platforms after MTL this is
		 * enabled by default.
		 */
		{ XE_RTP_NAME("MTL GSCCS IDLE MSG enable"),
		  XE_RTP_RULES(MEDIA_VERSION(1300), ENGINE_CLASS(OTHER)),
		  XE_RTP_ACTIONS(CLR(RING_PSMI_CTL(0),
				     IDLE_MSG_DISABLE,
				     XE_RTP_ACTION_FLAG(ENGINE_BASE)),
				 FIELD_SET(RING_PWRCTX_MAXCNT(0),
					   IDLE_WAIT_TIME,
					   0xA,
					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
		},
		/* Enable Priority Mem Read */
		{ XE_RTP_NAME("Priority_Mem_Read"),
		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
		  XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), CS_PRIORITY_MEM_READ,
				     XE_RTP_ACTION_FLAG(ENGINE_BASE)))
		},
		{}
	};

	xe_rtp_process_to_sr(&ctx, engine_entries, &hwe->reg_sr);
}

static const struct engine_info *find_engine_info(enum xe_engine_class class, int instance)
{
	const struct engine_info *info;
	enum xe_hw_engine_id id;

	for (id = 0; id < XE_NUM_HW_ENGINES; ++id) {
		info = &engine_infos[id];
		if (info->class == class && info->instance == instance)
			return info;
	}

	return NULL;
}

static u16 get_msix_irq_offset(struct xe_gt *gt, enum xe_engine_class class)
{
	/* For MSI-X, HW engines report to the offset of engine instance zero */
	const struct engine_info *info = find_engine_info(class, 0);

	xe_gt_assert(gt, info);

	return info ? info->irq_offset : 0;
}

static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
				 enum xe_hw_engine_id id)
{
	const struct engine_info *info;

	if (WARN_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name))
		return;

	if (!(gt->info.engine_mask & BIT(id)))
		return;

	info = &engine_infos[id];

	xe_gt_assert(gt, !hwe->gt);

	hwe->gt = gt;
	hwe->class = info->class;
	hwe->instance = info->instance;
	hwe->mmio_base = info->mmio_base;
	hwe->irq_offset = xe_device_has_msix(gt_to_xe(gt)) ?
		get_msix_irq_offset(gt, info->class) :
		info->irq_offset;
	hwe->domain = info->domain;
	hwe->name = info->name;
	hwe->fence_irq = &gt->fence_irq[info->class];
	hwe->engine_id = id;

	hwe->eclass = &gt->eclass[hwe->class];
	if (!hwe->eclass->sched_props.job_timeout_ms) {
		hwe->eclass->sched_props.job_timeout_ms = 5 * 1000;
		hwe->eclass->sched_props.job_timeout_min = XE_HW_ENGINE_JOB_TIMEOUT_MIN;
		hwe->eclass->sched_props.job_timeout_max = XE_HW_ENGINE_JOB_TIMEOUT_MAX;
		hwe->eclass->sched_props.timeslice_us = 1 * 1000;
		hwe->eclass->sched_props.timeslice_min = XE_HW_ENGINE_TIMESLICE_MIN;
		hwe->eclass->sched_props.timeslice_max = XE_HW_ENGINE_TIMESLICE_MAX;
		hwe->eclass->sched_props.preempt_timeout_us = XE_HW_ENGINE_PREEMPT_TIMEOUT;
		hwe->eclass->sched_props.preempt_timeout_min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN;
		hwe->eclass->sched_props.preempt_timeout_max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX;

		/*
		 * The GSC engine can accept submissions while the GSC shim is
		 * being reset, during which time the submission is stalled. In
		 * the worst case, the shim reset can take up to the maximum GSC
		 * command execution time (250ms), so the request start can be
		 * delayed by that much; the request itself can take that long
		 * without being preemptible, which means worst case it can
		 * theoretically take up to 500ms for a preemption to go through
		 * on the GSC engine. Adding to that an extra 100ms as a safety
		 * margin, we get a minimum recommended timeout of 600ms.
		 * The preempt_timeout value can't be tuned for OTHER_CLASS
		 * because the class is reserved for kernel usage, so we just
		 * need to make sure that the starting value is above that
		 * threshold; since our default value (640ms) is greater than
		 * 600ms, the only way we can go below is via a kconfig setting.
		 * If that happens, log it in dmesg and update the value.
		 */
		if (hwe->class == XE_ENGINE_CLASS_OTHER) {
			const u32 min_preempt_timeout = 600 * 1000;

			if (hwe->eclass->sched_props.preempt_timeout_us < min_preempt_timeout) {
				hwe->eclass->sched_props.preempt_timeout_us = min_preempt_timeout;
				xe_gt_notice(gt, "Increasing preempt_timeout for GSC to 600ms\n");
			}
		}

		/* Record default props */
		hwe->eclass->defaults = hwe->eclass->sched_props;
	}

	xe_reg_sr_init(&hwe->reg_sr, hwe->name, gt_to_xe(gt));
	xe_tuning_process_engine(hwe);
	xe_wa_process_engine(hwe);
	hw_engine_setup_default_state(hwe);

	xe_reg_sr_init(&hwe->reg_whitelist, hwe->name, gt_to_xe(gt));
	xe_reg_whitelist_process_engine(hwe);
}

static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
			  enum xe_hw_engine_id id)
{
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_tile *tile = gt_to_tile(gt);
	int err;

	xe_gt_assert(gt, id < ARRAY_SIZE(engine_infos) && engine_infos[id].name);
	xe_gt_assert(gt, gt->info.engine_mask & BIT(id));

	xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
	xe_reg_sr_apply_whitelist(hwe);

	hwe->hwsp = xe_managed_bo_create_pin_map(xe, tile, SZ_4K,
						 XE_BO_FLAG_VRAM_IF_DGFX(tile) |
						 XE_BO_FLAG_GGTT |
						 XE_BO_FLAG_GGTT_INVALIDATE);
	if (IS_ERR(hwe->hwsp)) {
		err = PTR_ERR(hwe->hwsp);
		goto err_name;
	}

	if (!xe_device_uc_enabled(xe)) {
		hwe->exl_port = xe_execlist_port_create(xe, hwe);
		if (IS_ERR(hwe->exl_port)) {
			err = PTR_ERR(hwe->exl_port);
			goto err_hwsp;
		}
	} else {
		/* GSCCS has a special interrupt for reset */
		if (hwe->class == XE_ENGINE_CLASS_OTHER)
			hwe->irq_handler = xe_gsc_hwe_irq_handler;

		if (!IS_SRIOV_VF(xe))
			xe_hw_engine_enable_ring(hwe);
	}

	/* We reserve the highest BCS instance for USM */
	if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY)
		gt->usm.reserved_bcs_instance = hwe->instance;

	return devm_add_action_or_reset(xe->drm.dev, hw_engine_fini, hwe);

err_hwsp:
	xe_bo_unpin_map_no_vm(hwe->hwsp);
err_name:
	hwe->name = NULL;

	return err;
}

static void hw_engine_setup_logical_mapping(struct xe_gt *gt)
{
	int class;

	/* FIXME: Doing a simple logical mapping that works for most hardware */
	for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
		struct xe_hw_engine *hwe;
		enum xe_hw_engine_id id;
		int logical_instance = 0;

		for_each_hw_engine(hwe, gt, id)
			if (hwe->class == class)
				hwe->logical_instance = logical_instance++;
	}
}
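
/*
 * Illustrative example, not part of the original source: if vcs0 and vcs2
 * are fused off and vcs1/vcs3 survive, the loop above assigns logical
 * instances 0 and 1 to vcs1 and vcs3 respectively, so each class always
 * presents a dense 0..N-1 numbering regardless of which physical instances
 * remain after fusing.
 */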

static void read_media_fuses(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 media_fuse;
	u16 vdbox_mask;
	u16 vebox_mask;
	int i, j;

	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);

	media_fuse = xe_mmio_read32(&gt->mmio, GT_VEBOX_VDBOX_DISABLE);

	/*
	 * Pre-Xe_HP platforms had register bits representing absent engines,
	 * whereas Xe_HP and beyond have bits representing present engines.
	 * Invert the polarity on old platforms so that we can use common
	 * handling below.
	 */
	if (GRAPHICS_VERx100(xe) < 1250)
		media_fuse = ~media_fuse;

	vdbox_mask = REG_FIELD_GET(GT_VDBOX_DISABLE_MASK, media_fuse);
	vebox_mask = REG_FIELD_GET(GT_VEBOX_DISABLE_MASK, media_fuse);

	for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if (!(BIT(j) & vdbox_mask)) {
			gt->info.engine_mask &= ~BIT(i);
			drm_info(&xe->drm, "vcs%u fused off\n", j);
		}
	}

	for (i = XE_HW_ENGINE_VECS0, j = 0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if (!(BIT(j) & vebox_mask)) {
			gt->info.engine_mask &= ~BIT(i);
			drm_info(&xe->drm, "vecs%u fused off\n", j);
		}
	}
}
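
/*
 * Illustrative example, not part of the original source: on a pre-Xe_HP
 * part (GRAPHICS_VERx100 < 1250), a VDBOX disable field of 0b0010 means
 * "vcs1 absent"; inverting it yields a presence mask of ...1101, which the
 * common loop above can then treat exactly like the native Xe_HP "engine
 * present" encoding.
 */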

static void read_copy_fuses(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 bcs_mask;

	if (GRAPHICS_VERx100(xe) < 1260 || GRAPHICS_VERx100(xe) >= 1270)
		return;

	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);

	bcs_mask = xe_mmio_read32(&gt->mmio, MIRROR_FUSE3);
	bcs_mask = REG_FIELD_GET(MEML3_EN_MASK, bcs_mask);

	/* BCS0 is always present; only BCS1-BCS8 may be fused off */
	for (int i = XE_HW_ENGINE_BCS1, j = 0; i <= XE_HW_ENGINE_BCS8; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if (!(BIT(j / 2) & bcs_mask)) {
			gt->info.engine_mask &= ~BIT(i);
			drm_info(&xe->drm, "bcs%u fused off\n", j);
		}
	}
}
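
/*
 * Illustrative note, not part of the original source: the BIT(j / 2) test
 * above pairs up copy engines per MEML3_EN fuse bit on this platform range.
 * E.g. fuse bit 0 covers bcs1/bcs2 (j = 0 and 1) and bit 1 covers
 * bcs3/bcs4, so clearing fuse bit 0 fuses off both bcs1 and bcs2 at once.
 */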

static void read_compute_fuses_from_dss(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);

	/*
	 * CCS fusing based on DSS masks only applies to platforms that can
	 * have more than one CCS.
	 */
	if (hweight64(gt->info.engine_mask &
		      GENMASK_ULL(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0)) <= 1)
		return;

	/*
	 * CCS availability on Xe_HP is inferred from the presence of DSS in
	 * each quadrant.
	 */
	for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if (!xe_gt_topology_has_dss_in_quadrant(gt, j)) {
			gt->info.engine_mask &= ~BIT(i);
			drm_info(&xe->drm, "ccs%u fused off\n", j);
		}
	}
}

static void read_compute_fuses_from_reg(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 ccs_mask;

	ccs_mask = xe_mmio_read32(&gt->mmio, XEHP_FUSE4);
	ccs_mask = REG_FIELD_GET(CCS_EN_MASK, ccs_mask);

	for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if ((ccs_mask & BIT(j)) == 0) {
			gt->info.engine_mask &= ~BIT(i);
			drm_info(&xe->drm, "ccs%u fused off\n", j);
		}
	}
}

static void read_compute_fuses(struct xe_gt *gt)
{
	if (GRAPHICS_VER(gt_to_xe(gt)) >= 20)
		read_compute_fuses_from_reg(gt);
	else
		read_compute_fuses_from_dss(gt);
}

static void check_gsc_availability(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);

	if (!(gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0)))
		return;

	/*
	 * The GSCCS is only used to communicate with the GSC FW, so if we don't
	 * have the FW there is nothing we need the engine for and can therefore
	 * skip its initialization.
	 */
	if (!xe_uc_fw_is_available(&gt->uc.gsc.fw)) {
		gt->info.engine_mask &= ~BIT(XE_HW_ENGINE_GSCCS0);

		/* interrupts were previously enabled, so turn them off */
		xe_mmio_write32(&gt->mmio, GUNIT_GSC_INTR_ENABLE, 0);
		xe_mmio_write32(&gt->mmio, GUNIT_GSC_INTR_MASK, ~0);

		drm_info(&xe->drm, "gsccs disabled due to lack of FW\n");
	}
}

int xe_hw_engines_init_early(struct xe_gt *gt)
{
	int i;

	read_media_fuses(gt);
	read_copy_fuses(gt);
	read_compute_fuses(gt);
	check_gsc_availability(gt);

	BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT < XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN);
	BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT > XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX);

	for (i = 0; i < ARRAY_SIZE(gt->hw_engines); i++)
		hw_engine_init_early(gt, &gt->hw_engines[i], i);

	return 0;
}

int xe_hw_engines_init(struct xe_gt *gt)
{
	int err;
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;

	for_each_hw_engine(hwe, gt, id) {
		err = hw_engine_init(gt, hwe, id);
		if (err)
			return err;
	}

	hw_engine_setup_logical_mapping(gt);
	err = xe_hw_engine_setup_groups(gt);
	if (err)
		return err;

	return 0;
}

void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec)
{
	wake_up_all(&gt_to_xe(hwe->gt)->ufence_wq);

	if (hwe->irq_handler)
		hwe->irq_handler(hwe, intr_vec);

	if (intr_vec & GT_RENDER_USER_INTERRUPT)
		xe_hw_fence_irq_run(hwe->fence_irq);
}

/**
 * xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine.
 * @hwe: Xe HW Engine.
 * @job: The job object.
 *
 * This can be printed out in a later stage like during dev_coredump
 * analysis.
 *
 * Returns: a Xe HW Engine snapshot object that must be freed by the
 * caller, using `xe_hw_engine_snapshot_free`.
 */
struct xe_hw_engine_snapshot *
xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_sched_job *job)
{
	struct xe_hw_engine_snapshot *snapshot;
	struct __guc_capture_parsed_output *node;

	if (!xe_hw_engine_is_valid(hwe))
		return NULL;

	snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);
	if (!snapshot)
		return NULL;

	snapshot->name = kstrdup(hwe->name, GFP_ATOMIC);
	snapshot->hwe = hwe;
	snapshot->logical_instance = hwe->logical_instance;
	snapshot->forcewake.domain = hwe->domain;
	snapshot->forcewake.ref = xe_force_wake_ref(gt_to_fw(hwe->gt),
						    hwe->domain);
	snapshot->mmio_base = hwe->mmio_base;
	snapshot->kernel_reserved = xe_hw_engine_is_reserved(hwe);

	/* no more VF accessible data below this point */
	if (IS_SRIOV_VF(gt_to_xe(hwe->gt)))
		return snapshot;

	if (job) {
		/* If we got a GuC capture, set the source to GuC */
		node = xe_guc_capture_get_matching_and_lock(job);
		if (node) {
			struct xe_device *xe = gt_to_xe(hwe->gt);
			struct xe_devcoredump *coredump = &xe->devcoredump;

			coredump->snapshot.matched_node = node;
			snapshot->source = XE_ENGINE_CAPTURE_SOURCE_GUC;
			xe_gt_dbg(hwe->gt, "Found and locked GuC-err-capture node");
			return snapshot;
		}
	}

	/* otherwise, do manual capture */
	xe_engine_manual_capture(hwe, snapshot);
	snapshot->source = XE_ENGINE_CAPTURE_SOURCE_MANUAL;
	xe_gt_dbg(hwe->gt, "Proceeding with manual engine snapshot");

	return snapshot;
}

/**
 * xe_hw_engine_snapshot_free - Free all allocated objects for a given snapshot.
 * @snapshot: Xe HW Engine snapshot object.
 *
 * This function frees all the memory that was allocated at capture time.
 */
void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot)
{
	struct xe_gt *gt;

	if (!snapshot)
		return;

	gt = snapshot->hwe->gt;
	/*
	 * xe_guc_capture_put_matched_nodes is called here and from
	 * xe_devcoredump_snapshot_free, to cover the 2 calling paths
	 * of hw_engines - debugfs and devcoredump free.
	 */
	xe_guc_capture_put_matched_nodes(&gt->uc.guc);

	kfree(snapshot->name);
	kfree(snapshot);
}

/**
 * xe_hw_engine_print - Xe HW Engine Print.
 * @hwe: Hardware Engine.
 * @p: drm_printer.
 *
 * This function quickly captures a snapshot and immediately prints it out.
 */
void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p)
{
	struct xe_hw_engine_snapshot *snapshot;

	snapshot = xe_hw_engine_snapshot_capture(hwe, NULL);
	xe_engine_snapshot_print(snapshot, p);
	xe_hw_engine_snapshot_free(snapshot);
}

u32 xe_hw_engine_mask_per_class(struct xe_gt *gt,
				enum xe_engine_class engine_class)
{
	u32 mask = 0;
	enum xe_hw_engine_id id;

	for (id = 0; id < XE_NUM_HW_ENGINES; ++id) {
		if (engine_infos[id].class == engine_class &&
		    gt->info.engine_mask & BIT(id))
			mask |= BIT(engine_infos[id].instance);
	}
	return mask;
}
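
/*
 * Illustrative usage sketch, not part of the original source: a caller that
 * wants to know which compute instances exist on a GT can do
 *
 *	u32 ccs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COMPUTE);
 *
 * and then test e.g. (ccs_mask & BIT(1)) to see whether ccs1 is present;
 * xe_hw_engine_enable_ring() above uses exactly this to decide whether
 * RCU_MODE needs the CCS enable bit.
 */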

bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);

	if (hwe->class == XE_ENGINE_CLASS_OTHER)
		return true;

	/* Check for engines disabled by ccs_mode setting */
	if (xe_gt_ccs_mode_enabled(gt) &&
	    hwe->class == XE_ENGINE_CLASS_COMPUTE &&
	    hwe->logical_instance >= gt->ccs_mode)
		return true;

	return xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY &&
		hwe->instance == gt->usm.reserved_bcs_instance;
}

const char *xe_hw_engine_class_to_str(enum xe_engine_class class)
{
	switch (class) {
	case XE_ENGINE_CLASS_RENDER:
		return "rcs";
	case XE_ENGINE_CLASS_VIDEO_DECODE:
		return "vcs";
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
		return "vecs";
	case XE_ENGINE_CLASS_COPY:
		return "bcs";
	case XE_ENGINE_CLASS_OTHER:
		return "other";
	case XE_ENGINE_CLASS_COMPUTE:
		return "ccs";
	case XE_ENGINE_CLASS_MAX:
		break;
	}

	return NULL;
}

u64 xe_hw_engine_read_timestamp(struct xe_hw_engine *hwe)
{
	return xe_mmio_read64_2x32(&hwe->gt->mmio, RING_TIMESTAMP(hwe->mmio_base));
}

enum xe_force_wake_domains xe_hw_engine_to_fw_domain(struct xe_hw_engine *hwe)
{
	return engine_infos[hwe->engine_id].domain;
}

static const enum xe_engine_class user_to_xe_engine_class[] = {
	[DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER,
	[DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY,
	[DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE,
	[DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE,
	[DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE,
};

/**
 * xe_hw_engine_lookup() - Lookup hardware engine for class:instance
 * @xe: xe device
 * @eci: engine class and instance
 *
 * This function will find a hardware engine for the given engine
 * class and instance.
 *
 * Return: the xe_hw_engine pointer if found, NULL otherwise.
 */
struct xe_hw_engine *
xe_hw_engine_lookup(struct xe_device *xe,
		    struct drm_xe_engine_class_instance eci)
{
	unsigned int idx;

	if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class))
		return NULL;

	if (eci.gt_id >= xe->info.gt_count)
		return NULL;

	idx = array_index_nospec(eci.engine_class,
				 ARRAY_SIZE(user_to_xe_engine_class));

	return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id),
			       user_to_xe_engine_class[idx],
			       eci.engine_instance, true);
}
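
/*
 * Illustrative usage sketch, not part of the original source: an ioctl
 * handler resolving a userspace-supplied engine description might do
 *
 *	struct drm_xe_engine_class_instance eci = {
 *		.engine_class = DRM_XE_ENGINE_CLASS_COPY,
 *		.engine_instance = 0,
 *		.gt_id = 0,
 *	};
 *	struct xe_hw_engine *hwe = xe_hw_engine_lookup(xe, eci);
 *
 *	if (!hwe)
 *		return -EINVAL;
 *
 * The array_index_nospec() above clamps the class index under speculation,
 * which matters because eci comes straight from userspace.
 */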