Linux Audio

Check our new training course

Loading...
Note: File does not exist in v3.1.
   1// SPDX-License-Identifier: MIT
   2/*
   3 * Copyright © 2021 Intel Corporation
   4 */
   5
   6#include "xe_lrc.h"
   7
   8#include "instructions/xe_mi_commands.h"
   9#include "instructions/xe_gfxpipe_commands.h"
  10#include "regs/xe_engine_regs.h"
  11#include "regs/xe_gpu_commands.h"
  12#include "regs/xe_lrc_layout.h"
  13#include "xe_bb.h"
  14#include "xe_bo.h"
  15#include "xe_device.h"
  16#include "xe_drm_client.h"
  17#include "xe_exec_queue_types.h"
  18#include "xe_gt.h"
  19#include "xe_gt_printk.h"
  20#include "xe_hw_fence.h"
  21#include "xe_map.h"
  22#include "xe_memirq.h"
  23#include "xe_sriov.h"
  24#include "xe_vm.h"
  25
/* Context descriptor bits (lower dword) */
#define LRC_VALID				(1 << 0)
#define LRC_PRIVILEGE				(1 << 8)
#define LRC_ADDRESSING_MODE_SHIFT		3
#define LRC_LEGACY_64B_CONTEXT			3

/* Engine class/instance fields in the upper dword of the descriptor */
#define ENGINE_CLASS_SHIFT			61
#define ENGINE_INSTANCE_SHIFT			48
  33
  34static struct xe_device *
  35lrc_to_xe(struct xe_lrc *lrc)
  36{
  37	return gt_to_xe(lrc->fence_ctx.gt);
  38}
  39
  40size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class)
  41{
  42	switch (class) {
  43	case XE_ENGINE_CLASS_RENDER:
  44		if (GRAPHICS_VER(xe) >= 20)
  45			return 4 * SZ_4K;
  46		else
  47			return 14 * SZ_4K;
  48	case XE_ENGINE_CLASS_COMPUTE:
  49		/* 14 pages since graphics_ver == 11 */
  50		if (GRAPHICS_VER(xe) >= 20)
  51			return 3 * SZ_4K;
  52		else
  53			return 14 * SZ_4K;
  54	default:
  55		WARN(1, "Unknown engine class: %d", class);
  56		fallthrough;
  57	case XE_ENGINE_CLASS_COPY:
  58	case XE_ENGINE_CLASS_VIDEO_DECODE:
  59	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
  60	case XE_ENGINE_CLASS_OTHER:
  61		return 2 * SZ_4K;
  62	}
  63}
  64
  65/*
  66 * The per-platform tables are u8-encoded in @data. Decode @data and set the
  67 * addresses' offset and commands in @regs. The following encoding is used
  68 * for each byte. There are 2 steps: decoding commands and decoding addresses.
  69 *
  70 * Commands:
  71 * [7]: create NOPs - number of NOPs are set in lower bits
  72 * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set
  73 *      MI_LRI_FORCE_POSTED
  74 * [5:0]: Number of NOPs or registers to set values to in case of
  75 *        MI_LOAD_REGISTER_IMM
  76 *
  77 * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count"
  78 * number of registers. They are set by using the REG/REG16 macros: the former
  79 * is used for offsets smaller than 0x200 while the latter is for values bigger
  80 * than that. Those macros already set all the bits documented below correctly:
  81 *
  82 * [7]: When a register offset needs more than 6 bits, use additional bytes, to
  83 *      follow, for the lower bits
  84 * [6:0]: Register offset, without considering the engine base.
  85 *
  86 * This function only tweaks the commands and register offsets. Values are not
  87 * filled out.
  88 */
static void set_offsets(u32 *regs,
			const u8 *data,
			const struct xe_hw_engine *hwe)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | \
			   BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
	(((x) >> 2) & 0x7f)
{
	const u32 base = hwe->mmio_base;

	/* A zero byte terminates the encoded per-platform table */
	while (*data) {
		u8 count, flags;

		if (*data & BIT(7)) { /* skip */
			/* NOP(n): leave 'n' dwords of @regs untouched */
			count = *data++ & ~BIT(7);
			regs += count;
			continue;
		}

		/* LRI header byte: [5:0] register count, [7:6] flags */
		count = *data & 0x3f;
		flags = *data >> 6;
		data++;

		*regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
		if (flags & POSTED)
			*regs |= MI_LRI_FORCE_POSTED;
		*regs |= MI_LRI_LRM_CS_MMIO;
		regs++;

		xe_gt_assert(hwe->gt, count);
		do {
			u32 offset = 0;
			u8 v;

			/* Variable-length offset encoding; BIT(7) = more bytes */
			do {
				v = *data++;
				offset <<= 7;
				offset |= v & ~BIT(7);
			} while (v & BIT(7));

			/* Address dword only; the value dword is filled later */
			regs[0] = base + (offset << 2);
			regs += 2;
		} while (--count);
	}

	*regs = MI_BATCH_BUFFER_END | BIT(0);
}
 140
/*
 * Gen12 non-render engine context layout; used for copy/video/compute
 * classes on platforms below graphics version 12.55 (see reg_offsets()).
 */
static const u8 gen12_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};
 172
/*
 * DG2 non-render engine context layout (graphics version >= 12.55,
 * below Xe2); adds two registers (0x120/0x124) to the first LRI group.
 */
static const u8 dg2_xcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};
 206
/*
 * Gen12 render engine context layout; used for the render class on
 * platforms below graphics version 12.50 (see reg_offsets()).
 */
static const u8 gen12_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),
	NOP(3 + 9 + 1),

	LRI(51, POSTED),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),

	0
};
 302
/* XeHP render engine context layout (graphics version 12.50 - 12.54) */
static const u8 xehp_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};
 343
/* DG2 render engine context layout (graphics version 12.55 - 12.69) */
static const u8 dg2_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};
 386
/* MTL render engine context layout (graphics version >= 12.70, below Xe2) */
static const u8 mtl_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(2),
	LRI(2, POSTED),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};
 429
/* Leading portion of the context image shared by all Xe2 engine classes */
#define XE2_CTX_COMMON \
	NOP(1),                 /* [0x00] */ \
	LRI(15, POSTED),        /* [0x01] */ \
	REG16(0x244),           /* [0x02] CTXT_SR_CTL */ \
	REG(0x034),             /* [0x04] RING_BUFFER_HEAD */ \
	REG(0x030),             /* [0x06] RING_BUFFER_TAIL */ \
	REG(0x038),             /* [0x08] RING_BUFFER_START */ \
	REG(0x03c),             /* [0x0a] RING_BUFFER_CONTROL */ \
	REG(0x168),             /* [0x0c] BB_ADDR_UDW */ \
	REG(0x140),             /* [0x0e] BB_ADDR */ \
	REG(0x110),             /* [0x10] BB_STATE */ \
	REG(0x1c0),             /* [0x12] BB_PER_CTX_PTR */ \
	REG(0x1c4),             /* [0x14] RCS_INDIRECT_CTX */ \
	REG(0x1c8),             /* [0x16] RCS_INDIRECT_CTX_OFFSET */ \
	REG(0x180),             /* [0x18] CCID */ \
	REG16(0x2b4),           /* [0x1a] SEMAPHORE_TOKEN */ \
	REG(0x120),             /* [0x1c] PRT_BB_STATE */ \
	REG(0x124),             /* [0x1e] PRT_BB_STATE_UDW */ \
	\
	NOP(1),                 /* [0x20] */ \
	LRI(9, POSTED),         /* [0x21] */ \
	REG16(0x3a8),           /* [0x22] CTX_TIMESTAMP */ \
	REG16(0x3ac),           /* [0x24] CTX_TIMESTAMP_UDW */ \
	REG(0x108),             /* [0x26] INDIRECT_RING_STATE */ \
	REG16(0x284),           /* [0x28] dummy reg */ \
	REG16(0x280),           /* [0x2a] CS_ACC_CTR_THOLD */ \
	REG16(0x27c),           /* [0x2c] CS_CTX_SYS_PASID */ \
	REG16(0x278),           /* [0x2e] CS_CTX_ASID */ \
	REG16(0x274),           /* [0x30] PTBP_UDW */ \
	REG16(0x270)            /* [0x32] PTBP_LDW */
 460
/* Xe2 render engine context layout (graphics version >= 20) */
static const u8 xe2_rcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(2),                 /* [0x34] */
	LRI(2, POSTED),         /* [0x36] */
	REG16(0x5a8),           /* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */
	REG16(0x5ac),           /* [0x39] PREEMPTION_STATUS */

	NOP(6),                 /* [0x41] */
	LRI(1, 0),              /* [0x47] */
	REG(0x0c8),             /* [0x48] R_PWR_CLK_STATE */

	0
};
 475
/* Xe2 copy engine context layout (graphics version >= 20) */
static const u8 xe2_bcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(4 + 8 + 1),         /* [0x34] */
	LRI(2, POSTED),         /* [0x41] */
	REG16(0x200),           /* [0x42] BCS_SWCTRL */
	REG16(0x204),           /* [0x44] BLIT_CCTL */

	0
};
 486
/* Xe2 layout for the remaining engine classes: just the common portion */
static const u8 xe2_xcs_offsets[] = {
	XE2_CTX_COMMON,

	0
};

/* Table-encoding helper macros are only valid for the tables above */
#undef REG16
#undef REG
#undef LRI
#undef NOP
 497
 498static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
 499{
 500	if (class == XE_ENGINE_CLASS_RENDER) {
 501		if (GRAPHICS_VER(xe) >= 20)
 502			return xe2_rcs_offsets;
 503		else if (GRAPHICS_VERx100(xe) >= 1270)
 504			return mtl_rcs_offsets;
 505		else if (GRAPHICS_VERx100(xe) >= 1255)
 506			return dg2_rcs_offsets;
 507		else if (GRAPHICS_VERx100(xe) >= 1250)
 508			return xehp_rcs_offsets;
 509		else
 510			return gen12_rcs_offsets;
 511	} else if (class == XE_ENGINE_CLASS_COPY) {
 512		if (GRAPHICS_VER(xe) >= 20)
 513			return xe2_bcs_offsets;
 514		else
 515			return gen12_xcs_offsets;
 516	} else {
 517		if (GRAPHICS_VER(xe) >= 20)
 518			return xe2_xcs_offsets;
 519		else if (GRAPHICS_VERx100(xe) >= 1255)
 520			return dg2_xcs_offsets;
 521		else
 522			return gen12_xcs_offsets;
 523	}
 524}
 525
/* Program CTX_CONTEXT_CONTROL in the context image with masked-bit enables */
static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
{
	regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
						       CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);

	/* TODO: Timestamp */
}
 533
/*
 * For SR-IOV VFs with memory-based interrupts, patch the context image so
 * the engine loads its IRQ enable/status/source pointers from the memirq
 * buffers on context restore. No-op otherwise.
 */
static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
{
	struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->sriov.vf.memirq;
	struct xe_device *xe = gt_to_xe(hwe->gt);

	if (!IS_SRIOV_VF(xe) || !xe_device_has_memirq(xe))
		return;

	/* LRM: fetch the interrupt-mask enable value from memory (GGTT) */
	regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM |
					MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT;
	regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr;
	regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq);

	/* LRI: point status/source report registers at the memirq pages */
	regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
				       MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
	regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
	regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq);
	regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
	regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq);
}
 554
 555static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
 556{
 557	struct xe_device *xe = gt_to_xe(hwe->gt);
 558
 559	if (GRAPHICS_VERx100(xe) >= 1250)
 560		return 0x70;
 561	else
 562		return 0x60;
 563}
 564
 565static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
 566{
 567	int x;
 568
 569	x = lrc_ring_mi_mode(hwe);
 570	regs[x + 1] &= ~STOP_RING;
 571	regs[x + 1] |= STOP_RING << 16;
 572}
 573
/* The ring buffer is placed at the very start of the LRC BO */
static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
{
	return 0;
}
 578
/* The per-process HW status page immediately follows the ring buffer */
u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
{
	return lrc->ring.size;
}

/* Make the magic macros work */
#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset

/* Driver-defined slots within the 4K PPHWSP */
#define LRC_SEQNO_PPHWSP_OFFSET 512
#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_PARALLEL_PPHWSP_OFFSET 2048
#define LRC_PPHWSP_SIZE SZ_4K
 591
 592static size_t lrc_reg_size(struct xe_device *xe)
 593{
 594	if (GRAPHICS_VERx100(xe) >= 1250)
 595		return 96 * sizeof(u32);
 596	else
 597		return 80 * sizeof(u32);
 598}
 599
/* Bytes at the start of the context image (PPHWSP + register state) */
size_t xe_lrc_skip_size(struct xe_device *xe)
{
	return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
}
 604
/* BO offset of the fence seqno slot */
static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
{
	/* The seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
}
 610
/* BO offset of the start-seqno slot */
static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
{
	/* The start seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
}
 616
/* BO offset of the parallel-submission scratch area */
static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
{
	/* The parallel is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
}
 622
/* BO offset of the context register state, right after the PPHWSP */
static inline u32 __xe_lrc_regs_offset(struct xe_lrc *lrc)
{
	return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
}
 627
/*
 * For each LRC sub-region 'elem' (with a matching __xe_lrc_<elem>_offset()),
 * generate:
 *   __xe_lrc_<elem>_map()       - iosys_map pointing at the region
 *   __xe_lrc_<elem>_ggtt_addr() - GGTT address of the region
 */
#define DECL_MAP_ADDR_HELPERS(elem) \
static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
{ \
	struct iosys_map map = lrc->bo->vmap; \
\
	xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map));  \
	iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
	return map; \
} \
static inline u32 __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
{ \
	return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
} \

DECL_MAP_ADDR_HELPERS(ring)
DECL_MAP_ADDR_HELPERS(pphwsp)
DECL_MAP_ADDR_HELPERS(seqno)
DECL_MAP_ADDR_HELPERS(regs)
DECL_MAP_ADDR_HELPERS(start_seqno)
DECL_MAP_ADDR_HELPERS(parallel)

#undef DECL_MAP_ADDR_HELPERS
 650
/* GGTT address of the LRC; the context image starts at the PPHWSP */
u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_pphwsp_ggtt_addr(lrc);
}
 655
 656u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
 657{
 658	struct xe_device *xe = lrc_to_xe(lrc);
 659	struct iosys_map map;
 660
 661	map = __xe_lrc_regs_map(lrc);
 662	iosys_map_incr(&map, reg_nr * sizeof(u32));
 663	return xe_map_read32(xe, &map);
 664}
 665
 666void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
 667{
 668	struct xe_device *xe = lrc_to_xe(lrc);
 669	struct iosys_map map;
 670
 671	map = __xe_lrc_regs_map(lrc);
 672	iosys_map_incr(&map, reg_nr * sizeof(u32));
 673	xe_map_write32(xe, &map, val);
 674}
 675
 676static void *empty_lrc_data(struct xe_hw_engine *hwe)
 677{
 678	struct xe_device *xe = gt_to_xe(hwe->gt);
 679	void *data;
 680	u32 *regs;
 681
 682	data = kzalloc(xe_lrc_size(xe, hwe->class), GFP_KERNEL);
 683	if (!data)
 684		return NULL;
 685
 686	/* 1st page: Per-Process of HW status Page */
 687	regs = data + LRC_PPHWSP_SIZE;
 688	set_offsets(regs, reg_offsets(xe, hwe->class), hwe);
 689	set_context_control(regs, hwe);
 690	set_memory_based_intr(regs, hwe);
 691	reset_stop_ring(regs, hwe);
 692
 693	return data;
 694}
 695
/* Point the context's PDP0 entry at @vm's page-table root for this tile */
static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
{
	u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile);

	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
}
 703
/* Dword indices of ASID / access-counter-threshold slots in the reg state */
#define PVC_CTX_ASID		(0x2e + 1)
#define PVC_CTX_ACC_CTR_THOLD	(0x2a + 1)

/**
 * xe_lrc_init - Initialize a logical ring context
 * @lrc: LRC to initialize
 * @hwe: hardware engine this context will run on
 * @q: exec queue (not referenced here)
 * @vm: VM to bind the context to, may be NULL
 * @ring_size: byte size of the ring buffer placed before the context image
 *
 * Allocates and pins a BO holding ring + PPHWSP + context image, seeds the
 * image (from the GT default LRC when one exists, otherwise from a freshly
 * built empty image), and programs the ring registers and descriptor.
 *
 * Return: 0 on success, negative error code on failure.
 */
int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
		struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct iosys_map map;
	void *init_data = NULL;
	u32 arb_enable;
	int err;

	lrc->flags = 0;

	/*
	 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
	 * via VM bind calls.
	 */
	lrc->bo = xe_bo_create_pin_map(xe, tile, vm,
				      ring_size + xe_lrc_size(xe, hwe->class),
				      ttm_bo_type_kernel,
				      XE_BO_CREATE_VRAM_IF_DGFX(tile) |
				      XE_BO_CREATE_GGTT_BIT);
	if (IS_ERR(lrc->bo))
		return PTR_ERR(lrc->bo);

	lrc->tile = gt_to_tile(hwe->gt);
	lrc->ring.size = ring_size;
	lrc->ring.tail = 0;

	xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
			     hwe->fence_irq, hwe->name);

	/* No default image saved for this class yet: build one from scratch */
	if (!gt->default_lrc[hwe->class]) {
		init_data = empty_lrc_data(hwe);
		if (!init_data) {
			err = -ENOMEM;
			goto err_lrc_finish;
		}
	}

	/*
	 * Init Per-Process of HW status Page, LRC / context state to known
	 * values
	 */
	map = __xe_lrc_pphwsp_map(lrc);
	if (!init_data) {
		xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE);	/* PPHWSP */
		xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
				 gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
				 xe_lrc_size(xe, hwe->class) - LRC_PPHWSP_SIZE);
	} else {
		xe_map_memcpy_to(xe, &map, 0, init_data,
				 xe_lrc_size(xe, hwe->class));
		kfree(init_data);
	}

	if (vm) {
		xe_lrc_set_ppgtt(lrc, vm);

		/* Account the LRC BO against the owning client, if any */
		if (vm->xef)
			xe_drm_client_add_bo(vm->xef->client, lrc->bo);
	}

	/* Program the ring location/size into the context image */
	xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
	xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
	xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
	xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
			     RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	if (xe->info.has_asid && vm)
		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);

	lrc->desc = LRC_VALID;
	lrc->desc |= LRC_LEGACY_64B_CONTEXT << LRC_ADDRESSING_MODE_SHIFT;
	/* TODO: Priority */

	/* While this appears to have something about privileged batches or
	 * some such, it really just means PPGTT mode.
	 */
	if (vm)
		lrc->desc |= LRC_PRIVILEGE;

	/* Pre-12.50 descriptors carry engine class/instance in the upper bits */
	if (GRAPHICS_VERx100(xe) < 1250) {
		lrc->desc |= (u64)hwe->instance << ENGINE_INSTANCE_SHIFT;
		lrc->desc |= (u64)hwe->class << ENGINE_CLASS_SHIFT;
	}

	/* Start the ring with arbitration enabled */
	arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));

	/* Seed both seqno slots to one before the first fence value */
	map = __xe_lrc_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	map = __xe_lrc_start_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	return 0;

err_lrc_finish:
	xe_lrc_finish(lrc);
	return err;
}
 808
/* Tear down an LRC: finish the fence context, unpin and release the BO */
void xe_lrc_finish(struct xe_lrc *lrc)
{
	xe_hw_fence_ctx_finish(&lrc->fence_ctx);
	xe_bo_lock(lrc->bo, false);
	xe_bo_unpin(lrc->bo);
	xe_bo_unlock(lrc->bo);
	xe_bo_put(lrc->bo);
}
 817
/* Set the ring head in the context image (byte offset into the ring) */
void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
{
	xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
}
 822
/* Current ring head from the context image, masked to the address bits */
u32 xe_lrc_ring_head(struct xe_lrc *lrc)
{
	return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
}
 827
/*
 * Bytes available for writing in the ring. The mask arithmetic assumes
 * the ring size is a power of two.
 */
u32 xe_lrc_ring_space(struct xe_lrc *lrc)
{
	const u32 head = xe_lrc_ring_head(lrc);
	const u32 tail = lrc->ring.tail;
	const u32 size = lrc->ring.size;

	return ((head - tail - 1) & (size - 1)) + 1;
}
 836
/*
 * Copy @size bytes at the current tail and advance it, wrapping with a
 * power-of-two mask. Caller must ensure the copy itself does not cross
 * the ring boundary (see xe_lrc_write_ring()).
 */
static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring,
				const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);

	iosys_map_incr(&ring, lrc->ring.tail);
	xe_map_memcpy_to(xe, &ring, 0, data, size);
	lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1);
}
 846
/*
 * Write @size bytes (dword-aligned) into the ring, splitting the copy at
 * the wrap point and padding with one MI_NOOP to keep the tail 8-byte
 * aligned when @size is not a multiple of 8.
 */
void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map ring;
	u32 rhs;
	size_t aligned_size;

	xe_assert(xe, IS_ALIGNED(size, 4));
	aligned_size = ALIGN(size, 8);

	ring = __xe_lrc_ring_map(lrc);

	xe_assert(xe, lrc->ring.tail < lrc->ring.size);
	/* Bytes remaining before the ring wraps */
	rhs = lrc->ring.size - lrc->ring.tail;
	if (size > rhs) {
		/* Split the write: fill to the end, then restart at 0 */
		__xe_lrc_write_ring(lrc, ring, data, rhs);
		__xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs);
	} else {
		__xe_lrc_write_ring(lrc, ring, data, size);
	}

	if (aligned_size > size) {
		u32 noop = MI_NOOP;

		__xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop));
	}
}
 874
/* Full context descriptor: flag bits combined with the context GGTT address */
u64 xe_lrc_descriptor(struct xe_lrc *lrc)
{
	return lrc->desc | xe_lrc_ggtt_addr(lrc);
}
 879
/* GGTT address of the seqno slot in the PPHWSP */
u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_seqno_ggtt_addr(lrc);
}
 884
/* Create a HW fence tracked through this LRC's seqno slot */
struct dma_fence *xe_lrc_create_seqno_fence(struct xe_lrc *lrc)
{
	return &xe_hw_fence_create(&lrc->fence_ctx,
				   __xe_lrc_seqno_map(lrc))->dma;
}
 890
/* Read the current seqno from the PPHWSP */
s32 xe_lrc_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}
 897
/* Read the start seqno from the PPHWSP */
s32 xe_lrc_start_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_start_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}
 904
/* GGTT address of the start-seqno slot in the PPHWSP */
u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_start_seqno_ggtt_addr(lrc);
}
 909
/* GGTT address of the parallel-submission area in the PPHWSP */
u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_ggtt_addr(lrc);
}
 914
/* Mapping of the parallel-submission area in the PPHWSP */
struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_map(lrc);
}
 919
/* Decode the dword length of a command from its header dword */
static int instr_dw(u32 cmd_header)
{
	/* GFXPIPE "SINGLE_DW" opcodes are a single dword */
	if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) ==
	    GFXPIPE_SINGLE_DW_CMD(0, 0))
		return 1;

	/* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */
	if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST)
		return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2;

	/* Most instructions have the # of dwords (minus 2) in 7:0 */
	return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2;
}
 934
/*
 * Decode and print one MI command at @dw.
 * Returns the number of dwords consumed (never more than @remaining_dw).
 */
static int dump_mi_command(struct drm_printer *p,
			   struct xe_gt *gt,
			   u32 *dw,
			   int remaining_dw)
{
	u32 inst_header = *dw;
	u32 numdw = instr_dw(inst_header);
	u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header);
	int num_noop;

	/* First check for commands that don't have/use a '# DW' field */
	switch (inst_header & MI_OPCODE) {
	case MI_NOOP:
		/* Coalesce a run of consecutive MI_NOOPs into one line */
		num_noop = 1;
		while (num_noop < remaining_dw &&
		       (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP)
			num_noop++;
		drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop);
		return num_noop;

	case MI_TOPOLOGY_FILTER:
		drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header);
		return 1;

	case MI_BATCH_BUFFER_END:
		drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header);
		/* Return 'remaining_dw' to consume the rest of the LRC */
		return remaining_dw;
	}

	/*
	 * Any remaining commands include a # of dwords.  We should make sure
	 * it doesn't exceed the remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (inst_header & MI_OPCODE) {
	case MI_LOAD_REGISTER_IMM:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n",
			   inst_header, (numdw - 1) / 2);
		/* Payload is (offset, value) pairs after the header */
		for (int i = 1; i < numdw; i += 2)
			drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
		return numdw;

	case MI_LOAD_REGISTER_MEM & MI_OPCODE:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n",
			   inst_header,
			   dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "",
			   dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : "");
		/* Well-formed LRM is 4 dwords: header, reg, addr lo, addr hi */
		if (numdw == 4)
			drm_printf(p, " - %#6x = %#010llx\n",
				   dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2])));
		else
			drm_printf(p, " - %*ph (%s)\n",
				   (int)sizeof(u32) * (numdw - 1), dw + 1,
				   numdw < 4 ? "truncated" : "malformed");
		return numdw;

	case MI_FORCE_WAKEUP:
		drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header);
		return numdw;

	default:
		drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n",
			   inst_header, opcode, numdw);
		return numdw;
	}
}
1004
/*
 * Decode and print one GFXPIPE command at @dw.
 * Returns the number of dwords consumed (never more than @remaining_dw).
 */
static int dump_gfxpipe_command(struct drm_printer *p,
				struct xe_gt *gt,
				u32 *dw,
				int remaining_dw)
{
	u32 numdw = instr_dw(*dw);
	u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw);
	u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw);
	u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw);

	/*
	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
	 * remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (*dw & GFXPIPE_MATCH_MASK) {
/* Print the command's name and consume its dwords */
#define MATCH(cmd) \
	case cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw
/* Same, for 3DSTATE commands whose enum carries a CMD_ prefix */
#define MATCH3D(cmd) \
	case CMD_##cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw

	MATCH(STATE_BASE_ADDRESS);
	MATCH(STATE_SIP);
	MATCH(GPGPU_CSR_BASE_ADDRESS);
	MATCH(STATE_COMPUTE_MODE);
	MATCH3D(3DSTATE_BTD);

	MATCH3D(3DSTATE_VF_STATISTICS);

	MATCH(PIPELINE_SELECT);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST);
	MATCH3D(3DSTATE_CLEAR_PARAMS);
	MATCH3D(3DSTATE_DEPTH_BUFFER);
	MATCH3D(3DSTATE_STENCIL_BUFFER);
	MATCH3D(3DSTATE_HIER_DEPTH_BUFFER);
	MATCH3D(3DSTATE_VERTEX_BUFFERS);
	MATCH3D(3DSTATE_VERTEX_ELEMENTS);
	MATCH3D(3DSTATE_INDEX_BUFFER);
	MATCH3D(3DSTATE_VF);
	MATCH3D(3DSTATE_MULTISAMPLE);
	MATCH3D(3DSTATE_CC_STATE_POINTERS);
	MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS);
	MATCH3D(3DSTATE_VS);
	MATCH3D(3DSTATE_GS);
	MATCH3D(3DSTATE_CLIP);
	MATCH3D(3DSTATE_SF);
	MATCH3D(3DSTATE_WM);
	MATCH3D(3DSTATE_CONSTANT_VS);
	MATCH3D(3DSTATE_CONSTANT_GS);
	MATCH3D(3DSTATE_SAMPLE_MASK);
	MATCH3D(3DSTATE_CONSTANT_HS);
	MATCH3D(3DSTATE_CONSTANT_DS);
	MATCH3D(3DSTATE_HS);
	MATCH3D(3DSTATE_TE);
	MATCH3D(3DSTATE_DS);
	MATCH3D(3DSTATE_STREAMOUT);
	MATCH3D(3DSTATE_SBE);
	MATCH3D(3DSTATE_PS);
	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
	MATCH3D(3DSTATE_CPS_POINTERS);
	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
	MATCH3D(3DSTATE_BLEND_STATE_POINTERS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS);
	MATCH3D(3DSTATE_VF_INSTANCING);
	MATCH3D(3DSTATE_VF_SGVS);
	MATCH3D(3DSTATE_VF_TOPOLOGY);
	MATCH3D(3DSTATE_WM_CHROMAKEY);
	MATCH3D(3DSTATE_PS_BLEND);
	MATCH3D(3DSTATE_WM_DEPTH_STENCIL);
	MATCH3D(3DSTATE_PS_EXTRA);
	MATCH3D(3DSTATE_RASTER);
	MATCH3D(3DSTATE_SBE_SWIZ);
	MATCH3D(3DSTATE_WM_HZ_OP);
	MATCH3D(3DSTATE_VF_COMPONENT_PACKING);
	MATCH3D(3DSTATE_VF_SGVS_2);
	MATCH3D(3DSTATE_VFG);
	MATCH3D(3DSTATE_URB_ALLOC_VS);
	MATCH3D(3DSTATE_URB_ALLOC_HS);
	MATCH3D(3DSTATE_URB_ALLOC_DS);
	MATCH3D(3DSTATE_URB_ALLOC_GS);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_0);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_1);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_2);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_3);
	MATCH3D(3DSTATE_PRIMITIVE_REPLICATION);
	MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO);
	MATCH3D(3DSTATE_AMFS);
	MATCH3D(3DSTATE_DEPTH_BOUNDS);
	MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS);
	MATCH3D(3DSTATE_CONSTANT_TS_POINTER);
	MATCH3D(3DSTATE_MESH_CONTROL);
	MATCH3D(3DSTATE_MESH_DISTRIB);
	MATCH3D(3DSTATE_TASK_REDISTRIB);
	MATCH3D(3DSTATE_MESH_SHADER);
	MATCH3D(3DSTATE_MESH_SHADER_DATA);
	MATCH3D(3DSTATE_TASK_CONTROL);
	MATCH3D(3DSTATE_TASK_SHADER);
	MATCH3D(3DSTATE_TASK_SHADER_DATA);
	MATCH3D(3DSTATE_URB_ALLOC_MESH);
	MATCH3D(3DSTATE_URB_ALLOC_TASK);
	MATCH3D(3DSTATE_CLIP_MESH);
	MATCH3D(3DSTATE_SBE_MESH);
	MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE);
	MATCH3D(3DSTATE_CHROMA_KEY);
	MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET);
	MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN);
	MATCH3D(3DSTATE_LINE_STIPPLE);
	MATCH3D(3DSTATE_AA_LINE_PARAMETERS);
	MATCH3D(3DSTATE_MONOFILTER_SIZE);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
	MATCH3D(3DSTATE_SO_DECL_LIST);
	MATCH3D(3DSTATE_SO_BUFFER);
	MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC);
	MATCH3D(3DSTATE_SAMPLE_PATTERN);
	MATCH3D(3DSTATE_3D_MODE);
	MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE);
	MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS);
	MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO);

	default:
		drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n",
			   *dw, pipeline, opcode, subopcode, numdw);
		return numdw;
	}
}
1152
1153void xe_lrc_dump_default(struct drm_printer *p,
1154			 struct xe_gt *gt,
1155			 enum xe_engine_class hwe_class)
1156{
1157	u32 *dw;
1158	int remaining_dw, num_dw;
1159
1160	if (!gt->default_lrc[hwe_class]) {
1161		drm_printf(p, "No default LRC for class %d\n", hwe_class);
1162		return;
1163	}
1164
1165	/*
1166	 * Skip the beginning of the LRC since it contains the per-process
1167	 * hardware status page.
1168	 */
1169	dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE;
1170	remaining_dw = (xe_lrc_size(gt_to_xe(gt), hwe_class) - LRC_PPHWSP_SIZE) / 4;
1171
1172	while (remaining_dw > 0) {
1173		if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) {
1174			num_dw = dump_mi_command(p, gt, dw, remaining_dw);
1175		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) {
1176			num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw);
1177		} else {
1178			num_dw = min(instr_dw(*dw), remaining_dw);
1179			drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n",
1180				   *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw),
1181				   num_dw);
1182		}
1183
1184		dw += num_dw;
1185		remaining_dw -= num_dw;
1186	}
1187}
1188
/*
 * One entry of a non-register (GFXPIPE) context state table: the full
 * first-dword encoding of the instruction and its total length.
 */
struct instr_state {
	u32 instr;	/* instruction header dword (opcode encoding) */
	u16 num_dw;	/* total length in dwords, header included (emitter programs num_dw - 2 as the length field) */
};
1193
/*
 * Table of GFXPIPE (non-register) SVG state instructions emitted for render
 * engines on Xe_HPG-based platforms; consumed by
 * xe_lrc_emit_hwe_state_instructions(), which writes each instruction header
 * followed by (num_dw - 1) payload dwords into the batch.
 *
 * NOTE(review): entry order and dword counts presumably mirror the hardware
 * context layout expected by these platforms — confirm against the Bspec
 * before reordering or resizing any entry.
 */
static const struct instr_state xe_hpg_svg_state[] = {
	{ .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 },
	{ .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_VS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_CLIP, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SF, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_RASTER, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_HS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 },
	{ .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TE, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_GS, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 },
};
1246
1247void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb)
1248{
1249	struct xe_gt *gt = q->hwe->gt;
1250	struct xe_device *xe = gt_to_xe(gt);
1251	const struct instr_state *state_table = NULL;
1252	int state_table_size = 0;
1253
1254	/*
1255	 * At the moment we only need to emit non-register state for the RCS
1256	 * engine.
1257	 */
1258	if (q->hwe->class != XE_ENGINE_CLASS_RENDER)
1259		return;
1260
1261	switch (GRAPHICS_VERx100(xe)) {
1262	case 1255:
1263	case 1270 ... 2004:
1264		state_table = xe_hpg_svg_state;
1265		state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
1266		break;
1267	default:
1268		xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
1269			  GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
1270		return;
1271	}
1272
1273	for (int i = 0; i < state_table_size; i++) {
1274		u32 instr = state_table[i].instr;
1275		u16 num_dw = state_table[i].num_dw;
1276		bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW);
1277
1278		xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE);
1279		xe_gt_assert(gt, num_dw != 0);
1280		xe_gt_assert(gt, is_single_dw ^ (num_dw > 1));
1281
1282		/*
1283		 * Xe2's SVG context is the same as the one on DG2 / MTL
1284		 * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has
1285		 * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined).
1286		 * Just make the replacement here rather than defining a
1287		 * whole separate table for the single trivial change.
1288		 */
1289		if (GRAPHICS_VER(xe) >= 20 &&
1290		    instr == CMD_3DSTATE_DRAWING_RECTANGLE)
1291			instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST;
1292
1293		bb->cs[bb->len] = instr;
1294		if (!is_single_dw)
1295			bb->cs[bb->len] |= (num_dw - 2);
1296
1297		bb->len += num_dw;
1298	}
1299}