// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_lrc.h"

#include <generated/xe_wa_oob.h>

#include <linux/ascii85.h>

#include "instructions/xe_mi_commands.h"
#include "instructions/xe_gfxpipe_commands.h"
#include "instructions/xe_gfx_state_commands.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_lrc_layout.h"
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue_types.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_hw_fence.h"
#include "xe_map.h"
#include "xe_memirq.h"
#include "xe_sriov.h"
#include "xe_vm.h"
#include "xe_wa.h"

#define LRC_VALID				BIT_ULL(0)
#define LRC_PRIVILEGE				BIT_ULL(8)
#define LRC_ADDRESSING_MODE			GENMASK_ULL(4, 3)
#define LRC_LEGACY_64B_CONTEXT			3

#define LRC_ENGINE_CLASS			GENMASK_ULL(63, 61)
#define LRC_ENGINE_INSTANCE			GENMASK_ULL(53, 48)

#define LRC_INDIRECT_RING_STATE_SIZE		SZ_4K

static struct xe_device *
lrc_to_xe(struct xe_lrc *lrc)
{
	return gt_to_xe(lrc->fence_ctx.gt);
}

size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
{
	struct xe_device *xe = gt_to_xe(gt);
	size_t size;

	switch (class) {
	case XE_ENGINE_CLASS_RENDER:
		if (GRAPHICS_VER(xe) >= 20)
			size = 4 * SZ_4K;
		else
			size = 14 * SZ_4K;
		break;
	case XE_ENGINE_CLASS_COMPUTE:
		/* 14 pages since graphics_ver == 11 */
		if (GRAPHICS_VER(xe) >= 20)
			size = 3 * SZ_4K;
		else
			size = 14 * SZ_4K;
		break;
	default:
		WARN(1, "Unknown engine class: %d", class);
		fallthrough;
	case XE_ENGINE_CLASS_COPY:
	case XE_ENGINE_CLASS_VIDEO_DECODE:
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
	case XE_ENGINE_CLASS_OTHER:
		size = 2 * SZ_4K;
	}

	/* Add indirect ring state page */
	if (xe_gt_has_indirect_ring_state(gt))
		size += LRC_INDIRECT_RING_STATE_SIZE;

	return size;
}

/*
 * The per-platform tables are u8-encoded in @data. Decode @data and set the
 * addresses' offset and commands in @regs. The following encoding is used
 * for each byte. There are 2 steps: decoding commands and decoding addresses.
 *
 * Commands:
 * [7]: create NOPs - the number of NOPs is set in the lower bits
 * [6]: when creating an MI_LOAD_REGISTER_IMM command, allows setting
 *      MI_LRI_FORCE_POSTED
 * [5:0]: number of NOPs, or number of registers to set values for in the
 *        case of MI_LOAD_REGISTER_IMM
 *
 * Addresses: these are decoded after an MI_LOAD_REGISTER_IMM command, one
 * entry per register for "count" registers. They are set by using the
 * REG/REG16 macros: the former is used for offsets smaller than 0x200 while
 * the latter is for offsets bigger than that. Those macros already set all
 * the bits documented below correctly:
 *
 * [7]: when a register offset needs more than 7 bits, additional bytes
 *      follow for the lower bits
 * [6:0]: register offset, without considering the engine base.
 *
 * This function only tweaks the commands and register offsets. Values are not
 * filled out.
 */
static void set_offsets(u32 *regs,
			const u8 *data,
			const struct xe_hw_engine *hwe)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | \
			   BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
	(((x) >> 2) & 0x7f)
{
	const u32 base = hwe->mmio_base;

	while (*data) {
		u8 count, flags;

		if (*data & BIT(7)) { /* skip */
			count = *data++ & ~BIT(7);
			regs += count;
			continue;
		}

		count = *data & 0x3f;
		flags = *data >> 6;
		data++;

		*regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
		if (flags & POSTED)
			*regs |= MI_LRI_FORCE_POSTED;
		*regs |= MI_LRI_LRM_CS_MMIO;
		regs++;

		xe_gt_assert(hwe->gt, count);
		do {
			u32 offset = 0;
			u8 v;

			do {
				v = *data++;
				offset <<= 7;
				offset |= v & ~BIT(7);
			} while (v & BIT(7));

			regs[0] = base + (offset << 2);
			regs += 2;
		} while (--count);
	}

	*regs = MI_BATCH_BUFFER_END | BIT(0);
}

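/*
 * Example encoding/decoding round trip, worked from the macros above:
 * REG16(0x244) expands to the two bytes 0x81, 0x11.  set_offsets() shifts
 * in the low 7 bits of each byte while bit 7 is set, giving
 * offset = (0x01 << 7) | 0x11 = 0x91, so the register address written into
 * the context image is base + (0x91 << 2) = base + 0x244.  Likewise
 * LRI(13, POSTED) encodes as (POSTED << 6) | 13 = 0x4d, i.e. an
 * MI_LOAD_REGISTER_IMM of 13 registers with MI_LRI_FORCE_POSTED set.
 */
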
static const u8 gen12_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};

static const u8 dg2_xcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};

static const u8 gen12_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),
	NOP(3 + 9 + 1),

	LRI(51, POSTED),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),

	0
};

static const u8 xehp_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

static const u8 dg2_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

static const u8 mtl_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(2),
	LRI(2, POSTED),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

#define XE2_CTX_COMMON \
	NOP(1),                 /* [0x00] */ \
	LRI(15, POSTED),        /* [0x01] */ \
	REG16(0x244),           /* [0x02] CTXT_SR_CTL */ \
	REG(0x034),             /* [0x04] RING_BUFFER_HEAD */ \
	REG(0x030),             /* [0x06] RING_BUFFER_TAIL */ \
	REG(0x038),             /* [0x08] RING_BUFFER_START */ \
	REG(0x03c),             /* [0x0a] RING_BUFFER_CONTROL */ \
	REG(0x168),             /* [0x0c] BB_ADDR_UDW */ \
	REG(0x140),             /* [0x0e] BB_ADDR */ \
	REG(0x110),             /* [0x10] BB_STATE */ \
	REG(0x1c0),             /* [0x12] BB_PER_CTX_PTR */ \
	REG(0x1c4),             /* [0x14] RCS_INDIRECT_CTX */ \
	REG(0x1c8),             /* [0x16] RCS_INDIRECT_CTX_OFFSET */ \
	REG(0x180),             /* [0x18] CCID */ \
	REG16(0x2b4),           /* [0x1a] SEMAPHORE_TOKEN */ \
	REG(0x120),             /* [0x1c] PRT_BB_STATE */ \
	REG(0x124),             /* [0x1e] PRT_BB_STATE_UDW */ \
	\
	NOP(1),                 /* [0x20] */ \
	LRI(9, POSTED),         /* [0x21] */ \
	REG16(0x3a8),           /* [0x22] CTX_TIMESTAMP */ \
	REG16(0x3ac),           /* [0x24] CTX_TIMESTAMP_UDW */ \
	REG(0x108),             /* [0x26] INDIRECT_RING_STATE */ \
	REG16(0x284),           /* [0x28] dummy reg */ \
	REG16(0x280),           /* [0x2a] CS_ACC_CTR_THOLD */ \
	REG16(0x27c),           /* [0x2c] CS_CTX_SYS_PASID */ \
	REG16(0x278),           /* [0x2e] CS_CTX_ASID */ \
	REG16(0x274),           /* [0x30] PTBP_UDW */ \
	REG16(0x270)            /* [0x32] PTBP_LDW */

static const u8 xe2_rcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(2),                 /* [0x34] */
	LRI(2, POSTED),         /* [0x36] */
	REG16(0x5a8),           /* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */
	REG16(0x5ac),           /* [0x39] PREEMPTION_STATUS */

	NOP(6),                 /* [0x3b] */
	LRI(1, 0),              /* [0x41] */
	REG(0x0c8),             /* [0x42] R_PWR_CLK_STATE */

	0
};

static const u8 xe2_bcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(4 + 8 + 1),         /* [0x34] */
	LRI(2, POSTED),         /* [0x41] */
	REG16(0x200),           /* [0x42] BCS_SWCTRL */
	REG16(0x204),           /* [0x44] BLIT_CCTL */

	0
};

static const u8 xe2_xcs_offsets[] = {
	XE2_CTX_COMMON,

	0
};

static const u8 xe2_indirect_ring_state_offsets[] = {
	NOP(1),                 /* [0x00] */
	LRI(5, POSTED),         /* [0x01] */
	REG(0x034),             /* [0x02] RING_BUFFER_HEAD */
	REG(0x030),             /* [0x04] RING_BUFFER_TAIL */
	REG(0x038),             /* [0x06] RING_BUFFER_START */
	REG(0x048),             /* [0x08] RING_BUFFER_START_UDW */
	REG(0x03c),             /* [0x0a] RING_BUFFER_CONTROL */

	NOP(5),                 /* [0x0c] */
	LRI(9, POSTED),         /* [0x11] */
	REG(0x168),             /* [0x12] BB_ADDR_UDW */
	REG(0x140),             /* [0x14] BB_ADDR */
	REG(0x110),             /* [0x16] BB_STATE */
	REG16(0x588),           /* [0x18] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x1a] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x1c] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x1e] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x20] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x22] BB_STACK_WRITE_PORT */

	NOP(12),                /* [0x24] */

	0
};

#undef REG16
#undef REG
#undef LRI
#undef NOP

static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
{
	if (class == XE_ENGINE_CLASS_RENDER) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1270)
			return mtl_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1250)
			return xehp_rcs_offsets;
		else
			return gen12_rcs_offsets;
	} else if (class == XE_ENGINE_CLASS_COPY) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_bcs_offsets;
		else
			return gen12_xcs_offsets;
	} else {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_xcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_xcs_offsets;
		else
			return gen12_xcs_offsets;
	}
}

static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
{
	regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
						       CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);

	if (xe_gt_has_indirect_ring_state(hwe->gt))
		regs[CTX_CONTEXT_CONTROL] |=
			_MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE);

	/* TODO: Timestamp */
}

static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
{
	struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->memirq;
	struct xe_device *xe = gt_to_xe(hwe->gt);

	if (!xe_device_uses_memirq(xe))
		return;

	regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM |
					MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT;
	regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr;
	regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq);

	regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
				       MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
	regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
	regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq, hwe);
	regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
	regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq, hwe);
}

static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
{
	struct xe_device *xe = gt_to_xe(hwe->gt);

	if (GRAPHICS_VERx100(xe) >= 1250)
		return 0x70;
	else
		return 0x60;
}

static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
{
	int x;

	x = lrc_ring_mi_mode(hwe);
	regs[x + 1] &= ~STOP_RING;
	regs[x + 1] |= STOP_RING << 16;
}

static inline bool xe_lrc_has_indirect_ring_state(struct xe_lrc *lrc)
{
	return lrc->flags & XE_LRC_FLAG_INDIRECT_RING_STATE;
}

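/*
 * Layout of the LRC BO, as implied by the offset helpers below: the ring
 * buffer sits at offset 0, followed by the 4K per-process HWSP (whose
 * driver-defined portion holds the seqnos, job timestamp and parallel
 * scratch area), followed by the context register state.  When supported,
 * the indirect ring state occupies the final 4K page of the BO.
 */
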
static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
{
	return 0;
}

u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
{
	return lrc->ring.size;
}

/* Make the magic macros work */
#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset
#define __xe_lrc_regs_offset xe_lrc_regs_offset

#define LRC_SEQNO_PPHWSP_OFFSET 512
#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_PARALLEL_PPHWSP_OFFSET 2048
#define LRC_PPHWSP_SIZE SZ_4K

u32 xe_lrc_regs_offset(struct xe_lrc *lrc)
{
	return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
}

static size_t lrc_reg_size(struct xe_device *xe)
{
	if (GRAPHICS_VERx100(xe) >= 1250)
		return 96 * sizeof(u32);
	else
		return 80 * sizeof(u32);
}

size_t xe_lrc_skip_size(struct xe_device *xe)
{
	return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
}

static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
{
	/* The seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
{
	/* The start seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
}

static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc)
{
	/* The ctx job timestamp is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET;
}

static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
{
	/* The parallel scratch area is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
}

static u32 __xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc)
{
	return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32);
}

static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc)
{
	/* The indirect ring state page is at the very end of the LRC */
	return lrc->size - LRC_INDIRECT_RING_STATE_SIZE;
}

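/*
 * For each element this macro generates a pair of helpers: for instance
 * DECL_MAP_ADDR_HELPERS(seqno) provides __xe_lrc_seqno_map(), an iosys_map
 * pointing at the element inside the LRC BO, and
 * __xe_lrc_seqno_ggtt_addr(), the element's GGTT address.
 */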
#define DECL_MAP_ADDR_HELPERS(elem) \
static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
{ \
	struct iosys_map map = lrc->bo->vmap; \
\
	xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map));  \
	iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
	return map; \
} \
static inline u32 __maybe_unused __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
{ \
	return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
} \

DECL_MAP_ADDR_HELPERS(ring)
DECL_MAP_ADDR_HELPERS(pphwsp)
DECL_MAP_ADDR_HELPERS(seqno)
DECL_MAP_ADDR_HELPERS(regs)
DECL_MAP_ADDR_HELPERS(start_seqno)
DECL_MAP_ADDR_HELPERS(ctx_job_timestamp)
DECL_MAP_ADDR_HELPERS(ctx_timestamp)
DECL_MAP_ADDR_HELPERS(parallel)
DECL_MAP_ADDR_HELPERS(indirect_ring)

#undef DECL_MAP_ADDR_HELPERS

/**
 * xe_lrc_ctx_timestamp_ggtt_addr() - Get ctx timestamp GGTT address
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx timestamp GGTT address
 */
u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
}

/**
 * xe_lrc_ctx_timestamp() - Read ctx timestamp value
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx timestamp value
 */
u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_ctx_timestamp_map(lrc);
	return xe_map_read32(xe, &map);
}

/**
 * xe_lrc_ctx_job_timestamp_ggtt_addr() - Get ctx job timestamp GGTT address
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx job timestamp GGTT address
 */
u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_ctx_job_timestamp_ggtt_addr(lrc);
}

/**
 * xe_lrc_ctx_job_timestamp() - Read ctx job timestamp value
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx job timestamp value
 */
u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_ctx_job_timestamp_map(lrc);
	return xe_map_read32(xe, &map);
}

u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_pphwsp_ggtt_addr(lrc);
}

u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc)
{
	if (!xe_lrc_has_indirect_ring_state(lrc))
		return 0;

	return __xe_lrc_indirect_ring_ggtt_addr(lrc);
}

static u32 xe_lrc_read_indirect_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_indirect_ring_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	return xe_map_read32(xe, &map);
}

static void xe_lrc_write_indirect_ctx_reg(struct xe_lrc *lrc,
					  int reg_nr, u32 val)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_indirect_ring_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	xe_map_write32(xe, &map, val);
}

u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	return xe_map_read32(xe, &map);
}

void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	xe_map_write32(xe, &map, val);
}

static void *empty_lrc_data(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	void *data;
	u32 *regs;

	data = kzalloc(xe_gt_lrc_size(gt, hwe->class), GFP_KERNEL);
	if (!data)
		return NULL;

	/* 1st page: Per-Process HW status Page (PPHWSP) */
	regs = data + LRC_PPHWSP_SIZE;
	set_offsets(regs, reg_offsets(gt_to_xe(gt), hwe->class), hwe);
	set_context_control(regs, hwe);
	set_memory_based_intr(regs, hwe);
	reset_stop_ring(regs, hwe);
	if (xe_gt_has_indirect_ring_state(gt)) {
		regs = data + xe_gt_lrc_size(gt, hwe->class) -
		       LRC_INDIRECT_RING_STATE_SIZE;
		set_offsets(regs, xe2_indirect_ring_state_offsets, hwe);
	}

	return data;
}

static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
{
	u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile);

	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
}

static void xe_lrc_finish(struct xe_lrc *lrc)
{
	xe_hw_fence_ctx_finish(&lrc->fence_ctx);
	xe_bo_lock(lrc->bo, false);
	xe_bo_unpin(lrc->bo);
	xe_bo_unlock(lrc->bo);
	xe_bo_put(lrc->bo);
}

#define PVC_CTX_ASID		(0x2e + 1)
#define PVC_CTX_ACC_CTR_THOLD	(0x2a + 1)

static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
		       struct xe_vm *vm, u32 ring_size)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct iosys_map map;
	void *init_data = NULL;
	u32 arb_enable;
	u32 lrc_size;
	int err;

	kref_init(&lrc->refcount);
	lrc->flags = 0;
	lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class);
	if (xe_gt_has_indirect_ring_state(gt))
		lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE;

	/*
	 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
	 * via VM bind calls.
	 */
	lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size,
				       ttm_bo_type_kernel,
				       XE_BO_FLAG_VRAM_IF_DGFX(tile) |
				       XE_BO_FLAG_GGTT |
				       XE_BO_FLAG_GGTT_INVALIDATE);
	if (IS_ERR(lrc->bo))
		return PTR_ERR(lrc->bo);

	lrc->size = lrc_size;
	lrc->tile = gt_to_tile(hwe->gt);
	lrc->ring.size = ring_size;
	lrc->ring.tail = 0;
	lrc->ctx_timestamp = 0;

	xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
			     hwe->fence_irq, hwe->name);

	if (!gt->default_lrc[hwe->class]) {
		init_data = empty_lrc_data(hwe);
		if (!init_data) {
			err = -ENOMEM;
			goto err_lrc_finish;
		}
	}

	/*
	 * Init the Per-Process HW status Page and the LRC / context state to
	 * known values
	 */
	map = __xe_lrc_pphwsp_map(lrc);
	if (!init_data) {
		xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE);	/* PPHWSP */
		xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
				 gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
				 xe_gt_lrc_size(gt, hwe->class) - LRC_PPHWSP_SIZE);
	} else {
		xe_map_memcpy_to(xe, &map, 0, init_data,
				 xe_gt_lrc_size(gt, hwe->class));
		kfree(init_data);
	}

	if (vm) {
		xe_lrc_set_ppgtt(lrc, vm);

		if (vm->xef)
			xe_drm_client_add_bo(vm->xef->client, lrc->bo);
	}

	if (xe_gt_has_indirect_ring_state(gt)) {
		xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE,
				     __xe_lrc_indirect_ring_ggtt_addr(lrc));

		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START,
					      __xe_lrc_ring_ggtt_addr(lrc));
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START_UDW, 0);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, 0);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, lrc->ring.tail);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_CTL,
					      RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	} else {
		xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
		xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
		xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
				     RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	}

	xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0);

	if (xe->info.has_asid && vm)
		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);

	lrc->desc = LRC_VALID;
	lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT);
	/* TODO: Priority */

	/*
	 * While this appears to have something about privileged batches or
	 * some such, it really just means PPGTT mode.
	 */
	if (vm)
		lrc->desc |= LRC_PRIVILEGE;

	if (GRAPHICS_VERx100(xe) < 1250) {
		lrc->desc |= FIELD_PREP(LRC_ENGINE_INSTANCE, hwe->instance);
		lrc->desc |= FIELD_PREP(LRC_ENGINE_CLASS, hwe->class);
	}

	arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));

	map = __xe_lrc_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	map = __xe_lrc_start_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	return 0;

err_lrc_finish:
	xe_lrc_finish(lrc);
	return err;
}

/**
 * xe_lrc_create - Create a LRC
 * @hwe: Hardware Engine
 * @vm: The VM (address space)
 * @ring_size: LRC ring size
 *
 * Allocate and initialize the Logical Ring Context (LRC).
 *
 * Return: Pointer to the created LRC on success, or an error pointer on
 * failure.
 */
struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
			     u32 ring_size)
{
	struct xe_lrc *lrc;
	int err;

	lrc = kzalloc(sizeof(*lrc), GFP_KERNEL);
	if (!lrc)
		return ERR_PTR(-ENOMEM);

	err = xe_lrc_init(lrc, hwe, vm, ring_size);
	if (err) {
		kfree(lrc);
		return ERR_PTR(err);
	}

	return lrc;
}

/**
 * xe_lrc_destroy - Destroy the LRC
 * @ref: reference to LRC
 *
 * Called when ref == 0: releases resources held by the Logical Ring Context
 * (LRC) and frees the LRC memory.
 */
void xe_lrc_destroy(struct kref *ref)
{
	struct xe_lrc *lrc = container_of(ref, struct xe_lrc, refcount);

	xe_lrc_finish(lrc);
	kfree(lrc);
}

void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, tail);
	else
		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, tail);
}

u32 xe_lrc_ring_tail(struct xe_lrc *lrc)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL) & TAIL_ADDR;
	else
		return xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL) & TAIL_ADDR;
}

void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, head);
	else
		xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
}

u32 xe_lrc_ring_head(struct xe_lrc *lrc)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD) & HEAD_ADDR;
	else
		return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
}

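/*
 * Free space in the ring, with tail as the producer and head as the
 * consumer.  With a power-of-two ring size the expression below yields a
 * value in the range [1, size]: e.g. head == tail reports the whole ring
 * free, while tail == head - 1 reports a single free byte.
 */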
u32 xe_lrc_ring_space(struct xe_lrc *lrc)
{
	const u32 head = xe_lrc_ring_head(lrc);
	const u32 tail = lrc->ring.tail;
	const u32 size = lrc->ring.size;

	return ((head - tail - 1) & (size - 1)) + 1;
}

static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring,
				const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);

	iosys_map_incr(&ring, lrc->ring.tail);
	xe_map_memcpy_to(xe, &ring, 0, data, size);
	lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1);
}

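/*
 * Copy @size bytes (dword-aligned) of commands into the ring, splitting the
 * copy in two when it would run past the end of the ring, and padding with
 * an MI_NOOP when @size is not qword-aligned so that the tail always stays
 * 8-byte aligned.
 */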
void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map ring;
	u32 rhs;
	size_t aligned_size;

	xe_assert(xe, IS_ALIGNED(size, 4));
	aligned_size = ALIGN(size, 8);

	ring = __xe_lrc_ring_map(lrc);

	xe_assert(xe, lrc->ring.tail < lrc->ring.size);
	rhs = lrc->ring.size - lrc->ring.tail;
	if (size > rhs) {
		__xe_lrc_write_ring(lrc, ring, data, rhs);
		__xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs);
	} else {
		__xe_lrc_write_ring(lrc, ring, data, size);
	}

	if (aligned_size > size) {
		u32 noop = MI_NOOP;

		__xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop));
	}
}

u64 xe_lrc_descriptor(struct xe_lrc *lrc)
{
	return lrc->desc | xe_lrc_ggtt_addr(lrc);
}

u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_seqno_ggtt_addr(lrc);
}

/**
 * xe_lrc_alloc_seqno_fence() - Allocate an lrc seqno fence.
 *
 * Allocate but don't initialize an lrc seqno fence.
 *
 * Return: Pointer to the allocated fence, or an error pointer on failure.
 */
struct dma_fence *xe_lrc_alloc_seqno_fence(void)
{
	return xe_hw_fence_alloc();
}

/**
 * xe_lrc_free_seqno_fence() - Free an lrc seqno fence.
 * @fence: Pointer to the fence to free.
 *
 * Frees an lrc seqno fence that hasn't yet been initialized.
 */
void xe_lrc_free_seqno_fence(struct dma_fence *fence)
{
	xe_hw_fence_free(fence);
}

/**
 * xe_lrc_init_seqno_fence() - Initialize an lrc seqno fence.
 * @lrc: Pointer to the lrc.
 * @fence: Pointer to the fence to initialize.
 *
 * Initializes a pre-allocated lrc seqno fence.
 * After initialization, the fence is subject to normal
 * dma-fence refcounting.
 */
void xe_lrc_init_seqno_fence(struct xe_lrc *lrc, struct dma_fence *fence)
{
	xe_hw_fence_init(fence, &lrc->fence_ctx, __xe_lrc_seqno_map(lrc));
}

s32 xe_lrc_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}

s32 xe_lrc_start_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_start_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}

u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_start_seqno_ggtt_addr(lrc);
}

u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_ggtt_addr(lrc);
}

struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_map(lrc);
}

static int instr_dw(u32 cmd_header)
{
	/* GFXPIPE "SINGLE_DW" opcodes are a single dword */
	if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) ==
	    GFXPIPE_SINGLE_DW_CMD(0, 0))
		return 1;

	/* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */
	if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST)
		return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2;

	/* Most instructions have the # of dwords (minus 2) in 7:0 */
	return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2;
}

static int dump_mi_command(struct drm_printer *p,
			   struct xe_gt *gt,
			   u32 *dw,
			   int remaining_dw)
{
	u32 inst_header = *dw;
	u32 numdw = instr_dw(inst_header);
	u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header);
	int num_noop;

	/* First check for commands that don't have/use a '# DW' field */
	switch (inst_header & MI_OPCODE) {
	case MI_NOOP:
		num_noop = 1;
		while (num_noop < remaining_dw &&
		       (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP)
			num_noop++;
		drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop);
		return num_noop;

	case MI_TOPOLOGY_FILTER:
		drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header);
		return 1;

	case MI_BATCH_BUFFER_END:
		drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header);
		/* Return 'remaining_dw' to consume the rest of the LRC */
		return remaining_dw;
	}

	/*
	 * Any remaining commands include a # of dwords.  We should make sure
	 * it doesn't exceed the remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (inst_header & MI_OPCODE) {
	case MI_LOAD_REGISTER_IMM:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n",
			   inst_header, (numdw - 1) / 2);
		for (int i = 1; i < numdw; i += 2)
			drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
		return numdw;

	case MI_LOAD_REGISTER_MEM & MI_OPCODE:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n",
			   inst_header,
			   dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "",
			   dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : "");
		if (numdw == 4)
			drm_printf(p, " - %#6x = %#010llx\n",
				   dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2])));
		else
			drm_printf(p, " - %*ph (%s)\n",
				   (int)sizeof(u32) * (numdw - 1), dw + 1,
				   numdw < 4 ? "truncated" : "malformed");
		return numdw;

	case MI_FORCE_WAKEUP:
		drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header);
		return numdw;

	default:
		drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n",
			   inst_header, opcode, numdw);
		return numdw;
	}
}

static int dump_gfxpipe_command(struct drm_printer *p,
				struct xe_gt *gt,
				u32 *dw,
				int remaining_dw)
{
	u32 numdw = instr_dw(*dw);
	u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw);
	u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw);
	u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw);

	/*
	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
	 * remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (*dw & GFXPIPE_MATCH_MASK) {
#define MATCH(cmd) \
	case cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw
#define MATCH3D(cmd) \
	case CMD_##cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw

	MATCH(STATE_BASE_ADDRESS);
	MATCH(STATE_SIP);
	MATCH(GPGPU_CSR_BASE_ADDRESS);
	MATCH(STATE_COMPUTE_MODE);
	MATCH3D(3DSTATE_BTD);
	MATCH(STATE_SYSTEM_MEM_FENCE_ADDRESS);
	MATCH(STATE_CONTEXT_DATA_BASE_ADDRESS);

	MATCH3D(3DSTATE_VF_STATISTICS);

	MATCH(PIPELINE_SELECT);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST);
	MATCH3D(3DSTATE_CLEAR_PARAMS);
	MATCH3D(3DSTATE_DEPTH_BUFFER);
	MATCH3D(3DSTATE_STENCIL_BUFFER);
	MATCH3D(3DSTATE_HIER_DEPTH_BUFFER);
	MATCH3D(3DSTATE_VERTEX_BUFFERS);
	MATCH3D(3DSTATE_VERTEX_ELEMENTS);
	MATCH3D(3DSTATE_INDEX_BUFFER);
	MATCH3D(3DSTATE_VF);
	MATCH3D(3DSTATE_MULTISAMPLE);
	MATCH3D(3DSTATE_CC_STATE_POINTERS);
	MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS);
	MATCH3D(3DSTATE_VS);
	MATCH3D(3DSTATE_GS);
	MATCH3D(3DSTATE_CLIP);
	MATCH3D(3DSTATE_SF);
	MATCH3D(3DSTATE_WM);
	MATCH3D(3DSTATE_CONSTANT_VS);
	MATCH3D(3DSTATE_CONSTANT_GS);
	MATCH3D(3DSTATE_CONSTANT_PS);
	MATCH3D(3DSTATE_SAMPLE_MASK);
	MATCH3D(3DSTATE_CONSTANT_HS);
	MATCH3D(3DSTATE_CONSTANT_DS);
	MATCH3D(3DSTATE_HS);
	MATCH3D(3DSTATE_TE);
	MATCH3D(3DSTATE_DS);
	MATCH3D(3DSTATE_STREAMOUT);
	MATCH3D(3DSTATE_SBE);
	MATCH3D(3DSTATE_PS);
	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
	MATCH3D(3DSTATE_CPS_POINTERS);
	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
	MATCH3D(3DSTATE_BLEND_STATE_POINTERS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS);
	MATCH3D(3DSTATE_VF_INSTANCING);
	MATCH3D(3DSTATE_VF_SGVS);
	MATCH3D(3DSTATE_VF_TOPOLOGY);
	MATCH3D(3DSTATE_WM_CHROMAKEY);
	MATCH3D(3DSTATE_PS_BLEND);
	MATCH3D(3DSTATE_WM_DEPTH_STENCIL);
	MATCH3D(3DSTATE_PS_EXTRA);
	MATCH3D(3DSTATE_RASTER);
	MATCH3D(3DSTATE_SBE_SWIZ);
	MATCH3D(3DSTATE_WM_HZ_OP);
	MATCH3D(3DSTATE_VF_COMPONENT_PACKING);
	MATCH3D(3DSTATE_VF_SGVS_2);
	MATCH3D(3DSTATE_VFG);
	MATCH3D(3DSTATE_URB_ALLOC_VS);
	MATCH3D(3DSTATE_URB_ALLOC_HS);
	MATCH3D(3DSTATE_URB_ALLOC_DS);
	MATCH3D(3DSTATE_URB_ALLOC_GS);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_0);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_1);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_2);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_3);
	MATCH3D(3DSTATE_PRIMITIVE_REPLICATION);
	MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO);
	MATCH3D(3DSTATE_AMFS);
	MATCH3D(3DSTATE_DEPTH_BOUNDS);
	MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS);
	MATCH3D(3DSTATE_CONSTANT_TS_POINTER);
	MATCH3D(3DSTATE_MESH_CONTROL);
	MATCH3D(3DSTATE_MESH_DISTRIB);
	MATCH3D(3DSTATE_TASK_REDISTRIB);
	MATCH3D(3DSTATE_MESH_SHADER);
	MATCH3D(3DSTATE_MESH_SHADER_DATA);
	MATCH3D(3DSTATE_TASK_CONTROL);
	MATCH3D(3DSTATE_TASK_SHADER);
	MATCH3D(3DSTATE_TASK_SHADER_DATA);
	MATCH3D(3DSTATE_URB_ALLOC_MESH);
	MATCH3D(3DSTATE_URB_ALLOC_TASK);
	MATCH3D(3DSTATE_CLIP_MESH);
	MATCH3D(3DSTATE_SBE_MESH);
	MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE);
	MATCH3D(3DSTATE_CHROMA_KEY);
	MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET);
	MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN);
	MATCH3D(3DSTATE_LINE_STIPPLE);
	MATCH3D(3DSTATE_AA_LINE_PARAMETERS);
	MATCH3D(3DSTATE_MONOFILTER_SIZE);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
	MATCH3D(3DSTATE_SO_DECL_LIST);
	MATCH3D(3DSTATE_SO_BUFFER);
	MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC);
	MATCH3D(3DSTATE_SAMPLE_PATTERN);
	MATCH3D(3DSTATE_3D_MODE);
	MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE);
	MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS);
	MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO);

	default:
		drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n",
			   *dw, pipeline, opcode, subopcode, numdw);
		return numdw;
	}
}

static int dump_gfx_state_command(struct drm_printer *p,
				  struct xe_gt *gt,
				  u32 *dw,
				  int remaining_dw)
{
	u32 numdw = instr_dw(*dw);
	u32 opcode = REG_FIELD_GET(GFX_STATE_OPCODE, *dw);

	/*
	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
	 * remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (*dw & (XE_INSTR_GFX_STATE | GFX_STATE_OPCODE)) {
	MATCH(STATE_WRITE_INLINE);

	default:
		drm_printf(p, "[%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n",
			   *dw, opcode, numdw);
		return numdw;
	}
}

void xe_lrc_dump_default(struct drm_printer *p,
			 struct xe_gt *gt,
			 enum xe_engine_class hwe_class)
{
	u32 *dw;
	int remaining_dw, num_dw;

	if (!gt->default_lrc[hwe_class]) {
		drm_printf(p, "No default LRC for class %d\n", hwe_class);
		return;
	}

	/*
	 * Skip the beginning of the LRC since it contains the per-process
	 * hardware status page.
	 */
	dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE;
	remaining_dw = (xe_gt_lrc_size(gt, hwe_class) - LRC_PPHWSP_SIZE) / 4;

	while (remaining_dw > 0) {
		if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) {
			num_dw = dump_mi_command(p, gt, dw, remaining_dw);
		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) {
			num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw);
		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFX_STATE) {
			num_dw = dump_gfx_state_command(p, gt, dw, remaining_dw);
		} else {
			num_dw = min(instr_dw(*dw), remaining_dw);
			drm_printf(p, "[%#010x] Unknown instruction of type %#x, likely %d dwords\n",
				   *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw),
				   num_dw);
		}

		dw += num_dw;
		remaining_dw -= num_dw;
	}
}

struct instr_state {
	u32 instr;
	u16 num_dw;
};

static const struct instr_state xe_hpg_svg_state[] = {
	{ .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 },
	{ .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_VS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_CLIP, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SF, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_RASTER, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_HS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 },
	{ .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TE, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_GS, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 },
};

void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb)
{
	struct xe_gt *gt = q->hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	const struct instr_state *state_table = NULL;
	int state_table_size = 0;

	/*
	 * Wa_14019789679
	 *
	 * If the driver doesn't explicitly emit the SVG instructions while
	 * setting up the default LRC, the context switch will write 0's
	 * (noops) into the LRC memory rather than the expected instruction
	 * headers.  Application contexts start out as a copy of the default
	 * LRC, and if they also do not emit specific settings for some SVG
	 * state, then on context restore they'll unintentionally inherit
	 * whatever state setting the previous context had programmed into the
	 * hardware (i.e., the lack of a 3DSTATE_* instruction in the LRC will
	 * prevent the hardware from resetting that state back to any specific
	 * value).
	 *
	 * The official workaround only requires emitting 3DSTATE_MESH_CONTROL
	 * since that's a specific state setting that can easily cause GPU
	 * hangs if unintentionally inherited.  However, to be safe, we'll
	 * continue to emit all of the SVG state since it's best not to leak
	 * any of the state between contexts, even if that leakage is harmless.
	 */
	if (XE_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) {
		state_table = xe_hpg_svg_state;
		state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
	}

	if (!state_table) {
		xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
			  GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
		return;
	}

	for (int i = 0; i < state_table_size; i++) {
		u32 instr = state_table[i].instr;
		u16 num_dw = state_table[i].num_dw;
		bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW);

		xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE);
		xe_gt_assert(gt, num_dw != 0);
		xe_gt_assert(gt, is_single_dw ^ (num_dw > 1));

		/*
		 * Xe2's SVG context is the same as the one on DG2 / MTL
		 * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has
		 * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined).
		 * Just make the replacement here rather than defining a
		 * whole separate table for the single trivial change.
		 */
		if (GRAPHICS_VER(xe) >= 20 &&
		    instr == CMD_3DSTATE_DRAWING_RECTANGLE)
			instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST;

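		/*
		 * Only the instruction header dword is written here; bb->len
		 * is advanced past the full instruction, so the payload
		 * dwords are left at their default contents.  Per the
		 * workaround description above, it is the presence of the
		 * instruction headers that matters.
		 */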
		bb->cs[bb->len] = instr;
		if (!is_single_dw)
			bb->cs[bb->len] |= (num_dw - 2);

		bb->len += num_dw;
	}
}

struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
{
	struct xe_lrc_snapshot *snapshot = kmalloc(sizeof(*snapshot), GFP_NOWAIT);

	if (!snapshot)
		return NULL;

	if (lrc->bo->vm)
		xe_vm_get(lrc->bo->vm);

	snapshot->context_desc = xe_lrc_ggtt_addr(lrc);
	snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc);
	snapshot->head = xe_lrc_ring_head(lrc);
	snapshot->tail.internal = lrc->ring.tail;
	snapshot->tail.memory = xe_lrc_ring_tail(lrc);
	snapshot->start_seqno = xe_lrc_start_seqno(lrc);
	snapshot->seqno = xe_lrc_seqno(lrc);
	snapshot->lrc_bo = xe_bo_get(lrc->bo);
	snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
	snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset;
	snapshot->lrc_snapshot = NULL;
	snapshot->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);
	snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc);
	return snapshot;
}

void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot)
{
	struct xe_bo *bo;
	struct xe_vm *vm;
	struct iosys_map src;

	if (!snapshot)
		return;

	bo = snapshot->lrc_bo;
	vm = bo->vm;
	snapshot->lrc_bo = NULL;

	snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL);
	if (!snapshot->lrc_snapshot)
		goto put_bo;

	xe_bo_lock(bo, false);
	if (!ttm_bo_vmap(&bo->ttm, &src)) {
		xe_map_memcpy_from(xe_bo_device(bo),
				   snapshot->lrc_snapshot, &src, snapshot->lrc_offset,
				   snapshot->lrc_size);
		ttm_bo_vunmap(&bo->ttm, &src);
	} else {
		kvfree(snapshot->lrc_snapshot);
		snapshot->lrc_snapshot = NULL;
	}
	xe_bo_unlock(bo);
put_bo:
	xe_bo_put(bo);
	if (vm)
		xe_vm_put(vm);
}

void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p)
{
	unsigned long i;

	if (!snapshot)
		return;

	drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc);
	drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n",
		   snapshot->indirect_context_desc);
	drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head);
	drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
		   snapshot->tail.internal, snapshot->tail.memory);
	drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno);
	drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno);
	drm_printf(p, "\tTimestamp: 0x%08x\n", snapshot->ctx_timestamp);
	drm_printf(p, "\tJob Timestamp: 0x%08x\n", snapshot->ctx_job_timestamp);

	if (!snapshot->lrc_snapshot)
		return;

	drm_printf(p, "\t[HWSP].length: 0x%x\n", LRC_PPHWSP_SIZE);
	drm_puts(p, "\t[HWSP].data: ");
	for (i = 0; i < LRC_PPHWSP_SIZE; i += sizeof(u32)) {
		u32 *val = snapshot->lrc_snapshot + i;
		char dumped[ASCII85_BUFSZ];

		drm_puts(p, ascii85_encode(*val, dumped));
	}

	drm_printf(p, "\n\t[HWCTX].length: 0x%lx\n", snapshot->lrc_size - LRC_PPHWSP_SIZE);
	drm_puts(p, "\t[HWCTX].data: ");
	for (; i < snapshot->lrc_size; i += sizeof(u32)) {
		u32 *val = snapshot->lrc_snapshot + i;
		char dumped[ASCII85_BUFSZ];

		drm_puts(p, ascii85_encode(*val, dumped));
	}
	drm_puts(p, "\n");
}

void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot)
{
	if (!snapshot)
		return;

	kvfree(snapshot->lrc_snapshot);
	if (snapshot->lrc_bo) {
		struct xe_vm *vm;

		vm = snapshot->lrc_bo->vm;
		xe_bo_put(snapshot->lrc_bo);
		if (vm)
			xe_vm_put(vm);
	}
	kfree(snapshot);
}

/**
 * xe_lrc_update_timestamp() - Update ctx timestamp
 * @lrc: Pointer to the lrc.
 * @old_ts: Old timestamp value
 *
 * Populate @old_ts with the current saved ctx timestamp, read the new ctx
 * timestamp and update the saved value.
 *
 * Returns: New ctx timestamp value
 */
u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts)
{
	*old_ts = lrc->ctx_timestamp;

	lrc->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);

	return lrc->ctx_timestamp;
}