1// SPDX-License-Identifier: MIT
2/*
3 * Copyright © 2021 Intel Corporation
4 */
5
6#include "xe_lrc.h"
7
8#include "instructions/xe_mi_commands.h"
9#include "instructions/xe_gfxpipe_commands.h"
10#include "regs/xe_engine_regs.h"
11#include "regs/xe_gpu_commands.h"
12#include "regs/xe_lrc_layout.h"
13#include "xe_bb.h"
14#include "xe_bo.h"
15#include "xe_device.h"
16#include "xe_drm_client.h"
17#include "xe_exec_queue_types.h"
18#include "xe_gt.h"
19#include "xe_gt_printk.h"
20#include "xe_hw_fence.h"
21#include "xe_map.h"
22#include "xe_memirq.h"
23#include "xe_sriov.h"
24#include "xe_vm.h"
25
26#define LRC_VALID (1 << 0)
27#define LRC_PRIVILEGE (1 << 8)
28#define LRC_ADDRESSING_MODE_SHIFT 3
29#define LRC_LEGACY_64B_CONTEXT 3
30
31#define ENGINE_CLASS_SHIFT 61
32#define ENGINE_INSTANCE_SHIFT 48
33
/* Resolve the owning xe_device from an LRC via its fence context's GT. */
static struct xe_device *
lrc_to_xe(struct xe_lrc *lrc)
{
	return gt_to_xe(lrc->fence_ctx.gt);
}
39
/**
 * xe_lrc_size() - Size in bytes of the context image for an engine class
 * @xe: the xe device
 * @class: hardware engine class
 *
 * Render/compute contexts are larger than the other classes; Xe2 (ver >= 20)
 * shrank them again.  Unknown classes warn and fall back to the common
 * 2-page size.
 */
size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class)
{
	switch (class) {
	case XE_ENGINE_CLASS_RENDER:
		if (GRAPHICS_VER(xe) >= 20)
			return 4 * SZ_4K;
		else
			return 14 * SZ_4K;
	case XE_ENGINE_CLASS_COMPUTE:
		/* 14 pages since graphics_ver == 11 */
		if (GRAPHICS_VER(xe) >= 20)
			return 3 * SZ_4K;
		else
			return 14 * SZ_4K;
	default:
		WARN(1, "Unknown engine class: %d", class);
		fallthrough;
	case XE_ENGINE_CLASS_COPY:
	case XE_ENGINE_CLASS_VIDEO_DECODE:
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
	case XE_ENGINE_CLASS_OTHER:
		return 2 * SZ_4K;
	}
}
64
65/*
66 * The per-platform tables are u8-encoded in @data. Decode @data and set the
67 * addresses' offset and commands in @regs. The following encoding is used
68 * for each byte. There are 2 steps: decoding commands and decoding addresses.
69 *
70 * Commands:
71 * [7]: create NOPs - number of NOPs are set in lower bits
72 * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set
73 * MI_LRI_FORCE_POSTED
74 * [5:0]: Number of NOPs or registers to set values to in case of
75 * MI_LOAD_REGISTER_IMM
76 *
77 * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count"
78 * number of registers. They are set by using the REG/REG16 macros: the former
79 * is used for offsets smaller than 0x200 while the latter is for values bigger
80 * than that. Those macros already set all the bits documented below correctly:
81 *
82 * [7]: When a register offset needs more than 6 bits, use additional bytes, to
83 * follow, for the lower bits
84 * [6:0]: Register offset, without considering the engine base.
85 *
86 * This function only tweaks the commands and register offsets. Values are not
87 * filled out.
88 */
static void set_offsets(u32 *regs,
			const u8 *data,
			const struct xe_hw_engine *hwe)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | \
			   BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
	(((x) >> 2) & 0x7f)
{
	const u32 base = hwe->mmio_base;

	/* A zero byte terminates the encoded table */
	while (*data) {
		u8 count, flags;

		if (*data & BIT(7)) { /* skip */
			count = *data++ & ~BIT(7);
			regs += count;
			continue;
		}

		/* LRI header byte: [5:0] register count, [7:6] flags */
		count = *data & 0x3f;
		flags = *data >> 6;
		data++;

		*regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
		if (flags & POSTED)
			*regs |= MI_LRI_FORCE_POSTED;
		*regs |= MI_LRI_LRM_CS_MMIO;
		regs++;

		xe_gt_assert(hwe->gt, count);
		do {
			u32 offset = 0;
			u8 v;

			/* Varint-style offset: bit 7 set means more bytes follow */
			do {
				v = *data++;
				offset <<= 7;
				offset |= v & ~BIT(7);
			} while (v & BIT(7));

			/* Write the register address; skip the value dword (left 0) */
			regs[0] = base + (offset << 2);
			regs += 2;
		} while (--count);
	}

	*regs = MI_BATCH_BUFFER_END | BIT(0);
}
140
/* Gen12 non-render (xCS) engine context register offset table. */
static const u8 gen12_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};
172
/* DG2 non-render (xCS) engine table; adds 0x120/0x124 to the first LRI. */
static const u8 dg2_xcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};
206
/* Gen12 render (RCS) engine context register offset table. */
static const u8 gen12_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),
	NOP(3 + 9 + 1),

	LRI(51, POSTED),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),

	0
};
302
/* Xe_HP render (RCS) engine context register offset table. */
static const u8 xehp_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};
343
/* DG2 render (RCS) engine table; adds 0x120/0x124 to the first LRI. */
static const u8 dg2_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};
386
/* MTL render (RCS) engine table; drops 0x1b0 from the third LRI vs. DG2. */
static const u8 mtl_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(2),
	LRI(2, POSTED),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};
429
/*
 * Register offsets common to all Xe2 engine classes.  Bracketed values in
 * the comments are the dword offsets within the context image.
 */
#define XE2_CTX_COMMON \
	NOP(1),			/* [0x00] */ \
	LRI(15, POSTED),	/* [0x01] */ \
	REG16(0x244),		/* [0x02] CTXT_SR_CTL */ \
	REG(0x034),		/* [0x04] RING_BUFFER_HEAD */ \
	REG(0x030),		/* [0x06] RING_BUFFER_TAIL */ \
	REG(0x038),		/* [0x08] RING_BUFFER_START */ \
	REG(0x03c),		/* [0x0a] RING_BUFFER_CONTROL */ \
	REG(0x168),		/* [0x0c] BB_ADDR_UDW */ \
	REG(0x140),		/* [0x0e] BB_ADDR */ \
	REG(0x110),		/* [0x10] BB_STATE */ \
	REG(0x1c0),		/* [0x12] BB_PER_CTX_PTR */ \
	REG(0x1c4),		/* [0x14] RCS_INDIRECT_CTX */ \
	REG(0x1c8),		/* [0x16] RCS_INDIRECT_CTX_OFFSET */ \
	REG(0x180),		/* [0x18] CCID */ \
	REG16(0x2b4),		/* [0x1a] SEMAPHORE_TOKEN */ \
	REG(0x120),		/* [0x1c] PRT_BB_STATE */ \
	REG(0x124),		/* [0x1e] PRT_BB_STATE_UDW */ \
	\
	NOP(1),			/* [0x20] */ \
	LRI(9, POSTED),		/* [0x21] */ \
	REG16(0x3a8),		/* [0x22] CTX_TIMESTAMP */ \
	REG16(0x3ac),		/* [0x24] CTX_TIMESTAMP_UDW */ \
	REG(0x108),		/* [0x26] INDIRECT_RING_STATE */ \
	REG16(0x284),		/* [0x28] dummy reg */ \
	REG16(0x280),		/* [0x2a] CS_ACC_CTR_THOLD */ \
	REG16(0x27c),		/* [0x2c] CS_CTX_SYS_PASID */ \
	REG16(0x278),		/* [0x2e] CS_CTX_ASID */ \
	REG16(0x274),		/* [0x30] PTBP_UDW */ \
	REG16(0x270)		/* [0x32] PTBP_LDW */
460
/* Xe2 render (RCS) engine context register offset table. */
static const u8 xe2_rcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(2),			/* [0x34] */
	LRI(2, POSTED),		/* [0x36] */
	REG16(0x5a8),		/* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */
	REG16(0x5ac),		/* [0x39] PREEMPTION_STATUS */

	NOP(6),			/* [0x41] */
	LRI(1, 0),		/* [0x47] */
	REG(0x0c8),		/* [0x48] R_PWR_CLK_STATE */

	0
};
475
/* Xe2 copy (BCS) engine context register offset table. */
static const u8 xe2_bcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(4 + 8 + 1),		/* [0x34] */
	LRI(2, POSTED),		/* [0x41] */
	REG16(0x200),		/* [0x42] BCS_SWCTRL */
	REG16(0x204),		/* [0x44] BLIT_CCTL */

	0
};
486
/* Xe2 video/other (xCS) engines only need the common registers. */
static const u8 xe2_xcs_offsets[] = {
	XE2_CTX_COMMON,

	0
};
492
493#undef REG16
494#undef REG
495#undef LRI
496#undef NOP
497
498static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
499{
500 if (class == XE_ENGINE_CLASS_RENDER) {
501 if (GRAPHICS_VER(xe) >= 20)
502 return xe2_rcs_offsets;
503 else if (GRAPHICS_VERx100(xe) >= 1270)
504 return mtl_rcs_offsets;
505 else if (GRAPHICS_VERx100(xe) >= 1255)
506 return dg2_rcs_offsets;
507 else if (GRAPHICS_VERx100(xe) >= 1250)
508 return xehp_rcs_offsets;
509 else
510 return gen12_rcs_offsets;
511 } else if (class == XE_ENGINE_CLASS_COPY) {
512 if (GRAPHICS_VER(xe) >= 20)
513 return xe2_bcs_offsets;
514 else
515 return gen12_xcs_offsets;
516 } else {
517 if (GRAPHICS_VER(xe) >= 20)
518 return xe2_xcs_offsets;
519 else if (GRAPHICS_VERx100(xe) >= 1255)
520 return dg2_xcs_offsets;
521 else
522 return gen12_xcs_offsets;
523 }
524}
525
/* Program CTX_CONTEXT_CONTROL in the register state with its initial flags. */
static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
{
	regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
						       CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);

	/* TODO: Timestamp */
}
533
/*
 * Program memory-based interrupt plumbing into the register state.  Only
 * applies to SR-IOV VFs with memirq support; a no-op otherwise.
 */
static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
{
	struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->sriov.vf.memirq;
	struct xe_device *xe = gt_to_xe(hwe->gt);

	if (!IS_SRIOV_VF(xe) || !xe_device_has_memirq(xe))
		return;

	/* LRM to load the interrupt-enable mask from the memirq page */
	regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM |
					MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT;
	regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr;
	regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq);

	/* LRI to point status/source reporting at the memirq pages */
	regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
				       MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
	regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
	regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq);
	regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
	regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq);
}
554
555static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
556{
557 struct xe_device *xe = gt_to_xe(hwe->gt);
558
559 if (GRAPHICS_VERx100(xe) >= 1250)
560 return 0x70;
561 else
562 return 0x60;
563}
564
565static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
566{
567 int x;
568
569 x = lrc_ring_mi_mode(hwe);
570 regs[x + 1] &= ~STOP_RING;
571 regs[x + 1] |= STOP_RING << 16;
572}
573
/* The ring buffer lives at the very start of the LRC BO. */
static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
{
	return 0;
}
578
/* The per-process HWSP immediately follows the ring in the LRC BO. */
u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
{
	return lrc->ring.size;
}
583
584/* Make the magic macros work */
585#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset
586
587#define LRC_SEQNO_PPHWSP_OFFSET 512
588#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
589#define LRC_PARALLEL_PPHWSP_OFFSET 2048
590#define LRC_PPHWSP_SIZE SZ_4K
591
592static size_t lrc_reg_size(struct xe_device *xe)
593{
594 if (GRAPHICS_VERx100(xe) >= 1250)
595 return 96 * sizeof(u32);
596 else
597 return 80 * sizeof(u32);
598}
599
/* Bytes at the start of the context image to skip: PPHWSP + register state. */
size_t xe_lrc_skip_size(struct xe_device *xe)
{
	return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
}
604
static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
{
	/* The seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
}
610
static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
{
	/* The start seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
}
616
static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
{
	/* The parallel scratch area lives in the driver-defined part of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
}
622
/* The register state follows the full PPHWSP page. */
static inline u32 __xe_lrc_regs_offset(struct xe_lrc *lrc)
{
	return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
}
627
/*
 * For each named LRC sub-region, generate a pair of accessors:
 * __xe_lrc_<elem>_map() returning an iosys_map into the BO, and
 * __xe_lrc_<elem>_ggtt_addr() returning the region's GGTT address.
 * Both build on the corresponding __xe_lrc_<elem>_offset() helper.
 */
#define DECL_MAP_ADDR_HELPERS(elem) \
static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
{ \
	struct iosys_map map = lrc->bo->vmap; \
\
	xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map));  \
	iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
	return map; \
} \
static inline u32 __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
{ \
	return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
} \

DECL_MAP_ADDR_HELPERS(ring)
DECL_MAP_ADDR_HELPERS(pphwsp)
DECL_MAP_ADDR_HELPERS(seqno)
DECL_MAP_ADDR_HELPERS(regs)
DECL_MAP_ADDR_HELPERS(start_seqno)
DECL_MAP_ADDR_HELPERS(parallel)

#undef DECL_MAP_ADDR_HELPERS
650
/* The context descriptor's GGTT address points at the PPHWSP. */
u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_pphwsp_ggtt_addr(lrc);
}
655
656u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
657{
658 struct xe_device *xe = lrc_to_xe(lrc);
659 struct iosys_map map;
660
661 map = __xe_lrc_regs_map(lrc);
662 iosys_map_incr(&map, reg_nr * sizeof(u32));
663 return xe_map_read32(xe, &map);
664}
665
666void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
667{
668 struct xe_device *xe = lrc_to_xe(lrc);
669 struct iosys_map map;
670
671 map = __xe_lrc_regs_map(lrc);
672 iosys_map_incr(&map, reg_nr * sizeof(u32));
673 xe_map_write32(xe, &map, val);
674}
675
/*
 * Build a zeroed context image with only the register state initialized.
 * Returns a kzalloc'd buffer of xe_lrc_size() bytes that the caller must
 * kfree, or NULL on allocation failure.
 */
static void *empty_lrc_data(struct xe_hw_engine *hwe)
{
	struct xe_device *xe = gt_to_xe(hwe->gt);
	void *data;
	u32 *regs;

	data = kzalloc(xe_lrc_size(xe, hwe->class), GFP_KERNEL);
	if (!data)
		return NULL;

	/* 1st page: Per-Process of HW status Page */
	regs = data + LRC_PPHWSP_SIZE;
	set_offsets(regs, reg_offsets(xe, hwe->class), hwe);
	set_context_control(regs, hwe);
	set_memory_based_intr(regs, hwe);
	reset_stop_ring(regs, hwe);

	return data;
}
695
/* Point the context's PDP0 entry at the VM's top-level page directory. */
static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
{
	u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile);

	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
}
703
704#define PVC_CTX_ASID (0x2e + 1)
705#define PVC_CTX_ACC_CTR_THOLD (0x2a + 1)
706
/**
 * xe_lrc_init() - Allocate and initialize a logical ring context
 * @lrc: the LRC to initialize
 * @hwe: hardware engine this context will run on
 * @q: owning exec queue (unused in this function)
 * @vm: VM to bind the context to, may be NULL
 * @ring_size: size in bytes of the ring buffer placed at the start of the BO
 *
 * Allocates a pinned, GGTT-mapped BO holding ring + context image, seeds the
 * image (from the GT's default LRC if available, otherwise from a freshly
 * built empty image), and programs ring registers, PPGTT and descriptor.
 *
 * Return: 0 on success, negative error code on failure.
 */
int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
		struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct iosys_map map;
	void *init_data = NULL;
	u32 arb_enable;
	int err;

	lrc->flags = 0;

	/*
	 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
	 * via VM bind calls.
	 */
	lrc->bo = xe_bo_create_pin_map(xe, tile, vm,
				      ring_size + xe_lrc_size(xe, hwe->class),
				      ttm_bo_type_kernel,
				      XE_BO_CREATE_VRAM_IF_DGFX(tile) |
				      XE_BO_CREATE_GGTT_BIT);
	if (IS_ERR(lrc->bo))
		return PTR_ERR(lrc->bo);

	lrc->tile = gt_to_tile(hwe->gt);
	lrc->ring.size = ring_size;
	lrc->ring.tail = 0;

	xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
			     hwe->fence_irq, hwe->name);

	/* No default image recorded yet for this class: build one from scratch */
	if (!gt->default_lrc[hwe->class]) {
		init_data = empty_lrc_data(hwe);
		if (!init_data) {
			err = -ENOMEM;
			goto err_lrc_finish;
		}
	}

	/*
	 * Init Per-Process of HW status Page, LRC / context state to known
	 * values
	 */
	map = __xe_lrc_pphwsp_map(lrc);
	if (!init_data) {
		xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE);	/* PPHWSP */
		xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
				 gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
				 xe_lrc_size(xe, hwe->class) - LRC_PPHWSP_SIZE);
	} else {
		xe_map_memcpy_to(xe, &map, 0, init_data,
				 xe_lrc_size(xe, hwe->class));
		kfree(init_data);
	}

	if (vm) {
		xe_lrc_set_ppgtt(lrc, vm);

		if (vm->xef)
			xe_drm_client_add_bo(vm->xef->client, lrc->bo);
	}

	/* Program the ring registers in the context image */
	xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
	xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
	xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
	xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
			     RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	if (xe->info.has_asid && vm)
		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);

	lrc->desc = LRC_VALID;
	lrc->desc |= LRC_LEGACY_64B_CONTEXT << LRC_ADDRESSING_MODE_SHIFT;
	/* TODO: Priority */

	/* While this appears to have something about privileged batches or
	 * some such, it really just means PPGTT mode.
	 */
	if (vm)
		lrc->desc |= LRC_PRIVILEGE;

	/* Pre-Xe_HP descriptors carry the engine identity as well */
	if (GRAPHICS_VERx100(xe) < 1250) {
		lrc->desc |= (u64)hwe->instance << ENGINE_INSTANCE_SHIFT;
		lrc->desc |= (u64)hwe->class << ENGINE_CLASS_SHIFT;
	}

	arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));

	/* Initialize (start) seqno to one before the first fence's seqno */
	map = __xe_lrc_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	map = __xe_lrc_start_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	return 0;

err_lrc_finish:
	xe_lrc_finish(lrc);
	return err;
}
808
/* Tear down an LRC: release its fence context and unpin/free its BO. */
void xe_lrc_finish(struct xe_lrc *lrc)
{
	xe_hw_fence_ctx_finish(&lrc->fence_ctx);
	xe_bo_lock(lrc->bo, false);
	xe_bo_unpin(lrc->bo);
	xe_bo_unlock(lrc->bo);
	xe_bo_put(lrc->bo);
}
817
/* Overwrite the saved ring head in the context image. */
void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
{
	xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
}
822
/* Read the saved ring head from the context image (address bits only). */
u32 xe_lrc_ring_head(struct xe_lrc *lrc)
{
	return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
}
827
828u32 xe_lrc_ring_space(struct xe_lrc *lrc)
829{
830 const u32 head = xe_lrc_ring_head(lrc);
831 const u32 tail = lrc->ring.tail;
832 const u32 size = lrc->ring.size;
833
834 return ((head - tail - 1) & (size - 1)) + 1;
835}
836
/*
 * Copy @size bytes at the current tail and advance it, wrapping modulo the
 * ring size.  The caller guarantees the copy itself does not cross the end
 * of the ring.
 */
static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring,
				const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);

	iosys_map_incr(&ring, lrc->ring.tail);
	xe_map_memcpy_to(xe, &ring, 0, data, size);
	lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1);
}
846
/*
 * Write @size bytes of commands into the ring, splitting the copy when it
 * would cross the end of the ring, and padding with a single MI_NOOP when
 * @size is dword- but not qword-aligned.
 */
void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map ring;
	u32 rhs;
	size_t aligned_size;

	xe_assert(xe, IS_ALIGNED(size, 4));
	aligned_size = ALIGN(size, 8);

	ring = __xe_lrc_ring_map(lrc);

	xe_assert(xe, lrc->ring.tail < lrc->ring.size);
	rhs = lrc->ring.size - lrc->ring.tail;
	if (size > rhs) {
		/* Wraps: copy up to the end, then the remainder from offset 0 */
		__xe_lrc_write_ring(lrc, ring, data, rhs);
		__xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs);
	} else {
		__xe_lrc_write_ring(lrc, ring, data, size);
	}

	if (aligned_size > size) {
		u32 noop = MI_NOOP;

		__xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop));
	}
}
874
/* Full context descriptor: flag bits ORed with the context's GGTT address. */
u64 xe_lrc_descriptor(struct xe_lrc *lrc)
{
	return lrc->desc | xe_lrc_ggtt_addr(lrc);
}
879
/* GGTT address of the seqno slot in the PPHWSP. */
u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_seqno_ggtt_addr(lrc);
}
884
/* Create a hw fence that signals off this LRC's seqno slot. */
struct dma_fence *xe_lrc_create_seqno_fence(struct xe_lrc *lrc)
{
	return &xe_hw_fence_create(&lrc->fence_ctx,
				   __xe_lrc_seqno_map(lrc))->dma;
}
890
/* Current seqno last written by the hardware for this context. */
s32 xe_lrc_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}
897
/* Seqno of the job most recently started on this context. */
s32 xe_lrc_start_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_start_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}
904
/* GGTT address of the start-seqno slot in the PPHWSP. */
u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_start_seqno_ggtt_addr(lrc);
}
909
/* GGTT address of the parallel-submission scratch area in the PPHWSP. */
u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_ggtt_addr(lrc);
}
914
/* CPU mapping of the parallel-submission scratch area in the PPHWSP. */
struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_map(lrc);
}
919
/* Decode the total dword length of an instruction from its header dword. */
static int instr_dw(u32 cmd_header)
{
	/* GFXPIPE "SINGLE_DW" opcodes are a single dword */
	if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) ==
	    GFXPIPE_SINGLE_DW_CMD(0, 0))
		return 1;

	/* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */
	if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST)
		return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2;

	/* Most instructions have the # of dwords (minus 2) in 7:0 */
	return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2;
}
934
/*
 * Pretty-print one MI command at @dw and return the number of dwords it
 * consumed, clamped to @remaining_dw.
 */
static int dump_mi_command(struct drm_printer *p,
			   struct xe_gt *gt,
			   u32 *dw,
			   int remaining_dw)
{
	u32 inst_header = *dw;
	u32 numdw = instr_dw(inst_header);
	u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header);
	int num_noop;

	/* First check for commands that don't have/use a '# DW' field */
	switch (inst_header & MI_OPCODE) {
	case MI_NOOP:
		/* Coalesce a run of NOOPs into a single output line */
		num_noop = 1;
		while (num_noop < remaining_dw &&
		       (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP)
			num_noop++;
		drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop);
		return num_noop;

	case MI_TOPOLOGY_FILTER:
		drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header);
		return 1;

	case MI_BATCH_BUFFER_END:
		drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header);
		/* Return 'remaining_dw' to consume the rest of the LRC */
		return remaining_dw;
	}

	/*
	 * Any remaining commands include a # of dwords.  We should make sure
	 * it doesn't exceed the remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (inst_header & MI_OPCODE) {
	case MI_LOAD_REGISTER_IMM:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n",
			   inst_header, (numdw - 1) / 2);
		for (int i = 1; i < numdw; i += 2)
			drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
		return numdw;

	case MI_LOAD_REGISTER_MEM & MI_OPCODE:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n",
			   inst_header,
			   dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "",
			   dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : "");
		if (numdw == 4)
			drm_printf(p, " - %#6x = %#010llx\n",
				   dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2])));
		else
			drm_printf(p, " - %*ph (%s)\n",
				   (int)sizeof(u32) * (numdw - 1), dw + 1,
				   numdw < 4 ? "truncated" : "malformed");
		return numdw;

	case MI_FORCE_WAKEUP:
		drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header);
		return numdw;

	default:
		drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n",
			   inst_header, opcode, numdw);
		return numdw;
	}
}
1004
/*
 * Pretty-print one GFXPIPE command at @dw and return the number of dwords
 * it consumed, clamped to @remaining_dw.
 */
static int dump_gfxpipe_command(struct drm_printer *p,
				struct xe_gt *gt,
				u32 *dw,
				int remaining_dw)
{
	u32 numdw = instr_dw(*dw);
	u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw);
	u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw);
	u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw);

	/*
	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
	 * remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (*dw & GFXPIPE_MATCH_MASK) {
#define MATCH(cmd) \
	case cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw
#define MATCH3D(cmd) \
	case CMD_##cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw

	MATCH(STATE_BASE_ADDRESS);
	MATCH(STATE_SIP);
	MATCH(GPGPU_CSR_BASE_ADDRESS);
	MATCH(STATE_COMPUTE_MODE);
	MATCH3D(3DSTATE_BTD);

	MATCH3D(3DSTATE_VF_STATISTICS);

	MATCH(PIPELINE_SELECT);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST);
	MATCH3D(3DSTATE_CLEAR_PARAMS);
	MATCH3D(3DSTATE_DEPTH_BUFFER);
	MATCH3D(3DSTATE_STENCIL_BUFFER);
	MATCH3D(3DSTATE_HIER_DEPTH_BUFFER);
	MATCH3D(3DSTATE_VERTEX_BUFFERS);
	MATCH3D(3DSTATE_VERTEX_ELEMENTS);
	MATCH3D(3DSTATE_INDEX_BUFFER);
	MATCH3D(3DSTATE_VF);
	MATCH3D(3DSTATE_MULTISAMPLE);
	MATCH3D(3DSTATE_CC_STATE_POINTERS);
	MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS);
	MATCH3D(3DSTATE_VS);
	MATCH3D(3DSTATE_GS);
	MATCH3D(3DSTATE_CLIP);
	MATCH3D(3DSTATE_SF);
	MATCH3D(3DSTATE_WM);
	MATCH3D(3DSTATE_CONSTANT_VS);
	MATCH3D(3DSTATE_CONSTANT_GS);
	MATCH3D(3DSTATE_SAMPLE_MASK);
	MATCH3D(3DSTATE_CONSTANT_HS);
	MATCH3D(3DSTATE_CONSTANT_DS);
	MATCH3D(3DSTATE_HS);
	MATCH3D(3DSTATE_TE);
	MATCH3D(3DSTATE_DS);
	MATCH3D(3DSTATE_STREAMOUT);
	MATCH3D(3DSTATE_SBE);
	MATCH3D(3DSTATE_PS);
	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
	MATCH3D(3DSTATE_CPS_POINTERS);
	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
	MATCH3D(3DSTATE_BLEND_STATE_POINTERS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS);
	MATCH3D(3DSTATE_VF_INSTANCING);
	MATCH3D(3DSTATE_VF_SGVS);
	MATCH3D(3DSTATE_VF_TOPOLOGY);
	MATCH3D(3DSTATE_WM_CHROMAKEY);
	MATCH3D(3DSTATE_PS_BLEND);
	MATCH3D(3DSTATE_WM_DEPTH_STENCIL);
	MATCH3D(3DSTATE_PS_EXTRA);
	MATCH3D(3DSTATE_RASTER);
	MATCH3D(3DSTATE_SBE_SWIZ);
	MATCH3D(3DSTATE_WM_HZ_OP);
	MATCH3D(3DSTATE_VF_COMPONENT_PACKING);
	MATCH3D(3DSTATE_VF_SGVS_2);
	MATCH3D(3DSTATE_VFG);
	MATCH3D(3DSTATE_URB_ALLOC_VS);
	MATCH3D(3DSTATE_URB_ALLOC_HS);
	MATCH3D(3DSTATE_URB_ALLOC_DS);
	MATCH3D(3DSTATE_URB_ALLOC_GS);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_0);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_1);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_2);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_3);
	MATCH3D(3DSTATE_PRIMITIVE_REPLICATION);
	MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO);
	MATCH3D(3DSTATE_AMFS);
	MATCH3D(3DSTATE_DEPTH_BOUNDS);
	MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS);
	MATCH3D(3DSTATE_CONSTANT_TS_POINTER);
	MATCH3D(3DSTATE_MESH_CONTROL);
	MATCH3D(3DSTATE_MESH_DISTRIB);
	MATCH3D(3DSTATE_TASK_REDISTRIB);
	MATCH3D(3DSTATE_MESH_SHADER);
	MATCH3D(3DSTATE_MESH_SHADER_DATA);
	MATCH3D(3DSTATE_TASK_CONTROL);
	MATCH3D(3DSTATE_TASK_SHADER);
	MATCH3D(3DSTATE_TASK_SHADER_DATA);
	MATCH3D(3DSTATE_URB_ALLOC_MESH);
	MATCH3D(3DSTATE_URB_ALLOC_TASK);
	MATCH3D(3DSTATE_CLIP_MESH);
	MATCH3D(3DSTATE_SBE_MESH);
	MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE);
	MATCH3D(3DSTATE_CHROMA_KEY);
	MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET);
	MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN);
	MATCH3D(3DSTATE_LINE_STIPPLE);
	MATCH3D(3DSTATE_AA_LINE_PARAMETERS);
	MATCH3D(3DSTATE_MONOFILTER_SIZE);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
	MATCH3D(3DSTATE_SO_DECL_LIST);
	MATCH3D(3DSTATE_SO_BUFFER);
	MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC);
	MATCH3D(3DSTATE_SAMPLE_PATTERN);
	MATCH3D(3DSTATE_3D_MODE);
	MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE);
	MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS);
	MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO);

	default:
		drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n",
			   *dw, pipeline, opcode, subopcode, numdw);
		return numdw;
	}
}
1152
/**
 * xe_lrc_dump_default() - Dump the GT's default LRC image for an engine class
 * @p: printer to emit output to
 * @gt: the GT whose default LRC to dump
 * @hwe_class: engine class selecting which default image to decode
 *
 * Walks the context image dword by dword, decoding MI and GFXPIPE commands.
 */
void xe_lrc_dump_default(struct drm_printer *p,
			 struct xe_gt *gt,
			 enum xe_engine_class hwe_class)
{
	u32 *dw;
	int remaining_dw, num_dw;

	if (!gt->default_lrc[hwe_class]) {
		drm_printf(p, "No default LRC for class %d\n", hwe_class);
		return;
	}

	/*
	 * Skip the beginning of the LRC since it contains the per-process
	 * hardware status page.
	 */
	dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE;
	remaining_dw = (xe_lrc_size(gt_to_xe(gt), hwe_class) - LRC_PPHWSP_SIZE) / 4;

	while (remaining_dw > 0) {
		if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) {
			num_dw = dump_mi_command(p, gt, dw, remaining_dw);
		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) {
			num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw);
		} else {
			num_dw = min(instr_dw(*dw), remaining_dw);
			drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n",
				   *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw),
				   num_dw);
		}

		dw += num_dw;
		remaining_dw -= num_dw;
	}
}
1188
/* One entry of a canned state-instruction sequence. */
struct instr_state {
	u32 instr;	/* instruction header dword */
	u16 num_dw;	/* total length of the instruction in dwords */
};
1193
/*
 * SVG (geometry pipeline) state instructions emitted for the render engine
 * on Xe_HPG-era platforms; consumed by xe_lrc_emit_hwe_state_instructions().
 * Each num_dw is the instruction's full length in dwords — presumably taken
 * from the hardware programming specs for these platforms; verify against
 * the PRM when adding or resizing entries.
 */
static const struct instr_state xe_hpg_svg_state[] = {
	{ .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 },
	{ .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_VS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_CLIP, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SF, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_RASTER, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_HS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 },
	{ .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TE, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_GS, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 },
};
1246
1247void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb)
1248{
1249 struct xe_gt *gt = q->hwe->gt;
1250 struct xe_device *xe = gt_to_xe(gt);
1251 const struct instr_state *state_table = NULL;
1252 int state_table_size = 0;
1253
1254 /*
1255 * At the moment we only need to emit non-register state for the RCS
1256 * engine.
1257 */
1258 if (q->hwe->class != XE_ENGINE_CLASS_RENDER)
1259 return;
1260
1261 switch (GRAPHICS_VERx100(xe)) {
1262 case 1255:
1263 case 1270 ... 2004:
1264 state_table = xe_hpg_svg_state;
1265 state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
1266 break;
1267 default:
1268 xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
1269 GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
1270 return;
1271 }
1272
1273 for (int i = 0; i < state_table_size; i++) {
1274 u32 instr = state_table[i].instr;
1275 u16 num_dw = state_table[i].num_dw;
1276 bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW);
1277
1278 xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE);
1279 xe_gt_assert(gt, num_dw != 0);
1280 xe_gt_assert(gt, is_single_dw ^ (num_dw > 1));
1281
1282 /*
1283 * Xe2's SVG context is the same as the one on DG2 / MTL
1284 * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has
1285 * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined).
1286 * Just make the replacement here rather than defining a
1287 * whole separate table for the single trivial change.
1288 */
1289 if (GRAPHICS_VER(xe) >= 20 &&
1290 instr == CMD_3DSTATE_DRAWING_RECTANGLE)
1291 instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST;
1292
1293 bb->cs[bb->len] = instr;
1294 if (!is_single_dw)
1295 bb->cs[bb->len] |= (num_dw - 2);
1296
1297 bb->len += num_dw;
1298 }
1299}