// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

#include "gem/i915_gem_lmem.h"

#include "gen8_engine_cs.h"
#include "i915_drv.h"
#include "i915_perf.h"
#include "intel_engine.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_lrc.h"
#include "intel_lrc_reg.h"
#include "intel_ring.h"
#include "shmem_utils.h"

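/*
 * The per-engine tables below (gen8_xcs_offsets etc.) describe the register
 * layout of the default context image and are decoded by set_offsets() into
 * MI_LOAD_REGISTER_IMM sequences. The encoding, defined by the NOP/LRI/REG/
 * REG16/END macros that follow, is roughly:
 *
 *   NOP(x)            skip x dwords of the register state
 *   LRI(count, flags) emit MI_LOAD_REGISTER_IMM(count), optionally POSTED
 *   REG(x)            a register offset below 0x200, relative to mmio_base
 *   REG16(x)          a larger offset, split into 7-bit chunks with a
 *                     continuation bit (BIT(7)) on the leading byte
 *   END               terminates the table
 *
 * As an illustrative example, the trailing "LRI(2, 0), REG16(0x200),
 * REG(0x028)" of gen8_xcs_offsets decodes to MI_LOAD_REGISTER_IMM(2)
 * followed by the offsets mmio_base + 0x200 and mmio_base + 0x028, leaving
 * the value slots to be filled from the default state.
 */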
static void set_offsets(u32 *regs,
			const u8 *data,
			const struct intel_engine_cs *engine,
			bool close)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
	(((x) >> 2) & 0x7f)
#define END 0
{
	const u32 base = engine->mmio_base;

	while (*data) {
		u8 count, flags;

		if (*data & BIT(7)) { /* skip */
			count = *data++ & ~BIT(7);
			regs += count;
			continue;
		}

		count = *data & 0x3f;
		flags = *data >> 6;
		data++;

		*regs = MI_LOAD_REGISTER_IMM(count);
		if (flags & POSTED)
			*regs |= MI_LRI_FORCE_POSTED;
		if (GRAPHICS_VER(engine->i915) >= 11)
			*regs |= MI_LRI_LRM_CS_MMIO;
		regs++;

		GEM_BUG_ON(!count);
		do {
			u32 offset = 0;
			u8 v;

			do {
				v = *data++;
				offset <<= 7;
				offset |= v & ~BIT(7);
			} while (v & BIT(7));

			regs[0] = base + (offset << 2);
			regs += 2;
		} while (--count);
	}

	if (close) {
		/* Close the batch; used mainly by live_lrc_layout() */
		*regs = MI_BATCH_BUFFER_END;
		if (GRAPHICS_VER(engine->i915) >= 10)
			*regs |= BIT(0);
	}
}

static const u8 gen8_xcs_offsets[] = {
	NOP(1),
	LRI(11, 0),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),

	NOP(9),
	LRI(9, 0),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(2, 0),
	REG16(0x200),
	REG(0x028),

	END
};

static const u8 gen9_xcs_offsets[] = {
	NOP(1),
	LRI(14, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),

	NOP(3),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(1, POSTED),
	REG16(0x200),

	NOP(13),
	LRI(44, POSTED),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),

	END
};

static const u8 gen12_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	END
};

static const u8 gen8_rcs_offsets[] = {
	NOP(1),
	LRI(14, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),

	NOP(3),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(1, 0),
	REG(0x0c8),

	END
};

static const u8 gen9_rcs_offsets[] = {
	NOP(1),
	LRI(14, POSTED),
	REG16(0x244),
	REG(0x34),
	REG(0x30),
	REG(0x38),
	REG(0x3c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),

	NOP(3),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(1, 0),
	REG(0xc8),

	NOP(13),
	LRI(44, POSTED),
	REG(0x28),
	REG(0x9c),
	REG(0xc0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x68),

	END
};

static const u8 gen11_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(1, POSTED),
	REG(0x1b0),

	NOP(10),
	LRI(1, 0),
	REG(0x0c8),

	END
};

static const u8 gen12_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),
	NOP(3 + 9 + 1),

	LRI(51, POSTED),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),

	END
};

#undef END
#undef REG16
#undef REG
#undef LRI
#undef NOP

static const u8 *reg_offsets(const struct intel_engine_cs *engine)
{
	/*
	 * The gen12+ lists only have the registers we program in the basic
	 * default state. We rely on the context image using relative
	 * addressing to automatically fix up the register state between the
	 * physical engines for virtual engines.
	 */
	GEM_BUG_ON(GRAPHICS_VER(engine->i915) >= 12 &&
		   !intel_engine_has_relative_mmio(engine));

	if (engine->class == RENDER_CLASS) {
		if (GRAPHICS_VER(engine->i915) >= 12)
			return gen12_rcs_offsets;
		else if (GRAPHICS_VER(engine->i915) >= 11)
			return gen11_rcs_offsets;
		else if (GRAPHICS_VER(engine->i915) >= 9)
			return gen9_rcs_offsets;
		else
			return gen8_rcs_offsets;
	} else {
		if (GRAPHICS_VER(engine->i915) >= 12)
			return gen12_xcs_offsets;
		else if (GRAPHICS_VER(engine->i915) >= 9)
			return gen9_xcs_offsets;
		else
			return gen8_xcs_offsets;
	}
}

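/*
 * The lrc_ring_*() helpers below return the dword index of a register within
 * the context image's LRI register state (so regs[x + 1] is its value slot),
 * or -1 when that register is not present in the layout for this engine.
 */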
static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
{
	if (GRAPHICS_VER(engine->i915) >= 12)
		return 0x60;
	else if (GRAPHICS_VER(engine->i915) >= 9)
		return 0x54;
	else if (engine->class == RENDER_CLASS)
		return 0x58;
	else
		return -1;
}

static int lrc_ring_gpr0(const struct intel_engine_cs *engine)
{
	if (GRAPHICS_VER(engine->i915) >= 12)
		return 0x74;
	else if (GRAPHICS_VER(engine->i915) >= 9)
		return 0x68;
	else if (engine->class == RENDER_CLASS)
		return 0xd8;
	else
		return -1;
}

static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine)
{
	if (GRAPHICS_VER(engine->i915) >= 12)
		return 0x12;
	else if (GRAPHICS_VER(engine->i915) >= 9 || engine->class == RENDER_CLASS)
		return 0x18;
	else
		return -1;
}

static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine)
{
	int x;

	x = lrc_ring_wa_bb_per_ctx(engine);
	if (x < 0)
		return x;

	return x + 2;
}

static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
{
	int x;

	x = lrc_ring_indirect_ptr(engine);
	if (x < 0)
		return x;

	return x + 2;
}

static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
{
	if (engine->class != RENDER_CLASS)
		return -1;

	if (GRAPHICS_VER(engine->i915) >= 12)
		return 0xb6;
	else if (GRAPHICS_VER(engine->i915) >= 11)
		return 0xaa;
	else
		return -1;
}

static u32
lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
{
	switch (GRAPHICS_VER(engine->i915)) {
	default:
		MISSING_CASE(GRAPHICS_VER(engine->i915));
		fallthrough;
	case 12:
		return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
	case 11:
		return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
	case 10:
		return GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
	case 9:
		return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
	case 8:
		return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
	}
}

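/*
 * Point the context image at an indirect-context batch: the pointer register
 * takes the batch's GGTT address ORed with its length in cachelines, and the
 * offset register takes the per-gen default offset shifted into place.
 */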
static void
lrc_setup_indirect_ctx(u32 *regs,
		       const struct intel_engine_cs *engine,
		       u32 ctx_bb_ggtt_addr,
		       u32 size)
{
	GEM_BUG_ON(!size);
	GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES));
	GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1);
	regs[lrc_ring_indirect_ptr(engine) + 1] =
		ctx_bb_ggtt_addr | (size / CACHELINE_BYTES);

	GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1);
	regs[lrc_ring_indirect_offset(engine) + 1] =
		lrc_ring_indirect_offset_default(engine) << 6;
}

static void init_common_regs(u32 * const regs,
			     const struct intel_context *ce,
			     const struct intel_engine_cs *engine,
			     bool inhibit)
{
	u32 ctl;

	ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
	ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
	if (inhibit)
		ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
	if (GRAPHICS_VER(engine->i915) < 11)
		ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
					   CTX_CTRL_RS_CTX_ENABLE);
	regs[CTX_CONTEXT_CONTROL] = ctl;

	regs[CTX_TIMESTAMP] = ce->runtime.last;
}

static void init_wa_bb_regs(u32 * const regs,
			    const struct intel_engine_cs *engine)
{
	const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;

	if (wa_ctx->per_ctx.size) {
		const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);

		GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
		regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
			(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
	}

	if (wa_ctx->indirect_ctx.size) {
		lrc_setup_indirect_ctx(regs, engine,
				       i915_ggtt_offset(wa_ctx->vma) +
				       wa_ctx->indirect_ctx.offset,
				       wa_ctx->indirect_ctx.size);
	}
}

static void init_ppgtt_regs(u32 *regs, const struct i915_ppgtt *ppgtt)
{
	if (i915_vm_is_4lvl(&ppgtt->vm)) {
		/* 64b PPGTT (48bit canonical)
		 * PDP0_DESCRIPTOR contains the base address to PML4 and
		 * other PDP Descriptors are ignored.
		 */
		ASSIGN_CTX_PML4(ppgtt, regs);
	} else {
		ASSIGN_CTX_PDP(ppgtt, regs, 3);
		ASSIGN_CTX_PDP(ppgtt, regs, 2);
		ASSIGN_CTX_PDP(ppgtt, regs, 1);
		ASSIGN_CTX_PDP(ppgtt, regs, 0);
	}
}

static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
{
	if (i915_is_ggtt(vm))
		return i915_vm_to_ggtt(vm)->alias;
	else
		return i915_vm_to_ppgtt(vm);
}

static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
{
	int x;

	x = lrc_ring_mi_mode(engine);
	if (x != -1) {
		regs[x + 1] &= ~STOP_RING;
		regs[x + 1] |= STOP_RING << 16;
	}
}

static void __lrc_init_regs(u32 *regs,
			    const struct intel_context *ce,
			    const struct intel_engine_cs *engine,
			    bool inhibit)
{
	/*
	 * A context is actually a big batch buffer with several
	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
	 * values we are setting here are only for the first context restore:
	 * on a subsequent save, the GPU will recreate this batchbuffer with new
	 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
	 * we are not initializing here).
	 *
	 * Must keep consistent with virtual_update_register_offsets().
	 */

	if (inhibit)
		memset(regs, 0, PAGE_SIZE);

	set_offsets(regs, reg_offsets(engine), engine, inhibit);

	init_common_regs(regs, ce, engine, inhibit);
	init_ppgtt_regs(regs, vm_alias(ce->vm));

	init_wa_bb_regs(regs, engine);

	__reset_stop_ring(regs, engine);
}

void lrc_init_regs(const struct intel_context *ce,
		   const struct intel_engine_cs *engine,
		   bool inhibit)
{
	__lrc_init_regs(ce->lrc_reg_state, ce, engine, inhibit);
}

void lrc_reset_regs(const struct intel_context *ce,
		    const struct intel_engine_cs *engine)
{
	__reset_stop_ring(ce->lrc_reg_state, engine);
}

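/*
 * With CONFIG_DRM_I915_DEBUG_GEM, an extra page appended to the context image
 * (see __lrc_alloc_state()) is filled with CONTEXT_REDZONE and checked again
 * on unpin, so that the HW writing past the end of the image is caught.
 */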
static void
set_redzone(void *vaddr, const struct intel_engine_cs *engine)
{
	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return;

	vaddr += engine->context_size;

	memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
}

static void
check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
{
	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return;

	vaddr += engine->context_size;

	if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
		drm_err_once(&engine->i915->drm,
			     "%s context redzone overwritten!\n",
			     engine->name);
}

void lrc_init_state(struct intel_context *ce,
		    struct intel_engine_cs *engine,
		    void *state)
{
	bool inhibit = true;

	set_redzone(state, engine);

	if (engine->default_state) {
		shmem_read(engine->default_state, 0,
			   state, engine->context_size);
		__set_bit(CONTEXT_VALID_BIT, &ce->flags);
		inhibit = false;
	}

	/* Clear the ppHWSP (inc. per-context counters) */
	memset(state, 0, PAGE_SIZE);

	/*
	 * The second page of the context object contains some registers which
	 * must be set up prior to the first execution.
	 */
	__lrc_init_regs(state + LRC_STATE_OFFSET, ce, engine, inhibit);
}

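/*
 * The context image is sized from engine->context_size, plus a trailing
 * redzone page on debug builds and, on Gen12, one more page reserved for the
 * per-context indirect workaround batch (ce->wa_bb_page). The backing object
 * is placed in local memory when available, falling back to shmem.
 */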
static struct i915_vma *
__lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 context_size;

	context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);

	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		context_size += I915_GTT_PAGE_SIZE; /* for redzone */

	if (GRAPHICS_VER(engine->i915) == 12) {
		ce->wa_bb_page = context_size / PAGE_SIZE;
		context_size += PAGE_SIZE;
	}

	obj = i915_gem_object_create_lmem(engine->i915, context_size, 0);
	if (IS_ERR(obj))
		obj = i915_gem_object_create_shmem(engine->i915, context_size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return vma;
	}

	return vma;
}

static struct intel_timeline *
pinned_timeline(struct intel_context *ce, struct intel_engine_cs *engine)
{
	struct intel_timeline *tl = fetch_and_zero(&ce->timeline);

	return intel_timeline_create_from_engine(engine, page_unmask_bits(tl));
}

int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine)
{
	struct intel_ring *ring;
	struct i915_vma *vma;
	int err;

	GEM_BUG_ON(ce->state);

	vma = __lrc_alloc_state(ce, engine);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	ring = intel_engine_create_ring(engine, (unsigned long)ce->ring);
	if (IS_ERR(ring)) {
		err = PTR_ERR(ring);
		goto err_vma;
	}

	if (!page_mask_bits(ce->timeline)) {
		struct intel_timeline *tl;

		/*
		 * Use the static global HWSP for the kernel context, and
		 * a dynamically allocated cacheline for everyone else.
		 */
		if (unlikely(ce->timeline))
			tl = pinned_timeline(ce, engine);
		else
			tl = intel_timeline_create(engine->gt);
		if (IS_ERR(tl)) {
			err = PTR_ERR(tl);
			goto err_ring;
		}

		ce->timeline = tl;
	}

	ce->ring = ring;
	ce->state = vma;

	return 0;

err_ring:
	intel_ring_put(ring);
err_vma:
	i915_vma_put(vma);
	return err;
}

void lrc_reset(struct intel_context *ce)
{
	GEM_BUG_ON(!intel_context_is_pinned(ce));

	intel_ring_reset(ce->ring, ce->ring->emit);

	/* Scrub away the garbage */
	lrc_init_regs(ce, ce->engine, true);
	ce->lrc.lrca = lrc_update_regs(ce, ce->engine, ce->ring->tail);
}

int
lrc_pre_pin(struct intel_context *ce,
	    struct intel_engine_cs *engine,
	    struct i915_gem_ww_ctx *ww,
	    void **vaddr)
{
	GEM_BUG_ON(!ce->state);
	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));

	*vaddr = i915_gem_object_pin_map(ce->state->obj,
					 i915_coherent_map_type(ce->engine->i915,
								ce->state->obj,
								false) |
					 I915_MAP_OVERRIDE);

	return PTR_ERR_OR_ZERO(*vaddr);
}

int
lrc_pin(struct intel_context *ce,
	struct intel_engine_cs *engine,
	void *vaddr)
{
	ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;

	if (!__test_and_set_bit(CONTEXT_INIT_BIT, &ce->flags))
		lrc_init_state(ce, engine, vaddr);

	ce->lrc.lrca = lrc_update_regs(ce, engine, ce->ring->tail);
	return 0;
}

void lrc_unpin(struct intel_context *ce)
{
	check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
		      ce->engine);
}

void lrc_post_unpin(struct intel_context *ce)
{
	i915_gem_object_unpin_map(ce->state->obj);
}

void lrc_fini(struct intel_context *ce)
{
	if (!ce->state)
		return;

	intel_ring_put(fetch_and_zero(&ce->ring));
	i915_vma_put(fetch_and_zero(&ce->state));
}

void lrc_destroy(struct kref *kref)
{
	struct intel_context *ce = container_of(kref, typeof(*ce), ref);

	GEM_BUG_ON(!i915_active_is_idle(&ce->active));
	GEM_BUG_ON(intel_context_is_pinned(ce));

	lrc_fini(ce);

	intel_context_fini(ce);
	intel_context_free(ce);
}

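/*
 * The gen12_emit_*() helpers below assemble the per-context indirect
 * workaround batch: they use GPR0 as a scratch register, reloading saved
 * values from the context image via MI_LOAD_REGISTER_MEM and copying them
 * into CTX_TIMESTAMP / CMD_BUF_CCTL with MI_LOAD_REGISTER_REG, all using
 * CS-relative MMIO addressing.
 */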
static u32 *
gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs)
{
	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
		CTX_TIMESTAMP * sizeof(u32);
	*cs++ = 0;

	*cs++ = MI_LOAD_REGISTER_REG |
		MI_LRR_SOURCE_CS_MMIO |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));

	*cs++ = MI_LOAD_REGISTER_REG |
		MI_LRR_SOURCE_CS_MMIO |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));

	return cs;
}

static u32 *
gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs)
{
	GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1);

	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
		(lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32);
	*cs++ = 0;

	return cs;
}

static u32 *
gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
{
	GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1);

	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
		(lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32);
	*cs++ = 0;

	*cs++ = MI_LOAD_REGISTER_REG |
		MI_LRR_SOURCE_CS_MMIO |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0));

	return cs;
}

static u32 *
gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
{
	cs = gen12_emit_timestamp_wa(ce, cs);
	cs = gen12_emit_cmd_buf_wa(ce, cs);
	cs = gen12_emit_restore_scratch(ce, cs);

	return cs;
}

static u32 *
gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
{
	cs = gen12_emit_timestamp_wa(ce, cs);
	cs = gen12_emit_restore_scratch(ce, cs);

	return cs;
}

static u32 context_wa_bb_offset(const struct intel_context *ce)
{
	return PAGE_SIZE * ce->wa_bb_page;
}

static u32 *context_indirect_bb(const struct intel_context *ce)
{
	void *ptr;

	GEM_BUG_ON(!ce->wa_bb_page);

	ptr = ce->lrc_reg_state;
	ptr -= LRC_STATE_OFFSET; /* back to start of context image */
	ptr += context_wa_bb_offset(ce);

	return ptr;
}

static void
setup_indirect_ctx_bb(const struct intel_context *ce,
		      const struct intel_engine_cs *engine,
		      u32 *(*emit)(const struct intel_context *, u32 *))
{
	u32 * const start = context_indirect_bb(ce);
	u32 *cs;

	cs = emit(ce, start);
	GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
	while ((unsigned long)cs % CACHELINE_BYTES)
		*cs++ = MI_NOOP;

	lrc_setup_indirect_ctx(ce->lrc_reg_state, engine,
			       i915_ggtt_offset(ce->state) +
			       context_wa_bb_offset(ce),
			       (cs - start) * sizeof(*cs));
}

/*
 * The context descriptor encodes various attributes of a context,
 * including its GTT address and some flags. Because it's fairly
 * expensive to calculate, we'll just do it once and cache the result,
 * which remains valid until the context is unpinned.
 *
 * This is what a descriptor looks like, from LSB to MSB::
 *
 *	bits  0-11:	flags, GEN8_CTX_* (cached in ctx->desc_template)
 *	bits 12-31:	LRCA, GTT address of (the HWSP of) this context
 *	bits 32-52:	ctx ID, a globally unique tag (highest bit used by GuC)
 *	bits 53-54:	mbz, reserved for use by hardware
 *	bits 55-63:	group ID, currently unused and set to 0
 *
 * Starting from Gen11, the upper dword of the descriptor has a new format:
 *
 *	bits 32-36:	reserved
 *	bits 37-47:	SW context ID
 *	bits 48-53:	engine instance
 *	bit 54:		mbz, reserved for use by hardware
 *	bits 55-60:	SW counter
 *	bits 61-63:	engine class
 *
 * engine info, SW context ID and SW counter need to form a unique number
 * (Context ID) per lrc.
 */
static u32 lrc_descriptor(const struct intel_context *ce)
{
	u32 desc;

	desc = INTEL_LEGACY_32B_CONTEXT;
	if (i915_vm_is_4lvl(ce->vm))
		desc = INTEL_LEGACY_64B_CONTEXT;
	desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;

	desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
	if (GRAPHICS_VER(ce->vm->i915) == 8)
		desc |= GEN8_CTX_L3LLC_COHERENT;

	return i915_ggtt_offset(ce->state) | desc;
}

u32 lrc_update_regs(const struct intel_context *ce,
		    const struct intel_engine_cs *engine,
		    u32 head)
{
	struct intel_ring *ring = ce->ring;
	u32 *regs = ce->lrc_reg_state;

	GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));

	regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
	regs[CTX_RING_HEAD] = head;
	regs[CTX_RING_TAIL] = ring->tail;
	regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;

	/* RPCS */
	if (engine->class == RENDER_CLASS) {
		regs[CTX_R_PWR_CLK_STATE] =
			intel_sseu_make_rpcs(engine->gt, &ce->sseu);

		i915_oa_init_reg_state(ce, engine);
	}

	if (ce->wa_bb_page) {
		u32 *(*fn)(const struct intel_context *ce, u32 *cs);

		fn = gen12_emit_indirect_ctx_xcs;
		if (ce->engine->class == RENDER_CLASS)
			fn = gen12_emit_indirect_ctx_rcs;

		/* Mutually exclusive wrt the global indirect bb */
		GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
		setup_indirect_ctx_bb(ce, engine, fn);
	}

	return lrc_descriptor(ce) | CTX_DESC_FORCE_RESTORE;
}

void lrc_update_offsets(struct intel_context *ce,
			struct intel_engine_cs *engine)
{
	set_offsets(ce->lrc_reg_state, reg_offsets(engine), engine, false);
}

void lrc_check_regs(const struct intel_context *ce,
		    const struct intel_engine_cs *engine,
		    const char *when)
{
	const struct intel_ring *ring = ce->ring;
	u32 *regs = ce->lrc_reg_state;
	bool valid = true;
	int x;

	if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
		pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
		       engine->name,
		       regs[CTX_RING_START],
		       i915_ggtt_offset(ring->vma));
		regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
		valid = false;
	}

	if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
	    (RING_CTL_SIZE(ring->size) | RING_VALID)) {
		pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
		       engine->name,
		       regs[CTX_RING_CTL],
		       (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
		regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
		valid = false;
	}

	x = lrc_ring_mi_mode(engine);
	if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
		pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
		       engine->name, regs[x + 1]);
		regs[x + 1] &= ~STOP_RING;
		regs[x + 1] |= STOP_RING << 16;
		valid = false;
	}

	WARN_ONCE(!valid, "Invalid lrc state found %s submission\n", when);
}

/*
 * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after the
 * PIPE_CONTROL instruction. This is required for the flush to happen
 * correctly, but there is a slight complication as this is applied in a WA
 * batch where the values are only initialized once, so we cannot read the
 * register value at the beginning and reuse it further; hence we save its
 * value to memory, upload a constant value with bit21 set and then restore
 * it back with the saved value. To simplify the WA, a constant value is
 * formed by using the default value of this register. This shouldn't be a
 * problem because we are only modifying it for a short period and this batch
 * is non-preemptible. We could of course use additional instructions that
 * read the actual value of the register at that time and set our bit of
 * interest, but that makes the WA complicated.
 *
 * This WA is also required for Gen9 so extracting it as a function avoids
 * code duplication.
 */
static u32 *
gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
{
	/* NB no one else is allowed to scribble over scratch + 256! */
	*batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
	*batch++ = intel_gt_scratch_offset(engine->gt,
					   INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
	*batch++ = 0;

	*batch++ = MI_LOAD_REGISTER_IMM(1);
	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
	*batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;

	batch = gen8_emit_pipe_control(batch,
				       PIPE_CONTROL_CS_STALL |
				       PIPE_CONTROL_DC_FLUSH_ENABLE,
				       0);

	*batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
	*batch++ = intel_gt_scratch_offset(engine->gt,
					   INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
	*batch++ = 0;

	return batch;
}

/*
 * Typically we only have one indirect_ctx and per_ctx batch buffer which are
 * initialized at the beginning and shared across all contexts, but this field
 * helps us to have multiple batches at different offsets and select them based
 * on a criteria. At the moment this batch always starts at the beginning of
 * the page and at this point we don't have multiple wa_ctx batch buffers.
 *
 * The number of WAs applied is not known at the beginning; we use this field
 * to return the number of DWORDs written.
 *
 * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END,
 * so it adds NOOPs as padding to make it cacheline aligned.
 * MI_BATCH_BUFFER_END will be added to the perctx batch and both of them
 * together make a complete batch buffer.
 */
static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
	/* WaDisableCtxRestoreArbitration:bdw,chv */
	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	/* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
	if (IS_BROADWELL(engine->i915))
		batch = gen8_emit_flush_coherentl3_wa(engine, batch);

	/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
	/* Actual scratch location is at a 128-byte offset */
	batch = gen8_emit_pipe_control(batch,
				       PIPE_CONTROL_FLUSH_L3 |
				       PIPE_CONTROL_STORE_DATA_INDEX |
				       PIPE_CONTROL_CS_STALL |
				       PIPE_CONTROL_QW_WRITE,
				       LRC_PPHWSP_SCRATCH_ADDR);

	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	/* Pad to end of cacheline */
	while ((unsigned long)batch % CACHELINE_BYTES)
		*batch++ = MI_NOOP;

	/*
	 * MI_BATCH_BUFFER_END is not required in the indirect ctx BB because
	 * execution depends on the length specified in terms of cache lines
	 * in the register CTX_RCS_INDIRECT_CTX
	 */

	return batch;
}

struct lri {
	i915_reg_t reg;
	u32 value;
};

static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
{
	GEM_BUG_ON(!count || count > 63);

	*batch++ = MI_LOAD_REGISTER_IMM(count);
	do {
		*batch++ = i915_mmio_reg_offset(lri->reg);
		*batch++ = lri->value;
	} while (lri++, --count);
	*batch++ = MI_NOOP;

	return batch;
}

static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
	static const struct lri lri[] = {
		/* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
		{
			COMMON_SLICE_CHICKEN2,
			__MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
				       0),
		},

		/* BSpec: 11391 */
		{
			FF_SLICE_CHICKEN,
			__MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
				       FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
		},

		/* BSpec: 11299 */
		{
			_3D_CHICKEN3,
			__MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
				       _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
		}
	};
	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
	batch = gen8_emit_flush_coherentl3_wa(engine, batch);

	/* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
	batch = gen8_emit_pipe_control(batch,
				       PIPE_CONTROL_FLUSH_L3 |
				       PIPE_CONTROL_STORE_DATA_INDEX |
				       PIPE_CONTROL_CS_STALL |
				       PIPE_CONTROL_QW_WRITE,
				       LRC_PPHWSP_SCRATCH_ADDR);

	batch = emit_lri(batch, lri, ARRAY_SIZE(lri));

	/* WaMediaPoolStateCmdInWABB:bxt,glk */
	if (HAS_POOLED_EU(engine->i915)) {
		/*
		 * EU pool configuration is set up along with the golden
		 * context during context initialization. This value depends
		 * on the device type (2x6 or 3x6) and needs to be updated
		 * based on which subslice is disabled, especially for 2x6
		 * devices. However, it is safe to load the default 3x6
		 * configuration instead of masking off the corresponding
		 * bits, because the HW ignores bits of a disabled subslice
		 * and drops down to the appropriate config. Please see
		 * render_state_setup() in i915_gem_render_state.c for the
		 * possible configurations; to avoid duplication they are
		 * not shown here again.
		 */
		*batch++ = GEN9_MEDIA_POOL_STATE;
		*batch++ = GEN9_MEDIA_POOL_ENABLE;
		*batch++ = 0x00777000;
		*batch++ = 0;
		*batch++ = 0;
		*batch++ = 0;
	}

	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	/* Pad to end of cacheline */
	while ((unsigned long)batch % CACHELINE_BYTES)
		*batch++ = MI_NOOP;

	return batch;
}

static u32 *
gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
	int i;

	/*
	 * WaPipeControlBefore3DStateSamplePattern: cnl
	 *
	 * Ensure the engine is idle prior to programming a
	 * 3DSTATE_SAMPLE_PATTERN during a context restore.
	 */
	batch = gen8_emit_pipe_control(batch,
				       PIPE_CONTROL_CS_STALL,
				       0);
	/*
	 * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for
	 * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in
	 * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is
	 * confusing. Since gen8_emit_pipe_control() already advances the
	 * batch by 6 dwords, we advance the other 10 here, completing a
	 * cacheline. It's not clear if the workaround requires this padding
	 * before other commands, or if it's just the regular padding we would
	 * already have for the workaround bb, so leave it here for now.
	 */
	for (i = 0; i < 10; i++)
		*batch++ = MI_NOOP;

	/* Pad to end of cacheline */
	while ((unsigned long)batch % CACHELINE_BYTES)
		*batch++ = MI_NOOP;

	return batch;
}

#define CTX_WA_BB_SIZE (PAGE_SIZE)

static int lrc_create_wa_ctx(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int err;

	obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	engine->wa_ctx.vma = vma;
	return 0;

err:
	i915_gem_object_put(obj);
	return err;
}

void lrc_fini_wa_ctx(struct intel_engine_cs *engine)
{
	i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
}

typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);

void lrc_init_wa_ctx(struct intel_engine_cs *engine)
{
	struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
	struct i915_wa_ctx_bb *wa_bb[] = {
		&wa_ctx->indirect_ctx, &wa_ctx->per_ctx
	};
	wa_bb_func_t wa_bb_fn[ARRAY_SIZE(wa_bb)];
	struct i915_gem_ww_ctx ww;
	void *batch, *batch_ptr;
	unsigned int i;
	int err;

	if (engine->class != RENDER_CLASS)
		return;

	switch (GRAPHICS_VER(engine->i915)) {
	case 12:
	case 11:
		return;
	case 10:
		wa_bb_fn[0] = gen10_init_indirectctx_bb;
		wa_bb_fn[1] = NULL;
		break;
	case 9:
		wa_bb_fn[0] = gen9_init_indirectctx_bb;
		wa_bb_fn[1] = NULL;
		break;
	case 8:
		wa_bb_fn[0] = gen8_init_indirectctx_bb;
		wa_bb_fn[1] = NULL;
		break;
	default:
		MISSING_CASE(GRAPHICS_VER(engine->i915));
		return;
	}

	err = lrc_create_wa_ctx(engine);
	if (err) {
		/*
		 * We continue even if we fail to initialize the WA batch
		 * because we only expect rare glitches and nothing critical
		 * that would prevent us from using the GPU.
		 */
		drm_err(&engine->i915->drm,
			"Ignoring context switch w/a allocation error:%d\n",
			err);
		return;
	}

	if (!engine->wa_ctx.vma)
		return;

	i915_gem_ww_ctx_init(&ww, true);
retry:
	err = i915_gem_object_lock(wa_ctx->vma->obj, &ww);
	if (!err)
		err = i915_ggtt_pin(wa_ctx->vma, &ww, 0, PIN_HIGH);
	if (err)
		goto err;

	batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto err_unpin;
	}

	/*
	 * Emit the two workaround batch buffers, recording the offset from the
	 * start of the workaround batch buffer object for each and their
	 * respective sizes.
	 */
	batch_ptr = batch;
	for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
		wa_bb[i]->offset = batch_ptr - batch;
		if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
						  CACHELINE_BYTES))) {
			err = -EINVAL;
			break;
		}
		if (wa_bb_fn[i])
			batch_ptr = wa_bb_fn[i](engine, batch_ptr);
		wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
	}
	GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_SIZE);

	__i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch);
	__i915_gem_object_release_map(wa_ctx->vma->obj);

	/* Verify that we can handle failure to set up the wa_ctx */
	if (!err)
		err = i915_inject_probe_error(engine->i915, -ENODEV);

err_unpin:
	if (err)
		i915_vma_unpin(wa_ctx->vma);
err:
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);

	if (err) {
		i915_vma_put(engine->wa_ctx.vma);

		/* Clear all flags to prevent further use */
		memset(wa_ctx, 0, sizeof(*wa_ctx));
	}
}

static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
{
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
	ce->runtime.num_underflow++;
	ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt);
#endif
}

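/*
 * Accumulate the CS timestamp delta since the last sample into the context's
 * runtime statistics. A negative delta is treated as an underflow: it is
 * traced (and counted by the selftests) but not added to the running total.
 */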
void lrc_update_runtime(struct intel_context *ce)
{
	u32 old;
	s32 dt;

	if (intel_context_is_barrier(ce))
		return;

	old = ce->runtime.last;
	ce->runtime.last = lrc_get_runtime(ce);
	dt = ce->runtime.last - old;

	if (unlikely(dt < 0)) {
		CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
			 old, ce->runtime.last, dt);
		st_update_runtime_underflow(ce, dt);
		return;
	}

	ewma_runtime_add(&ce->runtime.avg, dt);
	ce->runtime.total += dt;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_lrc.c"
#endif