1/*
2 * Copyright © 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Ben Widawsky <ben@bwidawsk.net>
25 * Michel Thierry <michel.thierry@intel.com>
26 * Thomas Daniel <thomas.daniel@intel.com>
27 * Oscar Mateo <oscar.mateo@intel.com>
28 *
29 */
30
31/**
32 * DOC: Logical Rings, Logical Ring Contexts and Execlists
33 *
34 * Motivation:
35 * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts".
36 * These expanded contexts enable a number of new abilities, especially
37 * "Execlists" (also implemented in this file).
38 *
 * One of the main differences from the legacy HW contexts is that logical
 * ring contexts incorporate many more things into the context's state, such as
 * PDPs or ringbuffer control registers:
42 *
 * The reason why PDPs are included in the context is straightforward: as
 * PPGTTs (per-process GTTs) are actually per-context, having the PDPs
 * contained there means you don't need to do a ppgtt->switch_mm yourself;
 * instead, the GPU will do it for you on the context switch.
47 *
 * But what about the ringbuffer control registers (head, tail, etc.)?
 * Shouldn't we just need a set of those per engine command streamer? This is
50 * where the name "Logical Rings" starts to make sense: by virtualizing the
51 * rings, the engine cs shifts to a new "ring buffer" with every context
52 * switch. When you want to submit a workload to the GPU you: A) choose your
53 * context, B) find its appropriate virtualized ring, C) write commands to it
54 * and then, finally, D) tell the GPU to switch to that context.
55 *
 * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch
 * to a context is via a context execution list, ergo "Execlists".
58 *
59 * LRC implementation:
60 * Regarding the creation of contexts, we have:
61 *
62 * - One global default context.
63 * - One local default context for each opened fd.
64 * - One local extra context for each context create ioctl call.
65 *
66 * Now that ringbuffers belong per-context (and not per-engine, like before)
67 * and that contexts are uniquely tied to a given engine (and not reusable,
68 * like before) we need:
69 *
70 * - One ringbuffer per-engine inside each context.
71 * - One backing object per-engine inside each context.
72 *
73 * The global default context starts its life with these new objects fully
74 * allocated and populated. The local default context for each opened fd is
 * more complex, because we don't know at creation time which engine is going
 * to use it. To handle this, we have implemented a deferred creation of LR
77 * contexts:
78 *
79 * The local context starts its life as a hollow or blank holder, that only
80 * gets populated for a given engine once we receive an execbuffer. If later
81 * on we receive another execbuffer ioctl for the same context but a different
82 * engine, we allocate/populate a new ringbuffer and context backing object and
83 * so on.
84 *
85 * Finally, regarding local contexts created using the ioctl call: as they are
86 * only allowed with the render ring, we can allocate & populate them right
87 * away (no need to defer anything, at least for now).
88 *
89 * Execlists implementation:
90 * Execlists are the new method by which, on gen8+ hardware, workloads are
91 * submitted for execution (as opposed to the legacy, ringbuffer-based, method).
92 * This method works as follows:
93 *
94 * When a request is committed, its commands (the BB start and any leading or
95 * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer
96 * for the appropriate context. The tail pointer in the hardware context is not
97 * updated at this time, but instead, kept by the driver in the ringbuffer
98 * structure. A structure representing this request is added to a request queue
99 * for the appropriate engine: this structure contains a copy of the context's
100 * tail after the request was written to the ring buffer and a pointer to the
101 * context itself.
102 *
103 * If the engine's request queue was empty before the request was added, the
104 * queue is processed immediately. Otherwise the queue will be processed during
105 * a context switch interrupt. In any case, elements on the queue will get sent
106 * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a
 * globally unique 20-bit submission ID.
108 *
109 * When execution of a request completes, the GPU updates the context status
110 * buffer with a context complete event and generates a context switch interrupt.
111 * During the interrupt handling, the driver examines the events in the buffer:
112 * for each context complete event, if the announced ID matches that on the head
113 * of the request queue, then that request is retired and removed from the queue.
114 *
115 * After processing, if any requests were retired and the queue is not empty
116 * then a new execution list can be submitted. The two requests at the front of
117 * the queue are next to be submitted but since a context may not occur twice in
118 * an execution list, if subsequent requests have the same ID as the first then
119 * the two requests must be combined. This is done simply by discarding requests
 * at the head of the queue until either only one request is left (in which case
121 * we use a NULL second context) or the first two requests have unique IDs.
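 *
 * As a rough illustration only (not the driver's literal code), the pairing
 * rule above amounts to something like the following pseudo-code, where
 * peek(), pop() and same_context() are hypothetical helpers over the
 * engine's request queue:
 *
 *	rq0 = pop(queue);
 *	while ((rq1 = peek(queue)) && same_context(rq0, rq1))
 *		rq0 = pop(queue);	// keep only the newest duplicate
 *	if (rq1)
 *		rq1 = pop(queue);	// second, distinct context
 *	elsp_submit(rq0, rq1);		// rq1 == NULL submits a single element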
122 *
123 * By always executing the first two requests in the queue the driver ensures
124 * that the GPU is kept as busy as possible. In the case where a single context
125 * completes but a second context is still executing, the request for this second
126 * context will be at the head of the queue when we remove the first one. This
127 * request will then be resubmitted along with a new request for a different context,
128 * which will cause the hardware to continue executing the second request and queue
129 * the new request (the GPU detects the condition of a context getting preempted
130 * with the same context and optimizes the context switch flow by not doing
131 * preemption, but just sampling the new tail pointer).
132 *
133 */
134#include <linux/interrupt.h>
135
136#include "i915_drv.h"
137#include "i915_perf.h"
138#include "i915_trace.h"
139#include "i915_vgpu.h"
140#include "intel_context.h"
141#include "intel_engine_pm.h"
142#include "intel_gt.h"
143#include "intel_gt_pm.h"
144#include "intel_gt_requests.h"
145#include "intel_lrc_reg.h"
146#include "intel_mocs.h"
147#include "intel_reset.h"
148#include "intel_ring.h"
149#include "intel_workarounds.h"
150#include "shmem_utils.h"
151
152#define RING_EXECLIST_QFULL (1 << 0x2)
153#define RING_EXECLIST1_VALID (1 << 0x3)
154#define RING_EXECLIST0_VALID (1 << 0x4)
155#define RING_EXECLIST_ACTIVE_STATUS (3 << 0xE)
156#define RING_EXECLIST1_ACTIVE (1 << 0x11)
157#define RING_EXECLIST0_ACTIVE (1 << 0x12)
158
159#define GEN8_CTX_STATUS_IDLE_ACTIVE (1 << 0)
160#define GEN8_CTX_STATUS_PREEMPTED (1 << 1)
161#define GEN8_CTX_STATUS_ELEMENT_SWITCH (1 << 2)
162#define GEN8_CTX_STATUS_ACTIVE_IDLE (1 << 3)
163#define GEN8_CTX_STATUS_COMPLETE (1 << 4)
164#define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15)
165
166#define GEN8_CTX_STATUS_COMPLETED_MASK \
167 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
168
169#define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
170
171#define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE (0x1) /* lower csb dword */
172#define GEN12_CTX_SWITCH_DETAIL(csb_dw) ((csb_dw) & 0xF) /* upper csb dword */
173#define GEN12_CSB_SW_CTX_ID_MASK GENMASK(25, 15)
174#define GEN12_IDLE_CTX_ID 0x7FF
175#define GEN12_CSB_CTX_VALID(csb_dw) \
176 (FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID)
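
/*
 * Worked example (illustrative only): an upper CSB dword of 0x03ff8000 has a
 * SW context ID field (bits 25:15) of 0x7ff, i.e. GEN12_IDLE_CTX_ID, so
 * GEN12_CSB_CTX_VALID() evaluates to false for it.
 */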
177
178/* Typical size of the average request (2 pipecontrols and a MI_BB) */
179#define EXECLISTS_REQUEST_SIZE 64 /* bytes */
180
181struct virtual_engine {
182 struct intel_engine_cs base;
183 struct intel_context context;
184
185 /*
186 * We allow only a single request through the virtual engine at a time
187 * (each request in the timeline waits for the completion fence of
188 * the previous before being submitted). By restricting ourselves to
189 * only submitting a single request, each request is placed on to a
 * physical engine to maximise load spreading (by virtue of the late greedy
191 * scheduling -- each real engine takes the next available request
192 * upon idling).
193 */
194 struct i915_request *request;
195
196 /*
197 * We keep a rbtree of available virtual engines inside each physical
198 * engine, sorted by priority. Here we preallocate the nodes we need
199 * for the virtual engine, indexed by physical_engine->id.
200 */
201 struct ve_node {
202 struct rb_node rb;
203 int prio;
204 } nodes[I915_NUM_ENGINES];
205
206 /*
 * Keep track of bonded pairs -- restrictions upon our selection
 * of physical engines any particular request may be submitted to.
 * If we receive a submit-fence from a master engine, we will only
 * use one of the sibling_mask physical engines.
211 */
212 struct ve_bond {
213 const struct intel_engine_cs *master;
214 intel_engine_mask_t sibling_mask;
215 } *bonds;
216 unsigned int num_bonds;
217
218 /* And finally, which physical engines this virtual engine maps onto. */
219 unsigned int num_siblings;
220 struct intel_engine_cs *siblings[];
221};
222
223static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
224{
225 GEM_BUG_ON(!intel_engine_is_virtual(engine));
226 return container_of(engine, struct virtual_engine, base);
227}
228
229static int __execlists_context_alloc(struct intel_context *ce,
230 struct intel_engine_cs *engine);
231
232static void execlists_init_reg_state(u32 *reg_state,
233 const struct intel_context *ce,
234 const struct intel_engine_cs *engine,
235 const struct intel_ring *ring,
236 bool close);
237static void
238__execlists_update_reg_state(const struct intel_context *ce,
239 const struct intel_engine_cs *engine,
240 u32 head);
241
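/*
 * The lrc_ring_*() helpers below return the dword index within the context
 * register state at which a given register's slot in the LRI block lives
 * (the register's value then follows at index + 1), or -1 if the register
 * is not present in the context image for this engine/gen.
 */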
242static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
243{
244 if (INTEL_GEN(engine->i915) >= 12)
245 return 0x60;
246 else if (INTEL_GEN(engine->i915) >= 9)
247 return 0x54;
248 else if (engine->class == RENDER_CLASS)
249 return 0x58;
250 else
251 return -1;
252}
253
254static int lrc_ring_gpr0(const struct intel_engine_cs *engine)
255{
256 if (INTEL_GEN(engine->i915) >= 12)
257 return 0x74;
258 else if (INTEL_GEN(engine->i915) >= 9)
259 return 0x68;
260 else if (engine->class == RENDER_CLASS)
261 return 0xd8;
262 else
263 return -1;
264}
265
266static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine)
267{
268 if (INTEL_GEN(engine->i915) >= 12)
269 return 0x12;
270 else if (INTEL_GEN(engine->i915) >= 9 || engine->class == RENDER_CLASS)
271 return 0x18;
272 else
273 return -1;
274}
275
276static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine)
277{
278 int x;
279
280 x = lrc_ring_wa_bb_per_ctx(engine);
281 if (x < 0)
282 return x;
283
284 return x + 2;
285}
286
287static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
288{
289 int x;
290
291 x = lrc_ring_indirect_ptr(engine);
292 if (x < 0)
293 return x;
294
295 return x + 2;
296}
297
298static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
299{
300 if (engine->class != RENDER_CLASS)
301 return -1;
302
303 if (INTEL_GEN(engine->i915) >= 12)
304 return 0xb6;
305 else if (INTEL_GEN(engine->i915) >= 11)
306 return 0xaa;
307 else
308 return -1;
309}
310
311static u32
312lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
313{
314 switch (INTEL_GEN(engine->i915)) {
315 default:
316 MISSING_CASE(INTEL_GEN(engine->i915));
317 fallthrough;
318 case 12:
319 return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
320 case 11:
321 return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
322 case 10:
323 return GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
324 case 9:
325 return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
326 case 8:
327 return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
328 }
329}
330
331static void
332lrc_ring_setup_indirect_ctx(u32 *regs,
333 const struct intel_engine_cs *engine,
334 u32 ctx_bb_ggtt_addr,
335 u32 size)
336{
337 GEM_BUG_ON(!size);
338 GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES));
339 GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1);
340 regs[lrc_ring_indirect_ptr(engine) + 1] =
341 ctx_bb_ggtt_addr | (size / CACHELINE_BYTES);
342
343 GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1);
344 regs[lrc_ring_indirect_offset(engine) + 1] =
345 lrc_ring_indirect_offset_default(engine) << 6;
346}
347
348static u32 intel_context_get_runtime(const struct intel_context *ce)
349{
350 /*
351 * We can use either ppHWSP[16] which is recorded before the context
352 * switch (and so excludes the cost of context switches) or use the
353 * value from the context image itself, which is saved/restored earlier
354 * and so includes the cost of the save.
355 */
356 return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
357}
358
359static void mark_eio(struct i915_request *rq)
360{
361 if (i915_request_completed(rq))
362 return;
363
364 GEM_BUG_ON(i915_request_signaled(rq));
365
366 i915_request_set_error_once(rq, -EIO);
367 i915_request_mark_complete(rq);
368}
369
370static struct i915_request *
371active_request(const struct intel_timeline * const tl, struct i915_request *rq)
372{
373 struct i915_request *active = rq;
374
375 rcu_read_lock();
376 list_for_each_entry_continue_reverse(rq, &tl->requests, link) {
377 if (i915_request_completed(rq))
378 break;
379
380 active = rq;
381 }
382 rcu_read_unlock();
383
384 return active;
385}
386
387static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
388{
389 return (i915_ggtt_offset(engine->status_page.vma) +
390 I915_GEM_HWS_PREEMPT_ADDR);
391}
392
393static inline void
394ring_set_paused(const struct intel_engine_cs *engine, int state)
395{
396 /*
397 * We inspect HWS_PREEMPT with a semaphore inside
398 * engine->emit_fini_breadcrumb. If the dword is true,
399 * the ring is paused as the semaphore will busywait
400 * until the dword is false.
401 */
402 engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state;
403 if (state)
404 wmb();
405}
406
407static inline struct i915_priolist *to_priolist(struct rb_node *rb)
408{
409 return rb_entry(rb, struct i915_priolist, node);
410}
411
412static inline int rq_prio(const struct i915_request *rq)
413{
414 return READ_ONCE(rq->sched.attr.priority);
415}
416
417static int effective_prio(const struct i915_request *rq)
418{
419 int prio = rq_prio(rq);
420
421 /*
422 * If this request is special and must not be interrupted at any
423 * cost, so be it. Note we are only checking the most recent request
424 * in the context and so may be masking an earlier vip request. It
425 * is hoped that under the conditions where nopreempt is used, this
426 * will not matter (i.e. all requests to that context will be
427 * nopreempt for as long as desired).
428 */
429 if (i915_request_has_nopreempt(rq))
430 prio = I915_PRIORITY_UNPREEMPTABLE;
431
432 return prio;
433}
434
435static int queue_prio(const struct intel_engine_execlists *execlists)
436{
437 struct i915_priolist *p;
438 struct rb_node *rb;
439
440 rb = rb_first_cached(&execlists->queue);
441 if (!rb)
442 return INT_MIN;
443
444 /*
445 * As the priolist[] are inverted, with the highest priority in [0],
446 * we have to flip the index value to become priority.
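 * For example, if I915_USER_PRIORITY_SHIFT happened to be 2, then for a
 * priolist with ->priority == 1 and only bit 0 set in ->used this would
 * return ((1 + 1) << 2) - ffs(BIT(0)) = 8 - 1 = 7.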
447 */
448 p = to_priolist(rb);
449 if (!I915_USER_PRIORITY_SHIFT)
450 return p->priority;
451
452 return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used);
453}
454
455static inline bool need_preempt(const struct intel_engine_cs *engine,
456 const struct i915_request *rq,
457 struct rb_node *rb)
458{
459 int last_prio;
460
461 if (!intel_engine_has_semaphores(engine))
462 return false;
463
464 /*
465 * Check if the current priority hint merits a preemption attempt.
466 *
467 * We record the highest value priority we saw during rescheduling
468 * prior to this dequeue, therefore we know that if it is strictly
 * less than the current tail of ELSP[0], we do not need to force
470 * a preempt-to-idle cycle.
471 *
472 * However, the priority hint is a mere hint that we may need to
473 * preempt. If that hint is stale or we may be trying to preempt
474 * ourselves, ignore the request.
475 *
476 * More naturally we would write
477 * prio >= max(0, last);
478 * except that we wish to prevent triggering preemption at the same
479 * priority level: the task that is running should remain running
480 * to preserve FIFO ordering of dependencies.
481 */
482 last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1);
483 if (engine->execlists.queue_priority_hint <= last_prio)
484 return false;
485
486 /*
487 * Check against the first request in ELSP[1], it will, thanks to the
488 * power of PI, be the highest priority of that context.
489 */
490 if (!list_is_last(&rq->sched.link, &engine->active.requests) &&
491 rq_prio(list_next_entry(rq, sched.link)) > last_prio)
492 return true;
493
494 if (rb) {
495 struct virtual_engine *ve =
496 rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
497 bool preempt = false;
498
499 if (engine == ve->siblings[0]) { /* only preempt one sibling */
500 struct i915_request *next;
501
502 rcu_read_lock();
503 next = READ_ONCE(ve->request);
504 if (next)
505 preempt = rq_prio(next) > last_prio;
506 rcu_read_unlock();
507 }
508
509 if (preempt)
510 return preempt;
511 }
512
513 /*
514 * If the inflight context did not trigger the preemption, then maybe
515 * it was the set of queued requests? Pick the highest priority in
516 * the queue (the first active priolist) and see if it deserves to be
517 * running instead of ELSP[0].
518 *
 * The highest priority request in the queue cannot be either
 * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same
 * context, its priority would not exceed ELSP[0] aka last_prio.
522 */
523 return queue_prio(&engine->execlists) > last_prio;
524}
525
526__maybe_unused static inline bool
527assert_priority_queue(const struct i915_request *prev,
528 const struct i915_request *next)
529{
530 /*
531 * Without preemption, the prev may refer to the still active element
532 * which we refuse to let go.
533 *
534 * Even with preemption, there are times when we think it is better not
535 * to preempt and leave an ostensibly lower priority request in flight.
536 */
537 if (i915_request_is_active(prev))
538 return true;
539
540 return rq_prio(prev) >= rq_prio(next);
541}
542
543/*
544 * The context descriptor encodes various attributes of a context,
545 * including its GTT address and some flags. Because it's fairly
546 * expensive to calculate, we'll just do it once and cache the result,
547 * which remains valid until the context is unpinned.
548 *
549 * This is what a descriptor looks like, from LSB to MSB::
550 *
551 * bits 0-11: flags, GEN8_CTX_* (cached in ctx->desc_template)
552 * bits 12-31: LRCA, GTT address of (the HWSP of) this context
553 * bits 32-52: ctx ID, a globally unique tag (highest bit used by GuC)
554 * bits 53-54: mbz, reserved for use by hardware
555 * bits 55-63: group ID, currently unused and set to 0
556 *
557 * Starting from Gen11, the upper dword of the descriptor has a new format:
558 *
559 * bits 32-36: reserved
560 * bits 37-47: SW context ID
561 * bits 48:53: engine instance
562 * bit 54: mbz, reserved for use by hardware
563 * bits 55-60: SW counter
564 * bits 61-63: engine class
565 *
566 * engine info, SW context ID and SW counter need to form a unique number
567 * (Context ID) per lrc.
568 */
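/*
 * Note that lrc_descriptor() below computes only the lower dword of the
 * descriptor (flags + LRCA). The upper-dword fields described above are
 * supplied at submission time via ce->lrc.ccid, which
 * __execlists_schedule_in() builds from a per-engine tag OR'ed with
 * engine->execlists.ccid (engine class/instance).
 */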
569static u32
570lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
571{
572 u32 desc;
573
574 desc = INTEL_LEGACY_32B_CONTEXT;
575 if (i915_vm_is_4lvl(ce->vm))
576 desc = INTEL_LEGACY_64B_CONTEXT;
577 desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
578
579 desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
580 if (IS_GEN(engine->i915, 8))
581 desc |= GEN8_CTX_L3LLC_COHERENT;
582
583 return i915_ggtt_offset(ce->state) | desc;
584}
585
586static inline unsigned int dword_in_page(void *addr)
587{
588 return offset_in_page(addr) / sizeof(u32);
589}
590
591static void set_offsets(u32 *regs,
592 const u8 *data,
593 const struct intel_engine_cs *engine,
594 bool clear)
595#define NOP(x) (BIT(7) | (x))
596#define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
597#define POSTED BIT(0)
598#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
599#define REG16(x) \
600 (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
601 (((x) >> 2) & 0x7f)
602#define END(total_state_size) 0, (total_state_size)
603{
604 const u32 base = engine->mmio_base;
605
606 while (*data) {
607 u8 count, flags;
608
609 if (*data & BIT(7)) { /* skip */
610 count = *data++ & ~BIT(7);
611 if (clear)
612 memset32(regs, MI_NOOP, count);
613 regs += count;
614 continue;
615 }
616
617 count = *data & 0x3f;
618 flags = *data >> 6;
619 data++;
620
621 *regs = MI_LOAD_REGISTER_IMM(count);
622 if (flags & POSTED)
623 *regs |= MI_LRI_FORCE_POSTED;
624 if (INTEL_GEN(engine->i915) >= 11)
625 *regs |= MI_LRI_LRM_CS_MMIO;
626 regs++;
627
628 GEM_BUG_ON(!count);
629 do {
630 u32 offset = 0;
631 u8 v;
632
633 do {
634 v = *data++;
635 offset <<= 7;
636 offset |= v & ~BIT(7);
637 } while (v & BIT(7));
638
639 regs[0] = base + (offset << 2);
640 if (clear)
641 regs[1] = 0;
642 regs += 2;
643 } while (--count);
644 }
645
646 if (clear) {
647 u8 count = *++data;
648
649 /* Clear past the tail for HW access */
650 GEM_BUG_ON(dword_in_page(regs) > count);
651 memset32(regs, MI_NOOP, count - dword_in_page(regs));
652
653 /* Close the batch; used mainly by live_lrc_layout() */
654 *regs = MI_BATCH_BUFFER_END;
655 if (INTEL_GEN(engine->i915) >= 10)
656 *regs |= BIT(0);
657 }
658}
659
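/*
 * Decoding example (illustrative): gen8_xcs_offsets[] below begins with
 * NOP(1), LRI(11, 0), REG16(0x244), REG(0x034), ..., which assembles to the
 * bytes 0x81, 0x0b, 0x81, 0x11, 0x0d, ...; set_offsets() reads 0x81 as
 * "skip one dword", 0x0b as MI_LOAD_REGISTER_IMM(11), and the following
 * bytes as register offsets 0x244, 0x034, ... relative to engine->mmio_base.
 */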
660static const u8 gen8_xcs_offsets[] = {
661 NOP(1),
662 LRI(11, 0),
663 REG16(0x244),
664 REG(0x034),
665 REG(0x030),
666 REG(0x038),
667 REG(0x03c),
668 REG(0x168),
669 REG(0x140),
670 REG(0x110),
671 REG(0x11c),
672 REG(0x114),
673 REG(0x118),
674
675 NOP(9),
676 LRI(9, 0),
677 REG16(0x3a8),
678 REG16(0x28c),
679 REG16(0x288),
680 REG16(0x284),
681 REG16(0x280),
682 REG16(0x27c),
683 REG16(0x278),
684 REG16(0x274),
685 REG16(0x270),
686
687 NOP(13),
688 LRI(2, 0),
689 REG16(0x200),
690 REG(0x028),
691
692 END(80)
693};
694
695static const u8 gen9_xcs_offsets[] = {
696 NOP(1),
697 LRI(14, POSTED),
698 REG16(0x244),
699 REG(0x034),
700 REG(0x030),
701 REG(0x038),
702 REG(0x03c),
703 REG(0x168),
704 REG(0x140),
705 REG(0x110),
706 REG(0x11c),
707 REG(0x114),
708 REG(0x118),
709 REG(0x1c0),
710 REG(0x1c4),
711 REG(0x1c8),
712
713 NOP(3),
714 LRI(9, POSTED),
715 REG16(0x3a8),
716 REG16(0x28c),
717 REG16(0x288),
718 REG16(0x284),
719 REG16(0x280),
720 REG16(0x27c),
721 REG16(0x278),
722 REG16(0x274),
723 REG16(0x270),
724
725 NOP(13),
726 LRI(1, POSTED),
727 REG16(0x200),
728
729 NOP(13),
730 LRI(44, POSTED),
731 REG(0x028),
732 REG(0x09c),
733 REG(0x0c0),
734 REG(0x178),
735 REG(0x17c),
736 REG16(0x358),
737 REG(0x170),
738 REG(0x150),
739 REG(0x154),
740 REG(0x158),
741 REG16(0x41c),
742 REG16(0x600),
743 REG16(0x604),
744 REG16(0x608),
745 REG16(0x60c),
746 REG16(0x610),
747 REG16(0x614),
748 REG16(0x618),
749 REG16(0x61c),
750 REG16(0x620),
751 REG16(0x624),
752 REG16(0x628),
753 REG16(0x62c),
754 REG16(0x630),
755 REG16(0x634),
756 REG16(0x638),
757 REG16(0x63c),
758 REG16(0x640),
759 REG16(0x644),
760 REG16(0x648),
761 REG16(0x64c),
762 REG16(0x650),
763 REG16(0x654),
764 REG16(0x658),
765 REG16(0x65c),
766 REG16(0x660),
767 REG16(0x664),
768 REG16(0x668),
769 REG16(0x66c),
770 REG16(0x670),
771 REG16(0x674),
772 REG16(0x678),
773 REG16(0x67c),
774 REG(0x068),
775
776 END(176)
777};
778
779static const u8 gen12_xcs_offsets[] = {
780 NOP(1),
781 LRI(13, POSTED),
782 REG16(0x244),
783 REG(0x034),
784 REG(0x030),
785 REG(0x038),
786 REG(0x03c),
787 REG(0x168),
788 REG(0x140),
789 REG(0x110),
790 REG(0x1c0),
791 REG(0x1c4),
792 REG(0x1c8),
793 REG(0x180),
794 REG16(0x2b4),
795
796 NOP(5),
797 LRI(9, POSTED),
798 REG16(0x3a8),
799 REG16(0x28c),
800 REG16(0x288),
801 REG16(0x284),
802 REG16(0x280),
803 REG16(0x27c),
804 REG16(0x278),
805 REG16(0x274),
806 REG16(0x270),
807
808 END(80)
809};
810
811static const u8 gen8_rcs_offsets[] = {
812 NOP(1),
813 LRI(14, POSTED),
814 REG16(0x244),
815 REG(0x034),
816 REG(0x030),
817 REG(0x038),
818 REG(0x03c),
819 REG(0x168),
820 REG(0x140),
821 REG(0x110),
822 REG(0x11c),
823 REG(0x114),
824 REG(0x118),
825 REG(0x1c0),
826 REG(0x1c4),
827 REG(0x1c8),
828
829 NOP(3),
830 LRI(9, POSTED),
831 REG16(0x3a8),
832 REG16(0x28c),
833 REG16(0x288),
834 REG16(0x284),
835 REG16(0x280),
836 REG16(0x27c),
837 REG16(0x278),
838 REG16(0x274),
839 REG16(0x270),
840
841 NOP(13),
842 LRI(1, 0),
843 REG(0x0c8),
844
845 END(80)
846};
847
848static const u8 gen9_rcs_offsets[] = {
849 NOP(1),
850 LRI(14, POSTED),
851 REG16(0x244),
852 REG(0x34),
853 REG(0x30),
854 REG(0x38),
855 REG(0x3c),
856 REG(0x168),
857 REG(0x140),
858 REG(0x110),
859 REG(0x11c),
860 REG(0x114),
861 REG(0x118),
862 REG(0x1c0),
863 REG(0x1c4),
864 REG(0x1c8),
865
866 NOP(3),
867 LRI(9, POSTED),
868 REG16(0x3a8),
869 REG16(0x28c),
870 REG16(0x288),
871 REG16(0x284),
872 REG16(0x280),
873 REG16(0x27c),
874 REG16(0x278),
875 REG16(0x274),
876 REG16(0x270),
877
878 NOP(13),
879 LRI(1, 0),
880 REG(0xc8),
881
882 NOP(13),
883 LRI(44, POSTED),
884 REG(0x28),
885 REG(0x9c),
886 REG(0xc0),
887 REG(0x178),
888 REG(0x17c),
889 REG16(0x358),
890 REG(0x170),
891 REG(0x150),
892 REG(0x154),
893 REG(0x158),
894 REG16(0x41c),
895 REG16(0x600),
896 REG16(0x604),
897 REG16(0x608),
898 REG16(0x60c),
899 REG16(0x610),
900 REG16(0x614),
901 REG16(0x618),
902 REG16(0x61c),
903 REG16(0x620),
904 REG16(0x624),
905 REG16(0x628),
906 REG16(0x62c),
907 REG16(0x630),
908 REG16(0x634),
909 REG16(0x638),
910 REG16(0x63c),
911 REG16(0x640),
912 REG16(0x644),
913 REG16(0x648),
914 REG16(0x64c),
915 REG16(0x650),
916 REG16(0x654),
917 REG16(0x658),
918 REG16(0x65c),
919 REG16(0x660),
920 REG16(0x664),
921 REG16(0x668),
922 REG16(0x66c),
923 REG16(0x670),
924 REG16(0x674),
925 REG16(0x678),
926 REG16(0x67c),
927 REG(0x68),
928
929 END(176)
930};
931
932static const u8 gen11_rcs_offsets[] = {
933 NOP(1),
934 LRI(15, POSTED),
935 REG16(0x244),
936 REG(0x034),
937 REG(0x030),
938 REG(0x038),
939 REG(0x03c),
940 REG(0x168),
941 REG(0x140),
942 REG(0x110),
943 REG(0x11c),
944 REG(0x114),
945 REG(0x118),
946 REG(0x1c0),
947 REG(0x1c4),
948 REG(0x1c8),
949 REG(0x180),
950
951 NOP(1),
952 LRI(9, POSTED),
953 REG16(0x3a8),
954 REG16(0x28c),
955 REG16(0x288),
956 REG16(0x284),
957 REG16(0x280),
958 REG16(0x27c),
959 REG16(0x278),
960 REG16(0x274),
961 REG16(0x270),
962
963 LRI(1, POSTED),
964 REG(0x1b0),
965
966 NOP(10),
967 LRI(1, 0),
968 REG(0x0c8),
969
970 END(80)
971};
972
973static const u8 gen12_rcs_offsets[] = {
974 NOP(1),
975 LRI(13, POSTED),
976 REG16(0x244),
977 REG(0x034),
978 REG(0x030),
979 REG(0x038),
980 REG(0x03c),
981 REG(0x168),
982 REG(0x140),
983 REG(0x110),
984 REG(0x1c0),
985 REG(0x1c4),
986 REG(0x1c8),
987 REG(0x180),
988 REG16(0x2b4),
989
990 NOP(5),
991 LRI(9, POSTED),
992 REG16(0x3a8),
993 REG16(0x28c),
994 REG16(0x288),
995 REG16(0x284),
996 REG16(0x280),
997 REG16(0x27c),
998 REG16(0x278),
999 REG16(0x274),
1000 REG16(0x270),
1001
1002 LRI(3, POSTED),
1003 REG(0x1b0),
1004 REG16(0x5a8),
1005 REG16(0x5ac),
1006
1007 NOP(6),
1008 LRI(1, 0),
1009 REG(0x0c8),
1010 NOP(3 + 9 + 1),
1011
1012 LRI(51, POSTED),
1013 REG16(0x588),
1014 REG16(0x588),
1015 REG16(0x588),
1016 REG16(0x588),
1017 REG16(0x588),
1018 REG16(0x588),
1019 REG(0x028),
1020 REG(0x09c),
1021 REG(0x0c0),
1022 REG(0x178),
1023 REG(0x17c),
1024 REG16(0x358),
1025 REG(0x170),
1026 REG(0x150),
1027 REG(0x154),
1028 REG(0x158),
1029 REG16(0x41c),
1030 REG16(0x600),
1031 REG16(0x604),
1032 REG16(0x608),
1033 REG16(0x60c),
1034 REG16(0x610),
1035 REG16(0x614),
1036 REG16(0x618),
1037 REG16(0x61c),
1038 REG16(0x620),
1039 REG16(0x624),
1040 REG16(0x628),
1041 REG16(0x62c),
1042 REG16(0x630),
1043 REG16(0x634),
1044 REG16(0x638),
1045 REG16(0x63c),
1046 REG16(0x640),
1047 REG16(0x644),
1048 REG16(0x648),
1049 REG16(0x64c),
1050 REG16(0x650),
1051 REG16(0x654),
1052 REG16(0x658),
1053 REG16(0x65c),
1054 REG16(0x660),
1055 REG16(0x664),
1056 REG16(0x668),
1057 REG16(0x66c),
1058 REG16(0x670),
1059 REG16(0x674),
1060 REG16(0x678),
1061 REG16(0x67c),
1062 REG(0x068),
1063 REG(0x084),
1064 NOP(1),
1065
1066 END(192)
1067};
1068
1069#undef END
1070#undef REG16
1071#undef REG
1072#undef LRI
1073#undef NOP
1074
1075static const u8 *reg_offsets(const struct intel_engine_cs *engine)
1076{
1077 /*
1078 * The gen12+ lists only have the registers we program in the basic
1079 * default state. We rely on the context image using relative
 * addressing to automatically fix up the register state between the
 * physical engines for the virtual engine.
1082 */
1083 GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 &&
1084 !intel_engine_has_relative_mmio(engine));
1085
1086 if (engine->class == RENDER_CLASS) {
1087 if (INTEL_GEN(engine->i915) >= 12)
1088 return gen12_rcs_offsets;
1089 else if (INTEL_GEN(engine->i915) >= 11)
1090 return gen11_rcs_offsets;
1091 else if (INTEL_GEN(engine->i915) >= 9)
1092 return gen9_rcs_offsets;
1093 else
1094 return gen8_rcs_offsets;
1095 } else {
1096 if (INTEL_GEN(engine->i915) >= 12)
1097 return gen12_xcs_offsets;
1098 else if (INTEL_GEN(engine->i915) >= 9)
1099 return gen9_xcs_offsets;
1100 else
1101 return gen8_xcs_offsets;
1102 }
1103}
1104
1105static struct i915_request *
1106__unwind_incomplete_requests(struct intel_engine_cs *engine)
1107{
1108 struct i915_request *rq, *rn, *active = NULL;
1109 struct list_head *pl;
1110 int prio = I915_PRIORITY_INVALID;
1111
1112 lockdep_assert_held(&engine->active.lock);
1113
1114 list_for_each_entry_safe_reverse(rq, rn,
1115 &engine->active.requests,
1116 sched.link) {
1117 if (i915_request_completed(rq))
1118 continue; /* XXX */
1119
1120 __i915_request_unsubmit(rq);
1121
1122 /*
1123 * Push the request back into the queue for later resubmission.
1124 * If this request is not native to this physical engine (i.e.
1125 * it came from a virtual source), push it back onto the virtual
1126 * engine so that it can be moved across onto another physical
1127 * engine as load dictates.
1128 */
1129 if (likely(rq->execution_mask == engine->mask)) {
1130 GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
1131 if (rq_prio(rq) != prio) {
1132 prio = rq_prio(rq);
1133 pl = i915_sched_lookup_priolist(engine, prio);
1134 }
1135 GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
1136
1137 list_move(&rq->sched.link, pl);
1138 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
1139
1140 /* Check in case we rollback so far we wrap [size/2] */
1141 if (intel_ring_direction(rq->ring,
1142 intel_ring_wrap(rq->ring,
1143 rq->tail),
1144 rq->ring->tail) > 0)
1145 rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE;
1146
1147 active = rq;
1148 } else {
1149 struct intel_engine_cs *owner = rq->context->engine;
1150
1151 /*
1152 * Decouple the virtual breadcrumb before moving it
1153 * back to the virtual engine -- we don't want the
1154 * request to complete in the background and try
1155 * and cancel the breadcrumb on the virtual engine
1156 * (instead of the old engine where it is linked)!
1157 */
1158 if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
1159 &rq->fence.flags)) {
1160 spin_lock_nested(&rq->lock,
1161 SINGLE_DEPTH_NESTING);
1162 i915_request_cancel_breadcrumb(rq);
1163 spin_unlock(&rq->lock);
1164 }
1165 WRITE_ONCE(rq->engine, owner);
1166 owner->submit_request(rq);
1167 active = NULL;
1168 }
1169 }
1170
1171 return active;
1172}
1173
1174struct i915_request *
1175execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
1176{
1177 struct intel_engine_cs *engine =
1178 container_of(execlists, typeof(*engine), execlists);
1179
1180 return __unwind_incomplete_requests(engine);
1181}
1182
1183static inline void
1184execlists_context_status_change(struct i915_request *rq, unsigned long status)
1185{
1186 /*
 * Only used when GVT-g is enabled now. When GVT-g is disabled,
 * the compiler should eliminate this function as dead code.
1189 */
1190 if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
1191 return;
1192
1193 atomic_notifier_call_chain(&rq->engine->context_status_notifier,
1194 status, rq);
1195}
1196
1197static void intel_engine_context_in(struct intel_engine_cs *engine)
1198{
1199 unsigned long flags;
1200
1201 if (atomic_add_unless(&engine->stats.active, 1, 0))
1202 return;
1203
1204 write_seqlock_irqsave(&engine->stats.lock, flags);
1205 if (!atomic_add_unless(&engine->stats.active, 1, 0)) {
1206 engine->stats.start = ktime_get();
1207 atomic_inc(&engine->stats.active);
1208 }
1209 write_sequnlock_irqrestore(&engine->stats.lock, flags);
1210}
1211
1212static void intel_engine_context_out(struct intel_engine_cs *engine)
1213{
1214 unsigned long flags;
1215
1216 GEM_BUG_ON(!atomic_read(&engine->stats.active));
1217
1218 if (atomic_add_unless(&engine->stats.active, -1, 1))
1219 return;
1220
1221 write_seqlock_irqsave(&engine->stats.lock, flags);
1222 if (atomic_dec_and_test(&engine->stats.active)) {
1223 engine->stats.total =
1224 ktime_add(engine->stats.total,
1225 ktime_sub(ktime_get(), engine->stats.start));
1226 }
1227 write_sequnlock_irqrestore(&engine->stats.lock, flags);
1228}
1229
1230static void
1231execlists_check_context(const struct intel_context *ce,
1232 const struct intel_engine_cs *engine)
1233{
1234 const struct intel_ring *ring = ce->ring;
1235 u32 *regs = ce->lrc_reg_state;
1236 bool valid = true;
1237 int x;
1238
1239 if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
1240 pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
1241 engine->name,
1242 regs[CTX_RING_START],
1243 i915_ggtt_offset(ring->vma));
1244 regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
1245 valid = false;
1246 }
1247
1248 if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
1249 (RING_CTL_SIZE(ring->size) | RING_VALID)) {
1250 pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
1251 engine->name,
1252 regs[CTX_RING_CTL],
1253 (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
1254 regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
1255 valid = false;
1256 }
1257
1258 x = lrc_ring_mi_mode(engine);
1259 if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
1260 pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
1261 engine->name, regs[x + 1]);
1262 regs[x + 1] &= ~STOP_RING;
1263 regs[x + 1] |= STOP_RING << 16;
1264 valid = false;
1265 }
1266
1267 WARN_ONCE(!valid, "Invalid lrc state found before submission\n");
1268}
1269
1270static void restore_default_state(struct intel_context *ce,
1271 struct intel_engine_cs *engine)
1272{
1273 u32 *regs;
1274
1275 regs = memset(ce->lrc_reg_state, 0, engine->context_size - PAGE_SIZE);
1276 execlists_init_reg_state(regs, ce, engine, ce->ring, true);
1277
1278 ce->runtime.last = intel_context_get_runtime(ce);
1279}
1280
1281static void reset_active(struct i915_request *rq,
1282 struct intel_engine_cs *engine)
1283{
1284 struct intel_context * const ce = rq->context;
1285 u32 head;
1286
1287 /*
1288 * The executing context has been cancelled. We want to prevent
1289 * further execution along this context and propagate the error on
1290 * to anything depending on its results.
1291 *
1292 * In __i915_request_submit(), we apply the -EIO and remove the
1293 * requests' payloads for any banned requests. But first, we must
1294 * rewind the context back to the start of the incomplete request so
1295 * that we do not jump back into the middle of the batch.
1296 *
1297 * We preserve the breadcrumbs and semaphores of the incomplete
1298 * requests so that inter-timeline dependencies (i.e other timelines)
1299 * remain correctly ordered. And we defer to __i915_request_submit()
1300 * so that all asynchronous waits are correctly handled.
1301 */
1302 ENGINE_TRACE(engine, "{ rq=%llx:%lld }\n",
1303 rq->fence.context, rq->fence.seqno);
1304
1305 /* On resubmission of the active request, payload will be scrubbed */
1306 if (i915_request_completed(rq))
1307 head = rq->tail;
1308 else
1309 head = active_request(ce->timeline, rq)->head;
1310 head = intel_ring_wrap(ce->ring, head);
1311
1312 /* Scrub the context image to prevent replaying the previous batch */
1313 restore_default_state(ce, engine);
1314 __execlists_update_reg_state(ce, engine, head);
1315
1316 /* We've switched away, so this should be a no-op, but intent matters */
1317 ce->lrc.desc |= CTX_DESC_FORCE_RESTORE;
1318}
1319
1320static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
1321{
1322#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1323 ce->runtime.num_underflow += dt < 0;
1324 ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt);
1325#endif
1326}
1327
1328static void intel_context_update_runtime(struct intel_context *ce)
1329{
1330 u32 old;
1331 s32 dt;
1332
1333 if (intel_context_is_barrier(ce))
1334 return;
1335
1336 old = ce->runtime.last;
1337 ce->runtime.last = intel_context_get_runtime(ce);
1338 dt = ce->runtime.last - old;
1339
1340 if (unlikely(dt <= 0)) {
1341 CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
1342 old, ce->runtime.last, dt);
1343 st_update_runtime_underflow(ce, dt);
1344 return;
1345 }
1346
1347 ewma_runtime_add(&ce->runtime.avg, dt);
1348 ce->runtime.total += dt;
1349}
1350
1351static inline struct intel_engine_cs *
1352__execlists_schedule_in(struct i915_request *rq)
1353{
1354 struct intel_engine_cs * const engine = rq->engine;
1355 struct intel_context * const ce = rq->context;
1356
1357 intel_context_get(ce);
1358
1359 if (unlikely(intel_context_is_banned(ce)))
1360 reset_active(rq, engine);
1361
1362 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
1363 execlists_check_context(ce, engine);
1364
1365 if (ce->tag) {
1366 /* Use a fixed tag for OA and friends */
1367 GEM_BUG_ON(ce->tag <= BITS_PER_LONG);
1368 ce->lrc.ccid = ce->tag;
1369 } else {
1370 /* We don't need a strict matching tag, just different values */
1371 unsigned int tag = ffs(READ_ONCE(engine->context_tag));
1372
1373 GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG);
1374 clear_bit(tag - 1, &engine->context_tag);
1375 ce->lrc.ccid = tag << (GEN11_SW_CTX_ID_SHIFT - 32);
1376
1377 BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID);
1378 }
1379
1380 ce->lrc.ccid |= engine->execlists.ccid;
1381
1382 __intel_gt_pm_get(engine->gt);
1383 if (engine->fw_domain && !atomic_fetch_inc(&engine->fw_active))
1384 intel_uncore_forcewake_get(engine->uncore, engine->fw_domain);
1385 execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
1386 intel_engine_context_in(engine);
1387
1388 return engine;
1389}
1390
1391static inline struct i915_request *
1392execlists_schedule_in(struct i915_request *rq, int idx)
1393{
1394 struct intel_context * const ce = rq->context;
1395 struct intel_engine_cs *old;
1396
1397 GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
1398 trace_i915_request_in(rq, idx);
1399
1400 old = READ_ONCE(ce->inflight);
1401 do {
1402 if (!old) {
1403 WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq));
1404 break;
1405 }
1406 } while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old)));
1407
1408 GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
1409 return i915_request_get(rq);
1410}
1411
1412static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
1413{
1414 struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
1415 struct i915_request *next = READ_ONCE(ve->request);
1416
1417 if (next == rq || (next && next->execution_mask & ~rq->execution_mask))
1418 tasklet_hi_schedule(&ve->base.execlists.tasklet);
1419}
1420
1421static inline void
1422__execlists_schedule_out(struct i915_request *rq,
1423 struct intel_engine_cs * const engine,
1424 unsigned int ccid)
1425{
1426 struct intel_context * const ce = rq->context;
1427
1428 /*
1429 * NB process_csb() is not under the engine->active.lock and hence
1430 * schedule_out can race with schedule_in meaning that we should
1431 * refrain from doing non-trivial work here.
1432 */
1433
1434 /*
1435 * If we have just completed this context, the engine may now be
1436 * idle and we want to re-enter powersaving.
1437 */
1438 if (list_is_last_rcu(&rq->link, &ce->timeline->requests) &&
1439 i915_request_completed(rq))
1440 intel_engine_add_retire(engine, ce->timeline);
1441
1442 ccid >>= GEN11_SW_CTX_ID_SHIFT - 32;
1443 ccid &= GEN12_MAX_CONTEXT_HW_ID;
1444 if (ccid < BITS_PER_LONG) {
1445 GEM_BUG_ON(ccid == 0);
1446 GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag));
1447 set_bit(ccid - 1, &engine->context_tag);
1448 }
1449
1450 intel_context_update_runtime(ce);
1451 intel_engine_context_out(engine);
1452 execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
1453 if (engine->fw_domain && !atomic_dec_return(&engine->fw_active))
1454 intel_uncore_forcewake_put(engine->uncore, engine->fw_domain);
1455 intel_gt_pm_put_async(engine->gt);
1456
1457 /*
1458 * If this is part of a virtual engine, its next request may
1459 * have been blocked waiting for access to the active context.
1460 * We have to kick all the siblings again in case we need to
1461 * switch (e.g. the next request is not runnable on this
1462 * engine). Hopefully, we will already have submitted the next
1463 * request before the tasklet runs and do not need to rebuild
1464 * each virtual tree and kick everyone again.
1465 */
1466 if (ce->engine != engine)
1467 kick_siblings(rq, ce);
1468
1469 intel_context_put(ce);
1470}
1471
1472static inline void
1473execlists_schedule_out(struct i915_request *rq)
1474{
1475 struct intel_context * const ce = rq->context;
1476 struct intel_engine_cs *cur, *old;
1477 u32 ccid;
1478
1479 trace_i915_request_out(rq);
1480
1481 ccid = rq->context->lrc.ccid;
1482 old = READ_ONCE(ce->inflight);
1483 do
1484 cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
1485 while (!try_cmpxchg(&ce->inflight, &old, cur));
1486 if (!cur)
1487 __execlists_schedule_out(rq, old, ccid);
1488
1489 i915_request_put(rq);
1490}
1491
1492static u64 execlists_update_context(struct i915_request *rq)
1493{
1494 struct intel_context *ce = rq->context;
1495 u64 desc = ce->lrc.desc;
1496 u32 tail, prev;
1497
1498 /*
1499 * WaIdleLiteRestore:bdw,skl
1500 *
1501 * We should never submit the context with the same RING_TAIL twice
1502 * just in case we submit an empty ring, which confuses the HW.
1503 *
1504 * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of
1505 * the normal request to be able to always advance the RING_TAIL on
1506 * subsequent resubmissions (for lite restore). Should that fail us,
1507 * and we try and submit the same tail again, force the context
1508 * reload.
1509 *
1510 * If we need to return to a preempted context, we need to skip the
1511 * lite-restore and force it to reload the RING_TAIL. Otherwise, the
1512 * HW has a tendency to ignore us rewinding the TAIL to the end of
1513 * an earlier request.
1514 */
1515 GEM_BUG_ON(ce->lrc_reg_state[CTX_RING_TAIL] != rq->ring->tail);
1516 prev = rq->ring->tail;
1517 tail = intel_ring_set_tail(rq->ring, rq->tail);
1518 if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0))
1519 desc |= CTX_DESC_FORCE_RESTORE;
1520 ce->lrc_reg_state[CTX_RING_TAIL] = tail;
1521 rq->tail = rq->wa_tail;
1522
1523 /*
1524 * Make sure the context image is complete before we submit it to HW.
1525 *
1526 * Ostensibly, writes (including the WCB) should be flushed prior to
 * an uncached write such as our mmio register access, but the empirical
1528 * evidence (esp. on Braswell) suggests that the WC write into memory
1529 * may not be visible to the HW prior to the completion of the UC
1530 * register write and that we may begin execution from the context
1531 * before its image is complete leading to invalid PD chasing.
1532 */
1533 wmb();
1534
1535 ce->lrc.desc &= ~CTX_DESC_FORCE_RESTORE;
1536 return desc;
1537}
1538
1539static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
1540{
1541 if (execlists->ctrl_reg) {
1542 writel(lower_32_bits(desc), execlists->submit_reg + port * 2);
1543 writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1);
1544 } else {
1545 writel(upper_32_bits(desc), execlists->submit_reg);
1546 writel(lower_32_bits(desc), execlists->submit_reg);
1547 }
1548}
1549
1550static __maybe_unused char *
1551dump_port(char *buf, int buflen, const char *prefix, struct i915_request *rq)
1552{
1553 if (!rq)
1554 return "";
1555
1556 snprintf(buf, buflen, "%sccid:%x %llx:%lld%s prio %d",
1557 prefix,
1558 rq->context->lrc.ccid,
1559 rq->fence.context, rq->fence.seqno,
1560 i915_request_completed(rq) ? "!" :
1561 i915_request_started(rq) ? "*" :
1562 "",
1563 rq_prio(rq));
1564
1565 return buf;
1566}
1567
1568static __maybe_unused void
1569trace_ports(const struct intel_engine_execlists *execlists,
1570 const char *msg,
1571 struct i915_request * const *ports)
1572{
1573 const struct intel_engine_cs *engine =
1574 container_of(execlists, typeof(*engine), execlists);
1575 char __maybe_unused p0[40], p1[40];
1576
1577 if (!ports[0])
1578 return;
1579
1580 ENGINE_TRACE(engine, "%s { %s%s }\n", msg,
1581 dump_port(p0, sizeof(p0), "", ports[0]),
1582 dump_port(p1, sizeof(p1), ", ", ports[1]));
1583}
1584
1585static inline bool
1586reset_in_progress(const struct intel_engine_execlists *execlists)
1587{
1588 return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
1589}
1590
1591static __maybe_unused bool
1592assert_pending_valid(const struct intel_engine_execlists *execlists,
1593 const char *msg)
1594{
1595 struct intel_engine_cs *engine =
1596 container_of(execlists, typeof(*engine), execlists);
1597 struct i915_request * const *port, *rq;
1598 struct intel_context *ce = NULL;
1599 bool sentinel = false;
1600 u32 ccid = -1;
1601
1602 trace_ports(execlists, msg, execlists->pending);
1603
1604 /* We may be messing around with the lists during reset, lalala */
1605 if (reset_in_progress(execlists))
1606 return true;
1607
1608 if (!execlists->pending[0]) {
1609 GEM_TRACE_ERR("%s: Nothing pending for promotion!\n",
1610 engine->name);
1611 return false;
1612 }
1613
1614 if (execlists->pending[execlists_num_ports(execlists)]) {
1615 GEM_TRACE_ERR("%s: Excess pending[%d] for promotion!\n",
1616 engine->name, execlists_num_ports(execlists));
1617 return false;
1618 }
1619
1620 for (port = execlists->pending; (rq = *port); port++) {
1621 unsigned long flags;
1622 bool ok = true;
1623
1624 GEM_BUG_ON(!kref_read(&rq->fence.refcount));
1625 GEM_BUG_ON(!i915_request_is_active(rq));
1626
1627 if (ce == rq->context) {
1628 GEM_TRACE_ERR("%s: Dup context:%llx in pending[%zd]\n",
1629 engine->name,
1630 ce->timeline->fence_context,
1631 port - execlists->pending);
1632 return false;
1633 }
1634 ce = rq->context;
1635
1636 if (ccid == ce->lrc.ccid) {
1637 GEM_TRACE_ERR("%s: Dup ccid:%x context:%llx in pending[%zd]\n",
1638 engine->name,
1639 ccid, ce->timeline->fence_context,
1640 port - execlists->pending);
1641 return false;
1642 }
1643 ccid = ce->lrc.ccid;
1644
1645 /*
1646 * Sentinels are supposed to be the last request so they flush
1647 * the current execution off the HW. Check that they are the only
1648 * request in the pending submission.
1649 */
1650 if (sentinel) {
1651 GEM_TRACE_ERR("%s: context:%llx after sentinel in pending[%zd]\n",
1652 engine->name,
1653 ce->timeline->fence_context,
1654 port - execlists->pending);
1655 return false;
1656 }
1657 sentinel = i915_request_has_sentinel(rq);
1658
1659 /* Hold tightly onto the lock to prevent concurrent retires! */
1660 if (!spin_trylock_irqsave(&rq->lock, flags))
1661 continue;
1662
1663 if (i915_request_completed(rq))
1664 goto unlock;
1665
1666 if (i915_active_is_idle(&ce->active) &&
1667 !intel_context_is_barrier(ce)) {
1668 GEM_TRACE_ERR("%s: Inactive context:%llx in pending[%zd]\n",
1669 engine->name,
1670 ce->timeline->fence_context,
1671 port - execlists->pending);
1672 ok = false;
1673 goto unlock;
1674 }
1675
1676 if (!i915_vma_is_pinned(ce->state)) {
1677 GEM_TRACE_ERR("%s: Unpinned context:%llx in pending[%zd]\n",
1678 engine->name,
1679 ce->timeline->fence_context,
1680 port - execlists->pending);
1681 ok = false;
1682 goto unlock;
1683 }
1684
1685 if (!i915_vma_is_pinned(ce->ring->vma)) {
1686 GEM_TRACE_ERR("%s: Unpinned ring:%llx in pending[%zd]\n",
1687 engine->name,
1688 ce->timeline->fence_context,
1689 port - execlists->pending);
1690 ok = false;
1691 goto unlock;
1692 }
1693
1694unlock:
1695 spin_unlock_irqrestore(&rq->lock, flags);
1696 if (!ok)
1697 return false;
1698 }
1699
1700 return ce;
1701}
1702
1703static void execlists_submit_ports(struct intel_engine_cs *engine)
1704{
1705 struct intel_engine_execlists *execlists = &engine->execlists;
1706 unsigned int n;
1707
1708 GEM_BUG_ON(!assert_pending_valid(execlists, "submit"));
1709
1710 /*
1711 * We can skip acquiring intel_runtime_pm_get() here as it was taken
1712 * on our behalf by the request (see i915_gem_mark_busy()) and it will
1713 * not be relinquished until the device is idle (see
1714 * i915_gem_idle_work_handler()). As a precaution, we make sure
1715 * that all ELSP are drained i.e. we have processed the CSB,
1716 * before allowing ourselves to idle and calling intel_runtime_pm_put().
1717 */
1718 GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
1719
1720 /*
1721 * ELSQ note: the submit queue is not cleared after being submitted
1722 * to the HW so we need to make sure we always clean it up. This is
1723 * currently ensured by the fact that we always write the same number
1724 * of elsq entries, keep this in mind before changing the loop below.
1725 */
1726 for (n = execlists_num_ports(execlists); n--; ) {
1727 struct i915_request *rq = execlists->pending[n];
1728
1729 write_desc(execlists,
1730 rq ? execlists_update_context(rq) : 0,
1731 n);
1732 }
1733
1734 /* we need to manually load the submit queue */
1735 if (execlists->ctrl_reg)
1736 writel(EL_CTRL_LOAD, execlists->ctrl_reg);
1737}
1738
1739static bool ctx_single_port_submission(const struct intel_context *ce)
1740{
1741 return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
1742 intel_context_force_single_submission(ce));
1743}
1744
1745static bool can_merge_ctx(const struct intel_context *prev,
1746 const struct intel_context *next)
1747{
1748 if (prev != next)
1749 return false;
1750
1751 if (ctx_single_port_submission(prev))
1752 return false;
1753
1754 return true;
1755}
1756
1757static unsigned long i915_request_flags(const struct i915_request *rq)
1758{
1759 return READ_ONCE(rq->fence.flags);
1760}
1761
1762static bool can_merge_rq(const struct i915_request *prev,
1763 const struct i915_request *next)
1764{
1765 GEM_BUG_ON(prev == next);
1766 GEM_BUG_ON(!assert_priority_queue(prev, next));
1767
1768 /*
1769 * We do not submit known completed requests. Therefore if the next
1770 * request is already completed, we can pretend to merge it in
1771 * with the previous context (and we will skip updating the ELSP
1772 * and tracking). Thus hopefully keeping the ELSP full with active
1773 * contexts, despite the best efforts of preempt-to-busy to confuse
1774 * us.
1775 */
1776 if (i915_request_completed(next))
1777 return true;
1778
1779 if (unlikely((i915_request_flags(prev) ^ i915_request_flags(next)) &
1780 (BIT(I915_FENCE_FLAG_NOPREEMPT) |
1781 BIT(I915_FENCE_FLAG_SENTINEL))))
1782 return false;
1783
1784 if (!can_merge_ctx(prev->context, next->context))
1785 return false;
1786
1787 GEM_BUG_ON(i915_seqno_passed(prev->fence.seqno, next->fence.seqno));
1788 return true;
1789}
1790
1791static void virtual_update_register_offsets(u32 *regs,
1792 struct intel_engine_cs *engine)
1793{
1794 set_offsets(regs, reg_offsets(engine), engine, false);
1795}
1796
1797static bool virtual_matches(const struct virtual_engine *ve,
1798 const struct i915_request *rq,
1799 const struct intel_engine_cs *engine)
1800{
1801 const struct intel_engine_cs *inflight;
1802
1803 if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */
1804 return false;
1805
1806 /*
1807 * We track when the HW has completed saving the context image
1808 * (i.e. when we have seen the final CS event switching out of
1809 * the context) and must not overwrite the context image before
1810 * then. This restricts us to only using the active engine
1811 * while the previous virtualized request is inflight (so
1812 * we reuse the register offsets). This is a very small
 * hysteresis on the greedy selection algorithm.
1814 */
1815 inflight = intel_context_inflight(&ve->context);
1816 if (inflight && inflight != engine)
1817 return false;
1818
1819 return true;
1820}
1821
1822static void virtual_xfer_breadcrumbs(struct virtual_engine *ve)
1823{
1824 /*
1825 * All the outstanding signals on ve->siblings[0] must have
1826 * been completed, just pending the interrupt handler. As those
1827 * signals still refer to the old sibling (via rq->engine), we must
1828 * transfer those to the old irq_worker to keep our locking
1829 * consistent.
1830 */
1831 intel_engine_transfer_stale_breadcrumbs(ve->siblings[0], &ve->context);
1832}
1833
1834#define for_each_waiter(p__, rq__) \
1835 list_for_each_entry_lockless(p__, \
1836 &(rq__)->sched.waiters_list, \
1837 wait_link)
1838
1839#define for_each_signaler(p__, rq__) \
1840 list_for_each_entry_rcu(p__, \
1841 &(rq__)->sched.signalers_list, \
1842 signal_link)
1843
1844static void defer_request(struct i915_request *rq, struct list_head * const pl)
1845{
1846 LIST_HEAD(list);
1847
1848 /*
1849 * We want to move the interrupted request to the back of
1850 * the round-robin list (i.e. its priority level), but
1851 * in doing so, we must then move all requests that were in
1852 * flight and were waiting for the interrupted request to
1853 * be run after it again.
1854 */
1855 do {
1856 struct i915_dependency *p;
1857
1858 GEM_BUG_ON(i915_request_is_active(rq));
1859 list_move_tail(&rq->sched.link, pl);
1860
1861 for_each_waiter(p, rq) {
1862 struct i915_request *w =
1863 container_of(p->waiter, typeof(*w), sched);
1864
1865 if (p->flags & I915_DEPENDENCY_WEAK)
1866 continue;
1867
1868 /* Leave semaphores spinning on the other engines */
1869 if (w->engine != rq->engine)
1870 continue;
1871
1872 /* No waiter should start before its signaler */
1873 GEM_BUG_ON(i915_request_has_initial_breadcrumb(w) &&
1874 i915_request_started(w) &&
1875 !i915_request_completed(rq));
1876
1877 GEM_BUG_ON(i915_request_is_active(w));
1878 if (!i915_request_is_ready(w))
1879 continue;
1880
1881 if (rq_prio(w) < rq_prio(rq))
1882 continue;
1883
1884 GEM_BUG_ON(rq_prio(w) > rq_prio(rq));
1885 list_move_tail(&w->sched.link, &list);
1886 }
1887
1888 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
1889 } while (rq);
1890}
1891
1892static void defer_active(struct intel_engine_cs *engine)
1893{
1894 struct i915_request *rq;
1895
1896 rq = __unwind_incomplete_requests(engine);
1897 if (!rq)
1898 return;
1899
1900 defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq)));
1901}
1902
1903static bool
1904need_timeslice(const struct intel_engine_cs *engine,
1905 const struct i915_request *rq,
1906 const struct rb_node *rb)
1907{
1908 int hint;
1909
1910 if (!intel_engine_has_timeslices(engine))
1911 return false;
1912
1913 hint = engine->execlists.queue_priority_hint;
1914
1915 if (rb) {
1916 const struct virtual_engine *ve =
1917 rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
1918 const struct intel_engine_cs *inflight =
1919 intel_context_inflight(&ve->context);
1920
1921 if (!inflight || inflight == engine) {
1922 struct i915_request *next;
1923
1924 rcu_read_lock();
1925 next = READ_ONCE(ve->request);
1926 if (next)
1927 hint = max(hint, rq_prio(next));
1928 rcu_read_unlock();
1929 }
1930 }
1931
1932 if (!list_is_last(&rq->sched.link, &engine->active.requests))
1933 hint = max(hint, rq_prio(list_next_entry(rq, sched.link)));
1934
1935 GEM_BUG_ON(hint >= I915_PRIORITY_UNPREEMPTABLE);
1936 return hint >= effective_prio(rq);
1937}
1938
1939static bool
1940timeslice_yield(const struct intel_engine_execlists *el,
1941 const struct i915_request *rq)
1942{
1943 /*
1944 * Once bitten, forever smitten!
1945 *
1946 * If the active context ever busy-waited on a semaphore,
1947 * it will be treated as a hog until the end of its timeslice (i.e.
1948 * until it is scheduled out and replaced by a new submission,
1949 * possibly even its own lite-restore). The HW only sends an interrupt
1950	 * on the first miss, and we do not know if that semaphore has been
1951 * signaled, or even if it is now stuck on another semaphore. Play
1952 * safe, yield if it might be stuck -- it will be given a fresh
1953 * timeslice in the near future.
1954 */
1955 return rq->context->lrc.ccid == READ_ONCE(el->yield);
1956}
1957
1958static bool
1959timeslice_expired(const struct intel_engine_execlists *el,
1960 const struct i915_request *rq)
1961{
1962 return timer_expired(&el->timer) || timeslice_yield(el, rq);
1963}
1964
1965static int
1966switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
1967{
1968 if (list_is_last(&rq->sched.link, &engine->active.requests))
1969 return engine->execlists.queue_priority_hint;
1970
1971 return rq_prio(list_next_entry(rq, sched.link));
1972}
1973
1974static inline unsigned long
1975timeslice(const struct intel_engine_cs *engine)
1976{
1977 return READ_ONCE(engine->props.timeslice_duration_ms);
1978}
1979
1980static unsigned long active_timeslice(const struct intel_engine_cs *engine)
1981{
1982 const struct intel_engine_execlists *execlists = &engine->execlists;
1983 const struct i915_request *rq = *execlists->active;
1984
1985 if (!rq || i915_request_completed(rq))
1986 return 0;
1987
1988 if (READ_ONCE(execlists->switch_priority_hint) < effective_prio(rq))
1989 return 0;
1990
1991 return timeslice(engine);
1992}
1993
1994static void set_timeslice(struct intel_engine_cs *engine)
1995{
1996 unsigned long duration;
1997
1998 if (!intel_engine_has_timeslices(engine))
1999 return;
2000
2001 duration = active_timeslice(engine);
2002 ENGINE_TRACE(engine, "bump timeslicing, interval:%lu", duration);
2003
2004 set_timer_ms(&engine->execlists.timer, duration);
2005}
2006
2007static void start_timeslice(struct intel_engine_cs *engine, int prio)
2008{
2009 struct intel_engine_execlists *execlists = &engine->execlists;
2010 unsigned long duration;
2011
2012 if (!intel_engine_has_timeslices(engine))
2013 return;
2014
2015 WRITE_ONCE(execlists->switch_priority_hint, prio);
2016 if (prio == INT_MIN)
2017 return;
2018
2019 if (timer_pending(&execlists->timer))
2020 return;
2021
2022 duration = timeslice(engine);
2023 ENGINE_TRACE(engine,
2024 "start timeslicing, prio:%d, interval:%lu",
2025 prio, duration);
2026
2027 set_timer_ms(&execlists->timer, duration);
2028}
2029
2030static void record_preemption(struct intel_engine_execlists *execlists)
2031{
2032 (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
2033}
2034
2035static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
2036 const struct i915_request *rq)
2037{
2038 if (!rq)
2039 return 0;
2040
2041 /* Force a fast reset for terminated contexts (ignoring sysfs!) */
2042 if (unlikely(intel_context_is_banned(rq->context)))
2043 return 1;
2044
2045 return READ_ONCE(engine->props.preempt_timeout_ms);
2046}
2047
2048static void set_preempt_timeout(struct intel_engine_cs *engine,
2049 const struct i915_request *rq)
2050{
2051 if (!intel_engine_has_preempt_reset(engine))
2052 return;
2053
2054 set_timer_ms(&engine->execlists.preempt,
2055 active_preempt_timeout(engine, rq));
2056}
2057
2058static inline void clear_ports(struct i915_request **ports, int count)
2059{
2060 memset_p((void **)ports, NULL, count);
2061}
2062
2063static inline void
2064copy_ports(struct i915_request **dst, struct i915_request **src, int count)
2065{
2066 /* A memcpy_p() would be very useful here! */
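	/*
	 * The ports are read locklessly (e.g. by execlists_active(), cf. the
	 * smp_wmb() "seqlock" comments where active/pending are switched),
	 * so each pointer must be published with a single, untorn store.
	 */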
2067 while (count--)
2068 WRITE_ONCE(*dst++, *src++); /* avoid write tearing */
2069}
2070
2071static void execlists_dequeue(struct intel_engine_cs *engine)
2072{
2073 struct intel_engine_execlists * const execlists = &engine->execlists;
2074 struct i915_request **port = execlists->pending;
2075 struct i915_request ** const last_port = port + execlists->port_mask;
2076 struct i915_request * const *active;
2077 struct i915_request *last;
2078 struct rb_node *rb;
2079 bool submit = false;
2080
2081 /*
2082 * Hardware submission is through 2 ports. Conceptually each port
2083 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
2084 * static for a context, and unique to each, so we only execute
2085 * requests belonging to a single context from each ring. RING_HEAD
2086	 * is maintained by the CS in the context image; it marks the place
2087 * where it got up to last time, and through RING_TAIL we tell the CS
2088 * where we want to execute up to this time.
2089 *
2090 * In this list the requests are in order of execution. Consecutive
2091 * requests from the same context are adjacent in the ringbuffer. We
2092 * can combine these requests into a single RING_TAIL update:
2093 *
2094 * RING_HEAD...req1...req2
2095 * ^- RING_TAIL
2096 * since to execute req2 the CS must first execute req1.
2097 *
2098 * Our goal then is to point each port to the end of a consecutive
2099 * sequence of requests as being the most optimal (fewest wake ups
2100 * and context switches) submission.
2101 */
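	/*
	 * A sketch of the ideal outcome (illustrative only):
	 *
	 *	pending[0] -> last request of context A
	 *	              (one RING_TAIL write covering reqA1..reqAn)
	 *	pending[1] -> last request of context B
	 *
	 * i.e. at most one port per context, each pointing at the final
	 * request of a contiguous run of requests from that context.
	 */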
2102
2103 for (rb = rb_first_cached(&execlists->virtual); rb; ) {
2104 struct virtual_engine *ve =
2105 rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
2106 struct i915_request *rq = READ_ONCE(ve->request);
2107
2108 if (!rq) { /* lazily cleanup after another engine handled rq */
2109 rb_erase_cached(rb, &execlists->virtual);
2110 RB_CLEAR_NODE(rb);
2111 rb = rb_first_cached(&execlists->virtual);
2112 continue;
2113 }
2114
2115 if (!virtual_matches(ve, rq, engine)) {
2116 rb = rb_next(rb);
2117 continue;
2118 }
2119
2120 break;
2121 }
2122
2123 /*
2124 * If the queue is higher priority than the last
2125 * request in the currently active context, submit afresh.
2126 * We will resubmit again afterwards in case we need to split
2127 * the active context to interject the preemption request,
2128 * i.e. we will retrigger preemption following the ack in case
2129 * of trouble.
2130 */
2131 active = READ_ONCE(execlists->active);
2132
2133 /*
2134 * In theory we can skip over completed contexts that have not
2135 * yet been processed by events (as those events are in flight):
2136 *
2137 * while ((last = *active) && i915_request_completed(last))
2138 * active++;
2139 *
2140 * However, the GPU cannot handle this as it will ultimately
2141 * find itself trying to jump back into a context it has just
2142 * completed and barf.
2143 */
2144
2145 if ((last = *active)) {
2146 if (need_preempt(engine, last, rb)) {
2147 if (i915_request_completed(last)) {
2148 tasklet_hi_schedule(&execlists->tasklet);
2149 return;
2150 }
2151
2152 ENGINE_TRACE(engine,
2153 "preempting last=%llx:%lld, prio=%d, hint=%d\n",
2154 last->fence.context,
2155 last->fence.seqno,
2156 last->sched.attr.priority,
2157 execlists->queue_priority_hint);
2158 record_preemption(execlists);
2159
2160 /*
2161 * Don't let the RING_HEAD advance past the breadcrumb
2162 * as we unwind (and until we resubmit) so that we do
2163 * not accidentally tell it to go backwards.
2164 */
2165 ring_set_paused(engine, 1);
2166
2167 /*
2168 * Note that we have not stopped the GPU at this point,
2169 * so we are unwinding the incomplete requests as they
2170 * remain inflight and so by the time we do complete
2171 * the preemption, some of the unwound requests may
2172 * complete!
2173 */
2174 __unwind_incomplete_requests(engine);
2175
2176 last = NULL;
2177 } else if (need_timeslice(engine, last, rb) &&
2178 timeslice_expired(execlists, last)) {
2179 if (i915_request_completed(last)) {
2180 tasklet_hi_schedule(&execlists->tasklet);
2181 return;
2182 }
2183
2184 ENGINE_TRACE(engine,
2185 "expired last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n",
2186 last->fence.context,
2187 last->fence.seqno,
2188 last->sched.attr.priority,
2189 execlists->queue_priority_hint,
2190 yesno(timeslice_yield(execlists, last)));
2191
2192 ring_set_paused(engine, 1);
2193 defer_active(engine);
2194
2195 /*
2196 * Unlike for preemption, if we rewind and continue
2197 * executing the same context as previously active,
2198 * the order of execution will remain the same and
2199 * the tail will only advance. We do not need to
2200 * force a full context restore, as a lite-restore
2201 * is sufficient to resample the monotonic TAIL.
2202 *
2203 * If we switch to any other context, similarly we
2204 * will not rewind TAIL of current context, and
2205 * normal save/restore will preserve state and allow
2206 * us to later continue executing the same request.
2207 */
2208 last = NULL;
2209 } else {
2210 /*
2211 * Otherwise if we already have a request pending
2212 * for execution after the current one, we can
2213 * just wait until the next CS event before
2214 * queuing more. In either case we will force a
2215 * lite-restore preemption event, but if we wait
2216 * we hopefully coalesce several updates into a single
2217 * submission.
2218 */
2219 if (!list_is_last(&last->sched.link,
2220 &engine->active.requests)) {
2221 /*
2222 * Even if ELSP[1] is occupied and not worthy
2223 * of timeslices, our queue might be.
2224 */
2225 start_timeslice(engine, queue_prio(execlists));
2226 return;
2227 }
2228 }
2229 }
2230
2231 while (rb) { /* XXX virtual is always taking precedence */
2232 struct virtual_engine *ve =
2233 rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
2234 struct i915_request *rq;
2235
2236 spin_lock(&ve->base.active.lock);
2237
2238 rq = ve->request;
2239 if (unlikely(!rq)) { /* lost the race to a sibling */
2240 spin_unlock(&ve->base.active.lock);
2241 rb_erase_cached(rb, &execlists->virtual);
2242 RB_CLEAR_NODE(rb);
2243 rb = rb_first_cached(&execlists->virtual);
2244 continue;
2245 }
2246
2247 GEM_BUG_ON(rq != ve->request);
2248 GEM_BUG_ON(rq->engine != &ve->base);
2249 GEM_BUG_ON(rq->context != &ve->context);
2250
2251 if (rq_prio(rq) >= queue_prio(execlists)) {
2252 if (!virtual_matches(ve, rq, engine)) {
2253 spin_unlock(&ve->base.active.lock);
2254 rb = rb_next(rb);
2255 continue;
2256 }
2257
2258 if (last && !can_merge_rq(last, rq)) {
2259 spin_unlock(&ve->base.active.lock);
2260 start_timeslice(engine, rq_prio(rq));
2261 return; /* leave this for another sibling */
2262 }
2263
2264 ENGINE_TRACE(engine,
2265 "virtual rq=%llx:%lld%s, new engine? %s\n",
2266 rq->fence.context,
2267 rq->fence.seqno,
2268 i915_request_completed(rq) ? "!" :
2269 i915_request_started(rq) ? "*" :
2270 "",
2271 yesno(engine != ve->siblings[0]));
2272
2273 WRITE_ONCE(ve->request, NULL);
2274 WRITE_ONCE(ve->base.execlists.queue_priority_hint,
2275 INT_MIN);
2276 rb_erase_cached(rb, &execlists->virtual);
2277 RB_CLEAR_NODE(rb);
2278
2279 GEM_BUG_ON(!(rq->execution_mask & engine->mask));
2280 WRITE_ONCE(rq->engine, engine);
2281
2282 if (engine != ve->siblings[0]) {
2283 u32 *regs = ve->context.lrc_reg_state;
2284 unsigned int n;
2285
2286 GEM_BUG_ON(READ_ONCE(ve->context.inflight));
2287
2288 if (!intel_engine_has_relative_mmio(engine))
2289 virtual_update_register_offsets(regs,
2290 engine);
2291
2292 if (!list_empty(&ve->context.signals))
2293 virtual_xfer_breadcrumbs(ve);
2294
2295 /*
2296 * Move the bound engine to the top of the list
2297 * for future execution. We then kick this
2298 * tasklet first before checking others, so that
2299 * we preferentially reuse this set of bound
2300 * registers.
2301 */
2302 for (n = 1; n < ve->num_siblings; n++) {
2303 if (ve->siblings[n] == engine) {
2304 swap(ve->siblings[n],
2305 ve->siblings[0]);
2306 break;
2307 }
2308 }
2309
2310 GEM_BUG_ON(ve->siblings[0] != engine);
2311 }
2312
2313 if (__i915_request_submit(rq)) {
2314 submit = true;
2315 last = rq;
2316 }
2317 i915_request_put(rq);
2318
2319 /*
2320 * Hmm, we have a bunch of virtual engine requests,
2321 * but the first one was already completed (thanks
2322 * preempt-to-busy!). Keep looking at the veng queue
2323 * until we have no more relevant requests (i.e.
2324 * the normal submit queue has higher priority).
2325 */
2326 if (!submit) {
2327 spin_unlock(&ve->base.active.lock);
2328 rb = rb_first_cached(&execlists->virtual);
2329 continue;
2330 }
2331 }
2332
2333 spin_unlock(&ve->base.active.lock);
2334 break;
2335 }
2336
2337 while ((rb = rb_first_cached(&execlists->queue))) {
2338 struct i915_priolist *p = to_priolist(rb);
2339 struct i915_request *rq, *rn;
2340 int i;
2341
2342 priolist_for_each_request_consume(rq, rn, p, i) {
2343 bool merge = true;
2344
2345 /*
2346 * Can we combine this request with the current port?
2347 * It has to be the same context/ringbuffer and not
2348 * have any exceptions (e.g. GVT saying never to
2349 * combine contexts).
2350 *
2351 * If we can combine the requests, we can execute both
2352 * by updating the RING_TAIL to point to the end of the
2353 * second request, and so we never need to tell the
2354 * hardware about the first.
2355 */
2356 if (last && !can_merge_rq(last, rq)) {
2357 /*
2358 * If we are on the second port and cannot
2359 * combine this request with the last, then we
2360 * are done.
2361 */
2362 if (port == last_port)
2363 goto done;
2364
2365 /*
2366 * We must not populate both ELSP[] with the
2367 * same LRCA, i.e. we must submit 2 different
2368 * contexts if we submit 2 ELSP.
2369 */
2370 if (last->context == rq->context)
2371 goto done;
2372
2373 if (i915_request_has_sentinel(last))
2374 goto done;
2375
2376 /*
2377 * If GVT overrides us we only ever submit
2378 * port[0], leaving port[1] empty. Note that we
2379 * also have to be careful that we don't queue
2380 * the same context (even though a different
2381 * request) to the second port.
2382 */
2383 if (ctx_single_port_submission(last->context) ||
2384 ctx_single_port_submission(rq->context))
2385 goto done;
2386
2387 merge = false;
2388 }
2389
2390 if (__i915_request_submit(rq)) {
2391 if (!merge) {
2392 *port = execlists_schedule_in(last, port - execlists->pending);
2393 port++;
2394 last = NULL;
2395 }
2396
2397 GEM_BUG_ON(last &&
2398 !can_merge_ctx(last->context,
2399 rq->context));
2400 GEM_BUG_ON(last &&
2401 i915_seqno_passed(last->fence.seqno,
2402 rq->fence.seqno));
2403
2404 submit = true;
2405 last = rq;
2406 }
2407 }
2408
2409 rb_erase_cached(&p->node, &execlists->queue);
2410 i915_priolist_free(p);
2411 }
2412
2413done:
2414 /*
2415 * Here be a bit of magic! Or sleight-of-hand, whichever you prefer.
2416 *
2417 * We choose the priority hint such that if we add a request of greater
2418 * priority than this, we kick the submission tasklet to decide on
2419 * the right order of submitting the requests to hardware. We must
2420 * also be prepared to reorder requests as they are in-flight on the
2421 * HW. We derive the priority hint then as the first "hole" in
2422 * the HW submission ports and if there are no available slots,
2423 * the priority of the lowest executing request, i.e. last.
2424 *
2425 * When we do receive a higher priority request ready to run from the
2426 * user, see queue_request(), the priority hint is bumped to that
2427 * request triggering preemption on the next dequeue (or subsequent
2428 * interrupt for secondary ports).
2429 */
2430 execlists->queue_priority_hint = queue_prio(execlists);
2431
2432 if (submit) {
2433 *port = execlists_schedule_in(last, port - execlists->pending);
2434 execlists->switch_priority_hint =
2435 switch_prio(engine, *execlists->pending);
2436
2437 /*
2438 * Skip if we ended up with exactly the same set of requests,
2439 * e.g. trying to timeslice a pair of ordered contexts
2440 */
2441 if (!memcmp(active, execlists->pending,
2442 (port - execlists->pending + 1) * sizeof(*port))) {
2443 do
2444 execlists_schedule_out(fetch_and_zero(port));
2445 while (port-- != execlists->pending);
2446
2447 goto skip_submit;
2448 }
2449 clear_ports(port + 1, last_port - port);
2450
2451 WRITE_ONCE(execlists->yield, -1);
2452 set_preempt_timeout(engine, *active);
2453 execlists_submit_ports(engine);
2454 } else {
2455 start_timeslice(engine, execlists->queue_priority_hint);
2456skip_submit:
2457 ring_set_paused(engine, 0);
2458 }
2459}
2460
2461static void
2462cancel_port_requests(struct intel_engine_execlists * const execlists)
2463{
2464 struct i915_request * const *port;
2465
2466 for (port = execlists->pending; *port; port++)
2467 execlists_schedule_out(*port);
2468 clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending));
2469
2470 /* Mark the end of active before we overwrite *active */
2471 for (port = xchg(&execlists->active, execlists->pending); *port; port++)
2472 execlists_schedule_out(*port);
2473 clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight));
2474
2475 smp_wmb(); /* complete the seqlock for execlists_active() */
2476 WRITE_ONCE(execlists->active, execlists->inflight);
2477}
2478
2479static inline void
2480invalidate_csb_entries(const u32 *first, const u32 *last)
2481{
2482 clflush((void *)first);
2483 clflush((void *)last);
2484}
2485
2486/*
2487 * Starting with Gen12, the status has a new format:
2488 *
2489 * bit 0: switched to new queue
2490 * bit 1: reserved
2491 * bit 2: semaphore wait mode (poll or signal), only valid when
2492 * switch detail is set to "wait on semaphore"
2493 * bits 3-5: engine class
2494 * bits 6-11: engine instance
2495 * bits 12-14: reserved
2496 * bits 15-25: sw context id of the lrc the GT switched to
2497 * bits 26-31: sw counter of the lrc the GT switched to
2498 * bits 32-35: context switch detail
2499 * - 0: ctx complete
2500 * - 1: wait on sync flip
2501 * - 2: wait on vblank
2502 * - 3: wait on scanline
2503 * - 4: wait on semaphore
2504 * - 5: context preempted (not on SEMAPHORE_WAIT or
2505 * WAIT_FOR_EVENT)
2506 * bit 36: reserved
2507 * bits 37-43: wait detail (for switch detail 1 to 4)
2508 * bits 44-46: reserved
2509 * bits 47-57: sw context id of the lrc the GT switched away from
2510 * bits 58-63: sw counter of the lrc the GT switched away from
2511 */
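/*
 * Only a small part of that layout matters to gen12_csb_parse() below; a
 * sketch of the two dwords as they are consumed (field names are informal,
 * not taken from bspec):
 *
 *	csb[0] (bits  0-31): the incoming context -- "switched to new queue"
 *	                     flag plus the sw context id/counter of the lrc
 *	                     the GT switched to.
 *	csb[1] (bits 32-63): the outgoing context -- switch detail plus the sw
 *	                     context id/counter of the lrc switched away from.
 */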
2512static inline bool
2513gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
2514{
2515 u32 lower_dw = csb[0];
2516 u32 upper_dw = csb[1];
2517 bool ctx_to_valid = GEN12_CSB_CTX_VALID(lower_dw);
2518 bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw);
2519 bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
2520
2521 /*
2522 * The context switch detail is not guaranteed to be 5 when a preemption
2523 * occurs, so we can't just check for that. The check below works for
2524 * all the cases we care about, including preemptions of WAIT
2525 * instructions and lite-restore. Preempt-to-idle via the CTRL register
2526 * would require some extra handling, but we don't support that.
2527 */
2528 if (!ctx_away_valid || new_queue) {
2529 GEM_BUG_ON(!ctx_to_valid);
2530 return true;
2531 }
2532
2533 /*
2534 * switch detail = 5 is covered by the case above and we do not expect a
2535 * context switch on an unsuccessful wait instruction since we always
2536 * use polling mode.
2537 */
2538 GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw));
2539 return false;
2540}
2541
2542static inline bool
2543gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
2544{
2545 return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
2546}
2547
2548static void process_csb(struct intel_engine_cs *engine)
2549{
2550 struct intel_engine_execlists * const execlists = &engine->execlists;
2551 const u32 * const buf = execlists->csb_status;
2552 const u8 num_entries = execlists->csb_size;
2553 u8 head, tail;
2554
2555 /*
2556 * As we modify our execlists state tracking we require exclusive
2557 * access. Either we are inside the tasklet, or the tasklet is disabled
2558 * and we assume that is only inside the reset paths and so serialised.
2559 */
2560 GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) &&
2561 !reset_in_progress(execlists));
2562 GEM_BUG_ON(!intel_engine_in_execlists_submission_mode(engine));
2563
2564 /*
2565 * Note that csb_write, csb_status may be either in HWSP or mmio.
2566 * When reading from the csb_write mmio register, we have to be
2567 * careful to only use the GEN8_CSB_WRITE_PTR portion, which is
2568 * the low 4bits. As it happens we know the next 4bits are always
2569	 * zero and so we can simply mask off the low u8 of the register
2570 * and treat it identically to reading from the HWSP (without having
2571 * to use explicit shifting and masking, and probably bifurcating
2572 * the code to handle the legacy mmio read).
2573 */
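	/*
	 * In effect (a sketch of the intent, not additional code we need):
	 *
	 *	tail = READ_ONCE(*execlists->csb_write) & GENMASK(7, 0);
	 *
	 * the truncating assignment into a u8 below performs that masking
	 * implicitly.
	 */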
2574 head = execlists->csb_head;
2575 tail = READ_ONCE(*execlists->csb_write);
2576 if (unlikely(head == tail))
2577 return;
2578
2579 /*
2580 * We will consume all events from HW, or at least pretend to.
2581 *
2582 * The sequence of events from the HW is deterministic, and derived
2583 * from our writes to the ELSP, with a smidgen of variability for
2584	 * the arrival of the asynchronous requests wrt the inflight
2585 * execution. If the HW sends an event that does not correspond with
2586 * the one we are expecting, we have to abandon all hope as we lose
2587 * all tracking of what the engine is actually executing. We will
2588 * only detect we are out of sequence with the HW when we get an
2589 * 'impossible' event because we have already drained our own
2590 * preemption/promotion queue. If this occurs, we know that we likely
2591 * lost track of execution earlier and must unwind and restart, the
2592	 * simplest way is to stop processing the event queue and force the
2593 * engine to reset.
2594 */
2595 execlists->csb_head = tail;
2596 ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);
2597
2598 /*
2599 * Hopefully paired with a wmb() in HW!
2600 *
2601 * We must complete the read of the write pointer before any reads
2602 * from the CSB, so that we do not see stale values. Without an rmb
2603 * (lfence) the HW may speculatively perform the CSB[] reads *before*
2604 * we perform the READ_ONCE(*csb_write).
2605 */
2606 rmb();
2607 do {
2608 bool promote;
2609
2610 if (++head == num_entries)
2611 head = 0;
2612
2613 /*
2614 * We are flying near dragons again.
2615 *
2616 * We hold a reference to the request in execlist_port[]
2617 * but no more than that. We are operating in softirq
2618 * context and so cannot hold any mutex or sleep. That
2619 * prevents us stopping the requests we are processing
2620 * in port[] from being retired simultaneously (the
2621 * breadcrumb will be complete before we see the
2622 * context-switch). As we only hold the reference to the
2623 * request, any pointer chasing underneath the request
2624 * is subject to a potential use-after-free. Thus we
2625 * store all of the bookkeeping within port[] as
2626 * required, and avoid using unguarded pointers beneath
2627 * request itself. The same applies to the atomic
2628 * status notifier.
2629 */
2630
2631 ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
2632 head, buf[2 * head + 0], buf[2 * head + 1]);
2633
2634 if (INTEL_GEN(engine->i915) >= 12)
2635 promote = gen12_csb_parse(execlists, buf + 2 * head);
2636 else
2637 promote = gen8_csb_parse(execlists, buf + 2 * head);
2638 if (promote) {
2639 struct i915_request * const *old = execlists->active;
2640
2641 if (GEM_WARN_ON(!*execlists->pending)) {
2642 execlists->error_interrupt |= ERROR_CSB;
2643 break;
2644 }
2645
2646 ring_set_paused(engine, 0);
2647
2648 /* Point active to the new ELSP; prevent overwriting */
2649 WRITE_ONCE(execlists->active, execlists->pending);
2650 smp_wmb(); /* notify execlists_active() */
2651
2652 /* cancel old inflight, prepare for switch */
2653 trace_ports(execlists, "preempted", old);
2654 while (*old)
2655 execlists_schedule_out(*old++);
2656
2657 /* switch pending to inflight */
2658 GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
2659 copy_ports(execlists->inflight,
2660 execlists->pending,
2661 execlists_num_ports(execlists));
2662 smp_wmb(); /* complete the seqlock */
2663 WRITE_ONCE(execlists->active, execlists->inflight);
2664
2665 WRITE_ONCE(execlists->pending[0], NULL);
2666 } else {
2667 if (GEM_WARN_ON(!*execlists->active)) {
2668 execlists->error_interrupt |= ERROR_CSB;
2669 break;
2670 }
2671
2672 /* port0 completed, advanced to port1 */
2673 trace_ports(execlists, "completed", execlists->active);
2674
2675 /*
2676 * We rely on the hardware being strongly
2677 * ordered, that the breadcrumb write is
2678 * coherent (visible from the CPU) before the
2679 * user interrupt is processed. One might assume
2680 * that the breadcrumb write being before the
2681 * user interrupt and the CS event for the context
2682 * switch would therefore be before the CS event
2683 * itself...
2684 */
2685 if (GEM_SHOW_DEBUG() &&
2686 !i915_request_completed(*execlists->active)) {
2687 struct i915_request *rq = *execlists->active;
2688 const u32 *regs __maybe_unused =
2689 rq->context->lrc_reg_state;
2690
2691 ENGINE_TRACE(engine,
2692 "context completed before request!\n");
2693 ENGINE_TRACE(engine,
2694 "ring:{start:0x%08x, head:%04x, tail:%04x, ctl:%08x, mode:%08x}\n",
2695 ENGINE_READ(engine, RING_START),
2696 ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR,
2697 ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR,
2698 ENGINE_READ(engine, RING_CTL),
2699 ENGINE_READ(engine, RING_MI_MODE));
2700 ENGINE_TRACE(engine,
2701 "rq:{start:%08x, head:%04x, tail:%04x, seqno:%llx:%d, hwsp:%d}, ",
2702 i915_ggtt_offset(rq->ring->vma),
2703 rq->head, rq->tail,
2704 rq->fence.context,
2705 lower_32_bits(rq->fence.seqno),
2706 hwsp_seqno(rq));
2707 ENGINE_TRACE(engine,
2708 "ctx:{start:%08x, head:%04x, tail:%04x}, ",
2709 regs[CTX_RING_START],
2710 regs[CTX_RING_HEAD],
2711 regs[CTX_RING_TAIL]);
2712 }
2713
2714 execlists_schedule_out(*execlists->active++);
2715
2716 GEM_BUG_ON(execlists->active - execlists->inflight >
2717 execlists_num_ports(execlists));
2718 }
2719 } while (head != tail);
2720
2721 set_timeslice(engine);
2722
2723 /*
2724 * Gen11 has proven to fail wrt global observation point between
2725 * entry and tail update, failing on the ordering and thus
2726 * we see an old entry in the context status buffer.
2727 *
2728	 * Forcibly evict the entries ahead of the next gpu csb update, to
2729	 * increase the odds that we get fresh entries even with non-working
2730	 * hardware. The cost of doing so comes out mostly in the wash, as the
2731	 * hardware, working or not, will need to do the invalidation
2732	 * regardless.
2733 */
2734 invalidate_csb_entries(&buf[0], &buf[num_entries - 1]);
2735}
2736
2737static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
2738{
2739 lockdep_assert_held(&engine->active.lock);
2740 if (!READ_ONCE(engine->execlists.pending[0])) {
2741 rcu_read_lock(); /* protect peeking at execlists->active */
2742 execlists_dequeue(engine);
2743 rcu_read_unlock();
2744 }
2745}
2746
2747static void __execlists_hold(struct i915_request *rq)
2748{
2749 LIST_HEAD(list);
2750
2751 do {
2752 struct i915_dependency *p;
2753
2754 if (i915_request_is_active(rq))
2755 __i915_request_unsubmit(rq);
2756
2757 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2758 list_move_tail(&rq->sched.link, &rq->engine->active.hold);
2759 i915_request_set_hold(rq);
2760 RQ_TRACE(rq, "on hold\n");
2761
2762 for_each_waiter(p, rq) {
2763 struct i915_request *w =
2764 container_of(p->waiter, typeof(*w), sched);
2765
2766 /* Leave semaphores spinning on the other engines */
2767 if (w->engine != rq->engine)
2768 continue;
2769
2770 if (!i915_request_is_ready(w))
2771 continue;
2772
2773 if (i915_request_completed(w))
2774 continue;
2775
2776 if (i915_request_on_hold(w))
2777 continue;
2778
2779 list_move_tail(&w->sched.link, &list);
2780 }
2781
2782 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
2783 } while (rq);
2784}
2785
2786static bool execlists_hold(struct intel_engine_cs *engine,
2787 struct i915_request *rq)
2788{
2789 spin_lock_irq(&engine->active.lock);
2790
2791 if (i915_request_completed(rq)) { /* too late! */
2792 rq = NULL;
2793 goto unlock;
2794 }
2795
2796 if (rq->engine != engine) { /* preempted virtual engine */
2797 struct virtual_engine *ve = to_virtual_engine(rq->engine);
2798
2799 /*
2800 * intel_context_inflight() is only protected by virtue
2801 * of process_csb() being called only by the tasklet (or
2802 * directly from inside reset while the tasklet is suspended).
2803 * Assert that neither of those are allowed to run while we
2804 * poke at the request queues.
2805 */
2806 GEM_BUG_ON(!reset_in_progress(&engine->execlists));
2807
2808 /*
2809 * An unsubmitted request along a virtual engine will
2810 * remain on the active (this) engine until we are able
2811 * to process the context switch away (and so mark the
2812 * context as no longer in flight). That cannot have happened
2813 * yet, otherwise we would not be hanging!
2814 */
2815 spin_lock(&ve->base.active.lock);
2816 GEM_BUG_ON(intel_context_inflight(rq->context) != engine);
2817 GEM_BUG_ON(ve->request != rq);
2818 ve->request = NULL;
2819 spin_unlock(&ve->base.active.lock);
2820 i915_request_put(rq);
2821
2822 rq->engine = engine;
2823 }
2824
2825 /*
2826 * Transfer this request onto the hold queue to prevent it
2827	 * being resubmitted to HW (and potentially completed) before we have
2828 * released it. Since we may have already submitted following
2829 * requests, we need to remove those as well.
2830 */
2831 GEM_BUG_ON(i915_request_on_hold(rq));
2832 GEM_BUG_ON(rq->engine != engine);
2833 __execlists_hold(rq);
2834 GEM_BUG_ON(list_empty(&engine->active.hold));
2835
2836unlock:
2837 spin_unlock_irq(&engine->active.lock);
2838 return rq;
2839}
2840
2841static bool hold_request(const struct i915_request *rq)
2842{
2843 struct i915_dependency *p;
2844 bool result = false;
2845
2846 /*
2847 * If one of our ancestors is on hold, we must also be on hold,
2848 * otherwise we will bypass it and execute before it.
2849 */
2850 rcu_read_lock();
2851 for_each_signaler(p, rq) {
2852 const struct i915_request *s =
2853 container_of(p->signaler, typeof(*s), sched);
2854
2855 if (s->engine != rq->engine)
2856 continue;
2857
2858 result = i915_request_on_hold(s);
2859 if (result)
2860 break;
2861 }
2862 rcu_read_unlock();
2863
2864 return result;
2865}
2866
2867static void __execlists_unhold(struct i915_request *rq)
2868{
2869 LIST_HEAD(list);
2870
2871 do {
2872 struct i915_dependency *p;
2873
2874 RQ_TRACE(rq, "hold release\n");
2875
2876 GEM_BUG_ON(!i915_request_on_hold(rq));
2877 GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
2878
2879 i915_request_clear_hold(rq);
2880 list_move_tail(&rq->sched.link,
2881 i915_sched_lookup_priolist(rq->engine,
2882 rq_prio(rq)));
2883 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2884
2885 /* Also release any children on this engine that are ready */
2886 for_each_waiter(p, rq) {
2887 struct i915_request *w =
2888 container_of(p->waiter, typeof(*w), sched);
2889
2890 /* Propagate any change in error status */
2891 if (rq->fence.error)
2892 i915_request_set_error_once(w, rq->fence.error);
2893
2894 if (w->engine != rq->engine)
2895 continue;
2896
2897 if (!i915_request_on_hold(w))
2898 continue;
2899
2900 /* Check that no other parents are also on hold */
2901 if (hold_request(w))
2902 continue;
2903
2904 list_move_tail(&w->sched.link, &list);
2905 }
2906
2907 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
2908 } while (rq);
2909}
2910
2911static void execlists_unhold(struct intel_engine_cs *engine,
2912 struct i915_request *rq)
2913{
2914 spin_lock_irq(&engine->active.lock);
2915
2916 /*
2917 * Move this request back to the priority queue, and all of its
2918 * children and grandchildren that were suspended along with it.
2919 */
2920 __execlists_unhold(rq);
2921
2922 if (rq_prio(rq) > engine->execlists.queue_priority_hint) {
2923 engine->execlists.queue_priority_hint = rq_prio(rq);
2924 tasklet_hi_schedule(&engine->execlists.tasklet);
2925 }
2926
2927 spin_unlock_irq(&engine->active.lock);
2928}
2929
2930struct execlists_capture {
2931 struct work_struct work;
2932 struct i915_request *rq;
2933 struct i915_gpu_coredump *error;
2934};
2935
2936static void execlists_capture_work(struct work_struct *work)
2937{
2938 struct execlists_capture *cap = container_of(work, typeof(*cap), work);
2939 const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
2940 struct intel_engine_cs *engine = cap->rq->engine;
2941 struct intel_gt_coredump *gt = cap->error->gt;
2942 struct intel_engine_capture_vma *vma;
2943
2944 /* Compress all the objects attached to the request, slow! */
2945 vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp);
2946 if (vma) {
2947 struct i915_vma_compress *compress =
2948 i915_vma_capture_prepare(gt);
2949
2950 intel_engine_coredump_add_vma(gt->engine, vma, compress);
2951 i915_vma_capture_finish(gt, compress);
2952 }
2953
2954 gt->simulated = gt->engine->simulated;
2955 cap->error->simulated = gt->simulated;
2956
2957 /* Publish the error state, and announce it to the world */
2958 i915_error_state_store(cap->error);
2959 i915_gpu_coredump_put(cap->error);
2960
2961 /* Return this request and all that depend upon it for signaling */
2962 execlists_unhold(engine, cap->rq);
2963 i915_request_put(cap->rq);
2964
2965 kfree(cap);
2966}
2967
2968static struct execlists_capture *capture_regs(struct intel_engine_cs *engine)
2969{
2970 const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
2971 struct execlists_capture *cap;
2972
2973 cap = kmalloc(sizeof(*cap), gfp);
2974 if (!cap)
2975 return NULL;
2976
2977 cap->error = i915_gpu_coredump_alloc(engine->i915, gfp);
2978 if (!cap->error)
2979 goto err_cap;
2980
2981 cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp);
2982 if (!cap->error->gt)
2983 goto err_gpu;
2984
2985 cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp);
2986 if (!cap->error->gt->engine)
2987 goto err_gt;
2988
2989 return cap;
2990
2991err_gt:
2992 kfree(cap->error->gt);
2993err_gpu:
2994 kfree(cap->error);
2995err_cap:
2996 kfree(cap);
2997 return NULL;
2998}
2999
3000static struct i915_request *
3001active_context(struct intel_engine_cs *engine, u32 ccid)
3002{
3003 const struct intel_engine_execlists * const el = &engine->execlists;
3004 struct i915_request * const *port, *rq;
3005
3006 /*
3007 * Use the most recent result from process_csb(), but just in case
3008 * we trigger an error (via interrupt) before the first CS event has
3009 * been written, peek at the next submission.
3010 */
3011
3012 for (port = el->active; (rq = *port); port++) {
3013 if (rq->context->lrc.ccid == ccid) {
3014 ENGINE_TRACE(engine,
3015 "ccid found at active:%zd\n",
3016 port - el->active);
3017 return rq;
3018 }
3019 }
3020
3021 for (port = el->pending; (rq = *port); port++) {
3022 if (rq->context->lrc.ccid == ccid) {
3023 ENGINE_TRACE(engine,
3024 "ccid found at pending:%zd\n",
3025 port - el->pending);
3026 return rq;
3027 }
3028 }
3029
3030 ENGINE_TRACE(engine, "ccid:%x not found\n", ccid);
3031 return NULL;
3032}
3033
3034static u32 active_ccid(struct intel_engine_cs *engine)
3035{
3036 return ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI);
3037}
3038
3039static void execlists_capture(struct intel_engine_cs *engine)
3040{
3041 struct execlists_capture *cap;
3042
3043 if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR))
3044 return;
3045
3046 /*
3047 * We need to _quickly_ capture the engine state before we reset.
3048 * We are inside an atomic section (softirq) here and we are delaying
3049 * the forced preemption event.
3050 */
3051 cap = capture_regs(engine);
3052 if (!cap)
3053 return;
3054
3055 spin_lock_irq(&engine->active.lock);
3056 cap->rq = active_context(engine, active_ccid(engine));
3057 if (cap->rq) {
3058 cap->rq = active_request(cap->rq->context->timeline, cap->rq);
3059 cap->rq = i915_request_get_rcu(cap->rq);
3060 }
3061 spin_unlock_irq(&engine->active.lock);
3062 if (!cap->rq)
3063 goto err_free;
3064
3065 /*
3066 * Remove the request from the execlists queue, and take ownership
3067 * of the request. We pass it to our worker who will _slowly_ compress
3068 * all the pages the _user_ requested for debugging their batch, after
3069 * which we return it to the queue for signaling.
3070 *
3071 * By removing them from the execlists queue, we also remove the
3072 * requests from being processed by __unwind_incomplete_requests()
3073 * during the intel_engine_reset(), and so they will *not* be replayed
3074 * afterwards.
3075 *
3076 * Note that because we have not yet reset the engine at this point,
3077	 * it is possible that the request we have identified as
3078	 * guilty did in fact complete, and we will then hit an arbitration
3079 * point allowing the outstanding preemption to succeed. The likelihood
3080 * of that is very low (as capturing of the engine registers should be
3081 * fast enough to run inside an irq-off atomic section!), so we will
3082 * simply hold that request accountable for being non-preemptible
3083 * long enough to force the reset.
3084 */
3085 if (!execlists_hold(engine, cap->rq))
3086 goto err_rq;
3087
3088 INIT_WORK(&cap->work, execlists_capture_work);
3089 schedule_work(&cap->work);
3090 return;
3091
3092err_rq:
3093 i915_request_put(cap->rq);
3094err_free:
3095 i915_gpu_coredump_put(cap->error);
3096 kfree(cap);
3097}
3098
3099static void execlists_reset(struct intel_engine_cs *engine, const char *msg)
3100{
3101 const unsigned int bit = I915_RESET_ENGINE + engine->id;
3102 unsigned long *lock = &engine->gt->reset.flags;
3103
3104 if (!intel_has_reset_engine(engine->gt))
3105 return;
3106
3107 if (test_and_set_bit(bit, lock))
3108 return;
3109
3110 ENGINE_TRACE(engine, "reset for %s\n", msg);
3111
3112 /* Mark this tasklet as disabled to avoid waiting for it to complete */
3113 tasklet_disable_nosync(&engine->execlists.tasklet);
3114
3115 ring_set_paused(engine, 1); /* Freeze the current request in place */
3116 execlists_capture(engine);
3117 intel_engine_reset(engine, msg);
3118
3119 tasklet_enable(&engine->execlists.tasklet);
3120 clear_and_wake_up_bit(bit, lock);
3121}
3122
3123static bool preempt_timeout(const struct intel_engine_cs *const engine)
3124{
3125 const struct timer_list *t = &engine->execlists.preempt;
3126
3127 if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
3128 return false;
3129
3130 if (!timer_expired(t))
3131 return false;
3132
3133 return READ_ONCE(engine->execlists.pending[0]);
3134}
3135
3136/*
3137 * Check the unread Context Status Buffers and manage the submission of new
3138 * contexts to the ELSP accordingly.
3139 */
3140static void execlists_submission_tasklet(unsigned long data)
3141{
3142 struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
3143 bool timeout = preempt_timeout(engine);
3144
3145 process_csb(engine);
3146
3147 if (unlikely(READ_ONCE(engine->execlists.error_interrupt))) {
3148 const char *msg;
3149
3150		/* Generate the error message in priority wrt the user! */
3151 if (engine->execlists.error_interrupt & GENMASK(15, 0))
3152 msg = "CS error"; /* thrown by a user payload */
3153 else if (engine->execlists.error_interrupt & ERROR_CSB)
3154 msg = "invalid CSB event";
3155 else
3156 msg = "internal error";
3157
3158 engine->execlists.error_interrupt = 0;
3159 execlists_reset(engine, msg);
3160 }
3161
3162 if (!READ_ONCE(engine->execlists.pending[0]) || timeout) {
3163 unsigned long flags;
3164
3165 spin_lock_irqsave(&engine->active.lock, flags);
3166 __execlists_submission_tasklet(engine);
3167 spin_unlock_irqrestore(&engine->active.lock, flags);
3168
3169 /* Recheck after serialising with direct-submission */
3170 if (unlikely(timeout && preempt_timeout(engine)))
3171 execlists_reset(engine, "preemption time out");
3172 }
3173}
3174
3175static void __execlists_kick(struct intel_engine_execlists *execlists)
3176{
3177 /* Kick the tasklet for some interrupt coalescing and reset handling */
3178 tasklet_hi_schedule(&execlists->tasklet);
3179}
3180
3181#define execlists_kick(t, member) \
3182 __execlists_kick(container_of(t, struct intel_engine_execlists, member))
3183
3184static void execlists_timeslice(struct timer_list *timer)
3185{
3186 execlists_kick(timer, timer);
3187}
3188
3189static void execlists_preempt(struct timer_list *timer)
3190{
3191 execlists_kick(timer, preempt);
3192}
3193
3194static void queue_request(struct intel_engine_cs *engine,
3195 struct i915_request *rq)
3196{
3197 GEM_BUG_ON(!list_empty(&rq->sched.link));
3198 list_add_tail(&rq->sched.link,
3199 i915_sched_lookup_priolist(engine, rq_prio(rq)));
3200 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
3201}
3202
3203static void __submit_queue_imm(struct intel_engine_cs *engine)
3204{
3205 struct intel_engine_execlists * const execlists = &engine->execlists;
3206
3207 if (reset_in_progress(execlists))
3208 return; /* defer until we restart the engine following reset */
3209
3210 __execlists_submission_tasklet(engine);
3211}
3212
3213static void submit_queue(struct intel_engine_cs *engine,
3214 const struct i915_request *rq)
3215{
3216 struct intel_engine_execlists *execlists = &engine->execlists;
3217
3218 if (rq_prio(rq) <= execlists->queue_priority_hint)
3219 return;
3220
3221 execlists->queue_priority_hint = rq_prio(rq);
3222 __submit_queue_imm(engine);
3223}
3224
3225static bool ancestor_on_hold(const struct intel_engine_cs *engine,
3226 const struct i915_request *rq)
3227{
3228 GEM_BUG_ON(i915_request_on_hold(rq));
3229 return !list_empty(&engine->active.hold) && hold_request(rq);
3230}
3231
3232static void flush_csb(struct intel_engine_cs *engine)
3233{
3234 struct intel_engine_execlists *el = &engine->execlists;
3235
3236 if (READ_ONCE(el->pending[0]) && tasklet_trylock(&el->tasklet)) {
3237 if (!reset_in_progress(el))
3238 process_csb(engine);
3239 tasklet_unlock(&el->tasklet);
3240 }
3241}
3242
3243static void execlists_submit_request(struct i915_request *request)
3244{
3245 struct intel_engine_cs *engine = request->engine;
3246 unsigned long flags;
3247
3248 /* Hopefully we clear execlists->pending[] to let us through */
3249 flush_csb(engine);
3250
3251 /* Will be called from irq-context when using foreign fences. */
3252 spin_lock_irqsave(&engine->active.lock, flags);
3253
3254 if (unlikely(ancestor_on_hold(engine, request))) {
3255 RQ_TRACE(request, "ancestor on hold\n");
3256 list_add_tail(&request->sched.link, &engine->active.hold);
3257 i915_request_set_hold(request);
3258 } else {
3259 queue_request(engine, request);
3260
3261 GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
3262 GEM_BUG_ON(list_empty(&request->sched.link));
3263
3264 submit_queue(engine, request);
3265 }
3266
3267 spin_unlock_irqrestore(&engine->active.lock, flags);
3268}
3269
3270static void __execlists_context_fini(struct intel_context *ce)
3271{
3272 intel_ring_put(ce->ring);
3273 i915_vma_put(ce->state);
3274}
3275
3276static void execlists_context_destroy(struct kref *kref)
3277{
3278 struct intel_context *ce = container_of(kref, typeof(*ce), ref);
3279
3280 GEM_BUG_ON(!i915_active_is_idle(&ce->active));
3281 GEM_BUG_ON(intel_context_is_pinned(ce));
3282
3283 if (ce->state)
3284 __execlists_context_fini(ce);
3285
3286 intel_context_fini(ce);
3287 intel_context_free(ce);
3288}
3289
3290static void
3291set_redzone(void *vaddr, const struct intel_engine_cs *engine)
3292{
3293 if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
3294 return;
3295
3296 vaddr += engine->context_size;
3297
3298 memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
3299}
3300
3301static void
3302check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
3303{
3304 if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
3305 return;
3306
3307 vaddr += engine->context_size;
3308
3309 if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
3310 drm_err_once(&engine->i915->drm,
3311 "%s context redzone overwritten!\n",
3312 engine->name);
3313}
3314
3315static void execlists_context_unpin(struct intel_context *ce)
3316{
3317 check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
3318 ce->engine);
3319
3320 i915_gem_object_unpin_map(ce->state->obj);
3321}
3322
3323static u32 *
3324gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs)
3325{
3326 *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
3327 MI_SRM_LRM_GLOBAL_GTT |
3328 MI_LRI_LRM_CS_MMIO;
3329 *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3330 *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
3331 CTX_TIMESTAMP * sizeof(u32);
3332 *cs++ = 0;
3333
3334 *cs++ = MI_LOAD_REGISTER_REG |
3335 MI_LRR_SOURCE_CS_MMIO |
3336 MI_LRI_LRM_CS_MMIO;
3337 *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3338 *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
3339
3340 *cs++ = MI_LOAD_REGISTER_REG |
3341 MI_LRR_SOURCE_CS_MMIO |
3342 MI_LRI_LRM_CS_MMIO;
3343 *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3344 *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
3345
3346 return cs;
3347}
3348
3349static u32 *
3350gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs)
3351{
3352 GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1);
3353
3354 *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
3355 MI_SRM_LRM_GLOBAL_GTT |
3356 MI_LRI_LRM_CS_MMIO;
3357 *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3358 *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
3359 (lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32);
3360 *cs++ = 0;
3361
3362 return cs;
3363}
3364
3365static u32 *
3366gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
3367{
3368 GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1);
3369
3370 *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
3371 MI_SRM_LRM_GLOBAL_GTT |
3372 MI_LRI_LRM_CS_MMIO;
3373 *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3374 *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
3375 (lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32);
3376 *cs++ = 0;
3377
3378 *cs++ = MI_LOAD_REGISTER_REG |
3379 MI_LRR_SOURCE_CS_MMIO |
3380 MI_LRI_LRM_CS_MMIO;
3381 *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3382 *cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0));
3383
3384 return cs;
3385}
3386
3387static u32 *
3388gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
3389{
3390 cs = gen12_emit_timestamp_wa(ce, cs);
3391 cs = gen12_emit_cmd_buf_wa(ce, cs);
3392 cs = gen12_emit_restore_scratch(ce, cs);
3393
3394 return cs;
3395}
3396
3397static u32 *
3398gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
3399{
3400 cs = gen12_emit_timestamp_wa(ce, cs);
3401 cs = gen12_emit_restore_scratch(ce, cs);
3402
3403 return cs;
3404}
3405
3406static inline u32 context_wa_bb_offset(const struct intel_context *ce)
3407{
3408 return PAGE_SIZE * ce->wa_bb_page;
3409}
3410
3411static u32 *context_indirect_bb(const struct intel_context *ce)
3412{
3413 void *ptr;
3414
3415 GEM_BUG_ON(!ce->wa_bb_page);
3416
3417 ptr = ce->lrc_reg_state;
3418 ptr -= LRC_STATE_OFFSET; /* back to start of context image */
3419 ptr += context_wa_bb_offset(ce);
3420
3421 return ptr;
3422}
3423
3424static void
3425setup_indirect_ctx_bb(const struct intel_context *ce,
3426 const struct intel_engine_cs *engine,
3427 u32 *(*emit)(const struct intel_context *, u32 *))
3428{
3429 u32 * const start = context_indirect_bb(ce);
3430 u32 *cs;
3431
3432 cs = emit(ce, start);
3433 GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
3434 while ((unsigned long)cs % CACHELINE_BYTES)
3435 *cs++ = MI_NOOP;
3436
3437 lrc_ring_setup_indirect_ctx(ce->lrc_reg_state, engine,
3438 i915_ggtt_offset(ce->state) +
3439 context_wa_bb_offset(ce),
3440 (cs - start) * sizeof(*cs));
3441}
3442
3443static void
3444__execlists_update_reg_state(const struct intel_context *ce,
3445 const struct intel_engine_cs *engine,
3446 u32 head)
3447{
3448 struct intel_ring *ring = ce->ring;
3449 u32 *regs = ce->lrc_reg_state;
3450
3451 GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
3452 GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
3453
3454 regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
3455 regs[CTX_RING_HEAD] = head;
3456 regs[CTX_RING_TAIL] = ring->tail;
3457 regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
3458
3459 /* RPCS */
3460 if (engine->class == RENDER_CLASS) {
3461 regs[CTX_R_PWR_CLK_STATE] =
3462 intel_sseu_make_rpcs(engine->gt, &ce->sseu);
3463
3464 i915_oa_init_reg_state(ce, engine);
3465 }
3466
3467 if (ce->wa_bb_page) {
3468 u32 *(*fn)(const struct intel_context *ce, u32 *cs);
3469
3470 fn = gen12_emit_indirect_ctx_xcs;
3471 if (ce->engine->class == RENDER_CLASS)
3472 fn = gen12_emit_indirect_ctx_rcs;
3473
3474 /* Mutually exclusive wrt to global indirect bb */
3475 GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
3476 setup_indirect_ctx_bb(ce, engine, fn);
3477 }
3478}
3479
3480static int
3481__execlists_context_pin(struct intel_context *ce,
3482 struct intel_engine_cs *engine)
3483{
3484 void *vaddr;
3485
3486 GEM_BUG_ON(!ce->state);
3487 GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
3488
3489 vaddr = i915_gem_object_pin_map(ce->state->obj,
3490 i915_coherent_map_type(engine->i915) |
3491 I915_MAP_OVERRIDE);
3492 if (IS_ERR(vaddr))
3493 return PTR_ERR(vaddr);
3494
3495 ce->lrc.lrca = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
3496 ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;
3497 __execlists_update_reg_state(ce, engine, ce->ring->tail);
3498
3499 return 0;
3500}
3501
3502static int execlists_context_pin(struct intel_context *ce)
3503{
3504 return __execlists_context_pin(ce, ce->engine);
3505}
3506
3507static int execlists_context_alloc(struct intel_context *ce)
3508{
3509 return __execlists_context_alloc(ce, ce->engine);
3510}
3511
3512static void execlists_context_reset(struct intel_context *ce)
3513{
3514 CE_TRACE(ce, "reset\n");
3515 GEM_BUG_ON(!intel_context_is_pinned(ce));
3516
3517 intel_ring_reset(ce->ring, ce->ring->emit);
3518
3519 /* Scrub away the garbage */
3520 execlists_init_reg_state(ce->lrc_reg_state,
3521 ce, ce->engine, ce->ring, true);
3522 __execlists_update_reg_state(ce, ce->engine, ce->ring->tail);
3523
3524 ce->lrc.desc |= CTX_DESC_FORCE_RESTORE;
3525}
3526
3527static const struct intel_context_ops execlists_context_ops = {
3528 .alloc = execlists_context_alloc,
3529
3530 .pin = execlists_context_pin,
3531 .unpin = execlists_context_unpin,
3532
3533 .enter = intel_context_enter_engine,
3534 .exit = intel_context_exit_engine,
3535
3536 .reset = execlists_context_reset,
3537 .destroy = execlists_context_destroy,
3538};
3539
3540static int gen8_emit_init_breadcrumb(struct i915_request *rq)
3541{
3542 u32 *cs;
3543
3544 GEM_BUG_ON(i915_request_has_initial_breadcrumb(rq));
3545 if (!i915_request_timeline(rq)->has_initial_breadcrumb)
3546 return 0;
3547
3548 cs = intel_ring_begin(rq, 6);
3549 if (IS_ERR(cs))
3550 return PTR_ERR(cs);
3551
3552 /*
3553 * Check if we have been preempted before we even get started.
3554 *
3555 * After this point i915_request_started() reports true, even if
3556 * we get preempted and so are no longer running.
3557 */
3558 *cs++ = MI_ARB_CHECK;
3559 *cs++ = MI_NOOP;
3560
3561 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
3562 *cs++ = i915_request_timeline(rq)->hwsp_offset;
3563 *cs++ = 0;
3564 *cs++ = rq->fence.seqno - 1;
3565
3566 intel_ring_advance(rq, cs);
3567
3568 /* Record the updated position of the request's payload */
3569 rq->infix = intel_ring_offset(rq, cs);
3570
3571 __set_bit(I915_FENCE_FLAG_INITIAL_BREADCRUMB, &rq->fence.flags);
3572
3573 return 0;
3574}
3575
3576static int emit_pdps(struct i915_request *rq)
3577{
3578 const struct intel_engine_cs * const engine = rq->engine;
3579 struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->context->vm);
3580 int err, i;
3581 u32 *cs;
3582
3583 GEM_BUG_ON(intel_vgpu_active(rq->engine->i915));
3584
3585 /*
3586 * Beware ye of the dragons, this sequence is magic!
3587 *
3588 * Small changes to this sequence can cause anything from
3589 * GPU hangs to forcewake errors and machine lockups!
3590 */
3591
3592 /* Flush any residual operations from the context load */
3593 err = engine->emit_flush(rq, EMIT_FLUSH);
3594 if (err)
3595 return err;
3596
3597 /* Magic required to prevent forcewake errors! */
3598 err = engine->emit_flush(rq, EMIT_INVALIDATE);
3599 if (err)
3600 return err;
3601
3602 cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2);
3603 if (IS_ERR(cs))
3604 return PTR_ERR(cs);
3605
3606 /* Ensure the LRI have landed before we invalidate & continue */
3607 *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
3608 for (i = GEN8_3LVL_PDPES; i--; ) {
3609 const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
3610 u32 base = engine->mmio_base;
3611
3612 *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
3613 *cs++ = upper_32_bits(pd_daddr);
3614 *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
3615 *cs++ = lower_32_bits(pd_daddr);
3616 }
3617 *cs++ = MI_NOOP;
3618
3619 intel_ring_advance(rq, cs);
3620
3621 return 0;
3622}
3623
3624static int execlists_request_alloc(struct i915_request *request)
3625{
3626 int ret;
3627
3628 GEM_BUG_ON(!intel_context_is_pinned(request->context));
3629
3630 /*
3631 * Flush enough space to reduce the likelihood of waiting after
3632 * we start building the request - in which case we will just
3633 * have to repeat work.
3634 */
3635 request->reserved_space += EXECLISTS_REQUEST_SIZE;
3636
3637 /*
3638 * Note that after this point, we have committed to using
3639 * this request as it is being used to both track the
3640 * state of engine initialisation and liveness of the
3641 * golden renderstate above. Think twice before you try
3642 * to cancel/unwind this request now.
3643 */
3644
3645 if (!i915_vm_is_4lvl(request->context->vm)) {
3646 ret = emit_pdps(request);
3647 if (ret)
3648 return ret;
3649 }
3650
3651 /* Unconditionally invalidate GPU caches and TLBs. */
3652 ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
3653 if (ret)
3654 return ret;
3655
3656 request->reserved_space -= EXECLISTS_REQUEST_SIZE;
3657 return 0;
3658}
3659
3660/*
3661 * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
3662 * PIPE_CONTROL instruction. This is required for the flush to happen correctly
3663 * but there is a slight complication as this is applied in a WA batch where the
3664 * values are only initialized once, so we cannot take the register value at the
3665 * beginning and reuse it further; hence we save its value to memory, upload a
3666 * constant value with bit21 set and then we restore it back with the saved value.
3667 * To simplify the WA, a constant value is formed by using the default value
3668 * of this register. This shouldn't be a problem because we are only modifying
3669 * it for a short period and this batch is non-preemptible. We can of course
3670 * use additional instructions that read the actual value of the register
3671 * at that time and set our bit of interest but it makes the WA complicated.
3672 *
3673 * This WA is also required for Gen9 so extracting as a function avoids
3674 * code duplication.
3675 */
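/*
 * The save/modify/restore pattern emitted below, in outline (sketch only):
 *
 *	SRM   GEN8_L3SQCREG4 -> scratch          (save the current value)
 *	LRI   GEN8_L3SQCREG4 <- default | bit21  (force coherent L3 flush)
 *	PIPE_CONTROL          (CS stall | DC flush)
 *	LRM   GEN8_L3SQCREG4 <- scratch          (restore the saved value)
 */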
3676static u32 *
3677gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
3678{
3679 /* NB no one else is allowed to scribble over scratch + 256! */
3680 *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
3681 *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3682 *batch++ = intel_gt_scratch_offset(engine->gt,
3683 INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
3684 *batch++ = 0;
3685
3686 *batch++ = MI_LOAD_REGISTER_IMM(1);
3687 *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3688 *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
3689
3690 batch = gen8_emit_pipe_control(batch,
3691 PIPE_CONTROL_CS_STALL |
3692 PIPE_CONTROL_DC_FLUSH_ENABLE,
3693 0);
3694
3695 *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
3696 *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
3697 *batch++ = intel_gt_scratch_offset(engine->gt,
3698 INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
3699 *batch++ = 0;
3700
3701 return batch;
3702}
3703
3704/*
3705 * Typically we only have one indirect_ctx and per_ctx batch buffer which are
3706 * initialized at the beginning and shared across all contexts, but this field
3707 * helps us to have multiple batches at different offsets and select them based
3708 * on some criterion. At the moment this batch always starts at the beginning of
3709 * the page and at this point we don't have multiple wa_ctx batch buffers.
3710 *
3711 * The number of WAs applied is not known at the beginning; we use this field
3712 * to return the number of DWORDs written.
3713 *
3714 * Note that this batch does not contain MI_BATCH_BUFFER_END,
3715 * so NOOPs are added as padding to make it cacheline aligned.
3716 * MI_BATCH_BUFFER_END will be added to the per_ctx batch, and both of them
3717 * together make a complete batch buffer.
3718 */
3719static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3720{
3721 /* WaDisableCtxRestoreArbitration:bdw,chv */
3722 *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3723
3724 /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
3725 if (IS_BROADWELL(engine->i915))
3726 batch = gen8_emit_flush_coherentl3_wa(engine, batch);
3727
3728 /* WaClearSlmSpaceAtContextSwitch:bdw,chv */
3729 /* Actual scratch location is at 128 bytes offset */
3730 batch = gen8_emit_pipe_control(batch,
3731 PIPE_CONTROL_FLUSH_L3 |
3732 PIPE_CONTROL_STORE_DATA_INDEX |
3733 PIPE_CONTROL_CS_STALL |
3734 PIPE_CONTROL_QW_WRITE,
3735 LRC_PPHWSP_SCRATCH_ADDR);
3736
3737 *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3738
3739 /* Pad to end of cacheline */
3740 while ((unsigned long)batch % CACHELINE_BYTES)
3741 *batch++ = MI_NOOP;
3742
3743 /*
3744 * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
3745 * execution depends on the length specified in terms of cache lines
3746 * in the register CTX_RCS_INDIRECT_CTX
3747 */
3748
3749 return batch;
3750}
3751
3752struct lri {
3753 i915_reg_t reg;
3754 u32 value;
3755};
3756
3757static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
3758{
3759 GEM_BUG_ON(!count || count > 63);
3760
3761 *batch++ = MI_LOAD_REGISTER_IMM(count);
3762 do {
3763 *batch++ = i915_mmio_reg_offset(lri->reg);
3764 *batch++ = lri->value;
3765 } while (lri++, --count);
3766 *batch++ = MI_NOOP;
3767
3768 return batch;
3769}
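
/*
 * For reference, the block emitted by emit_lri() for count == N occupies
 * 1 + 2*N + 1 dwords:
 *
 *	MI_LOAD_REGISTER_IMM(N)
 *	offset(reg[0]), value[0]
 *	...
 *	offset(reg[N-1]), value[N-1]
 *	MI_NOOP
 *
 * and the GEM_BUG_ON() limits N to 1..63, the most a single LRI packet is
 * expected to carry here.
 */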
3770
3771static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3772{
3773 static const struct lri lri[] = {
3774 /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
3775 {
3776 COMMON_SLICE_CHICKEN2,
3777 __MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
3778 0),
3779 },
3780
3781 /* BSpec: 11391 */
3782 {
3783 FF_SLICE_CHICKEN,
3784 __MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
3785 FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
3786 },
3787
3788 /* BSpec: 11299 */
3789 {
3790 _3D_CHICKEN3,
3791 __MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
3792 _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
3793 }
3794 };
3795
3796 *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
3797
3798 /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
3799 batch = gen8_emit_flush_coherentl3_wa(engine, batch);
3800
3801 /* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
3802 batch = gen8_emit_pipe_control(batch,
3803 PIPE_CONTROL_FLUSH_L3 |
3804 PIPE_CONTROL_STORE_DATA_INDEX |
3805 PIPE_CONTROL_CS_STALL |
3806 PIPE_CONTROL_QW_WRITE,
3807 LRC_PPHWSP_SCRATCH_ADDR);
3808
3809 batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
3810
3811 /* WaMediaPoolStateCmdInWABB:bxt,glk */
3812 if (HAS_POOLED_EU(engine->i915)) {
3813 /*
		 * EU pool configuration is set up along with the golden context
		 * during context initialization. This value depends on the
		 * device type (2x6 or 3x6) and needs to be updated based
		 * on which subslices are disabled, especially for 2x6
		 * devices. However, it is safe to load the default
		 * configuration of a 3x6 device instead of masking off the
		 * corresponding bits, because the HW ignores bits of a disabled
		 * subslice and drops down to the appropriate config. Please
		 * see render_state_setup() in i915_gem_render_state.c for the
		 * possible configurations; to avoid duplication they are
		 * not shown here again.
3825 */
3826 *batch++ = GEN9_MEDIA_POOL_STATE;
3827 *batch++ = GEN9_MEDIA_POOL_ENABLE;
3828 *batch++ = 0x00777000;
3829 *batch++ = 0;
3830 *batch++ = 0;
3831 *batch++ = 0;
3832 }
3833
3834 *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
3835
3836 /* Pad to end of cacheline */
3837 while ((unsigned long)batch % CACHELINE_BYTES)
3838 *batch++ = MI_NOOP;
3839
3840 return batch;
3841}
3842
3843static u32 *
3844gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3845{
3846 int i;
3847
3848 /*
3849 * WaPipeControlBefore3DStateSamplePattern: cnl
3850 *
3851 * Ensure the engine is idle prior to programming a
3852 * 3DSTATE_SAMPLE_PATTERN during a context restore.
3853 */
3854 batch = gen8_emit_pipe_control(batch,
3855 PIPE_CONTROL_CS_STALL,
3856 0);
3857 /*
3858 * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for
3859 * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in
3860 * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is
3861 * confusing. Since gen8_emit_pipe_control() already advances the
3862 * batch by 6 dwords, we advance the other 10 here, completing a
3863 * cacheline. It's not clear if the workaround requires this padding
3864 * before other commands, or if it's just the regular padding we would
3865 * already have for the workaround bb, so leave it here for now.
3866 */
3867 for (i = 0; i < 10; i++)
3868 *batch++ = MI_NOOP;
3869
3870 /* Pad to end of cacheline */
3871 while ((unsigned long)batch % CACHELINE_BYTES)
3872 *batch++ = MI_NOOP;
3873
3874 return batch;
3875}
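
/*
 * Worked dword accounting for the WA above: gen8_emit_pipe_control() advances
 * the batch by 6 dwords and the loop adds 10 MI_NOOPs, i.e. 16 dwords or
 * 64 bytes, completing one cacheline. As the batch starts cacheline aligned
 * (checked in intel_init_workaround_bb() below), the trailing alignment loop
 * is then normally a no-op.
 */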
3876
3877#define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)
3878
3879static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
3880{
3881 struct drm_i915_gem_object *obj;
3882 struct i915_vma *vma;
3883 int err;
3884
3885 obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_OBJ_SIZE);
3886 if (IS_ERR(obj))
3887 return PTR_ERR(obj);
3888
3889 vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
3890 if (IS_ERR(vma)) {
3891 err = PTR_ERR(vma);
3892 goto err;
3893 }
3894
3895 err = i915_ggtt_pin(vma, 0, PIN_HIGH);
3896 if (err)
3897 goto err;
3898
3899 engine->wa_ctx.vma = vma;
3900 return 0;
3901
3902err:
3903 i915_gem_object_put(obj);
3904 return err;
3905}
3906
3907static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine)
3908{
3909 i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
3910}
3911
3912typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
3913
3914static int intel_init_workaround_bb(struct intel_engine_cs *engine)
3915{
3916 struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
3917 struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx,
3918 &wa_ctx->per_ctx };
3919 wa_bb_func_t wa_bb_fn[2];
3920 void *batch, *batch_ptr;
3921 unsigned int i;
3922 int ret;
3923
3924 if (engine->class != RENDER_CLASS)
3925 return 0;
3926
3927 switch (INTEL_GEN(engine->i915)) {
3928 case 12:
3929 case 11:
3930 return 0;
3931 case 10:
3932 wa_bb_fn[0] = gen10_init_indirectctx_bb;
3933 wa_bb_fn[1] = NULL;
3934 break;
3935 case 9:
3936 wa_bb_fn[0] = gen9_init_indirectctx_bb;
3937 wa_bb_fn[1] = NULL;
3938 break;
3939 case 8:
3940 wa_bb_fn[0] = gen8_init_indirectctx_bb;
3941 wa_bb_fn[1] = NULL;
3942 break;
3943 default:
3944 MISSING_CASE(INTEL_GEN(engine->i915));
3945 return 0;
3946 }
3947
3948 ret = lrc_setup_wa_ctx(engine);
3949 if (ret) {
3950 drm_dbg(&engine->i915->drm,
3951 "Failed to setup context WA page: %d\n", ret);
3952 return ret;
3953 }
3954
	batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB);
	if (IS_ERR(batch)) {
		lrc_destroy_wa_ctx(engine);
		return PTR_ERR(batch);
	}

3957 /*
3958 * Emit the two workaround batch buffers, recording the offset from the
3959 * start of the workaround batch buffer object for each and their
3960 * respective sizes.
3961 */
3962 batch_ptr = batch;
3963 for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
3964 wa_bb[i]->offset = batch_ptr - batch;
3965 if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
3966 CACHELINE_BYTES))) {
3967 ret = -EINVAL;
3968 break;
3969 }
3970 if (wa_bb_fn[i])
3971 batch_ptr = wa_bb_fn[i](engine, batch_ptr);
3972 wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
3973 }
3974 GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE);
3975
3976 __i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch);
3977 __i915_gem_object_release_map(wa_ctx->vma->obj);
3978 if (ret)
3979 lrc_destroy_wa_ctx(engine);
3980
3981 return ret;
3982}
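
/*
 * Note on the bookkeeping above: each generator pads its batch out to a
 * cacheline boundary, which is what keeps the next wa_bb offset aligned for
 * the GEM_DEBUG_WARN_ON(!IS_ALIGNED(...)) check. With only wa_bb_fn[0]
 * populated (as in every case of the switch above), wa_bb[1] ends up as an
 * empty batch whose offset is the cacheline-aligned end of wa_bb[0].
 */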
3983
3984static void reset_csb_pointers(struct intel_engine_cs *engine)
3985{
3986 struct intel_engine_execlists * const execlists = &engine->execlists;
3987 const unsigned int reset_value = execlists->csb_size - 1;
3988
3989 ring_set_paused(engine, 0);
3990
3991 /*
3992 * Sometimes Icelake forgets to reset its pointers on a GPU reset.
3993 * Bludgeon them with a mmio update to be sure.
3994 */
3995 ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
3996 0xffff << 16 | reset_value << 8 | reset_value);
3997 ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
3998
3999 /*
4000 * After a reset, the HW starts writing into CSB entry [0]. We
4001 * therefore have to set our HEAD pointer back one entry so that
4002 * the *first* entry we check is entry 0. To complicate this further,
4003 * as we don't wait for the first interrupt after reset, we have to
4004 * fake the HW write to point back to the last entry so that our
4005 * inline comparison of our cached head position against the last HW
4006 * write works even before the first interrupt.
4007 */
4008 execlists->csb_head = reset_value;
4009 WRITE_ONCE(*execlists->csb_write, reset_value);
4010 wmb(); /* Make sure this is visible to HW (paranoia?) */
4011
4012 invalidate_csb_entries(&execlists->csb_status[0],
4013 &execlists->csb_status[reset_value]);
4014
4015 /* Once more for luck and our trusty paranoia */
4016 ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
4017 0xffff << 16 | reset_value << 8 | reset_value);
4018 ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
4019
4020 GEM_BUG_ON(READ_ONCE(*execlists->csb_write) != reset_value);
4021}
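
/*
 * Worked through: reset_value is csb_size - 1 (csb_size being chosen in
 * intel_execlists_submission_setup() below), so both the cached csb_head and
 * the faked *csb_write point at the last entry. The next entry the CSB
 * processing advances to therefore wraps to index 0, which per the comment
 * above is exactly where the HW resumes writing after a reset.
 */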
4022
4023static void execlists_sanitize(struct intel_engine_cs *engine)
4024{
4025 /*
4026 * Poison residual state on resume, in case the suspend didn't!
4027 *
	 * We have to assume that across suspend/resume (or other loss
	 * of control) the contents of our pinned buffers have been
	 * lost, replaced by garbage. Since this doesn't always happen,
4031 * let's poison such state so that we more quickly spot when
4032 * we falsely assume it has been preserved.
4033 */
4034 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
4035 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);
4036
4037 reset_csb_pointers(engine);
4038
4039 /*
4040 * The kernel_context HWSP is stored in the status_page. As above,
4041 * that may be lost on resume/initialisation, and so we need to
4042 * reset the value in the HWSP.
4043 */
4044 intel_timeline_reset_seqno(engine->kernel_context->timeline);
4045
4046 /* And scrub the dirty cachelines for the HWSP */
4047 clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
4048}
4049
4050static void enable_error_interrupt(struct intel_engine_cs *engine)
4051{
4052 u32 status;
4053
4054 engine->execlists.error_interrupt = 0;
4055 ENGINE_WRITE(engine, RING_EMR, ~0u);
4056 ENGINE_WRITE(engine, RING_EIR, ~0u); /* clear all existing errors */
4057
4058 status = ENGINE_READ(engine, RING_ESR);
4059 if (unlikely(status)) {
4060 drm_err(&engine->i915->drm,
4061 "engine '%s' resumed still in error: %08x\n",
4062 engine->name, status);
4063 __intel_gt_reset(engine->gt, engine->mask);
4064 }
4065
4066 /*
4067 * On current gen8+, we have 2 signals to play with
4068 *
	 * - I915_ERROR_INSTRUCTION (bit 0)
4070 *
4071 * Generate an error if the command parser encounters an invalid
4072 * instruction
4073 *
4074 * This is a fatal error.
4075 *
4076 * - CP_PRIV (bit 2)
4077 *
4078 * Generate an error on privilege violation (where the CP replaces
4079 * the instruction with a no-op). This also fires for writes into
4080 * read-only scratch pages.
4081 *
4082 * This is a non-fatal error, parsing continues.
4083 *
4084 * * there are a few others defined for odd HW that we do not use
4085 *
4086 * Since CP_PRIV fires for cases where we have chosen to ignore the
4087 * error (as the HW is validating and suppressing the mistakes), we
4088 * only unmask the instruction error bit.
4089 */
4090 ENGINE_WRITE(engine, RING_EMR, ~I915_ERROR_INSTRUCTION);
4091}
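
/*
 * Reading the above: a set bit in RING_EMR masks the corresponding error
 * source (hence the initial ~0u write masking everything while the stale
 * RING_EIR state is cleared and checked). The final write of
 * ~I915_ERROR_INSTRUCTION leaves only bit 0 unmasked, so only invalid
 * instruction errors generate the error interrupt; CP_PRIV remains masked,
 * as explained in the comment.
 */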
4092
4093static void enable_execlists(struct intel_engine_cs *engine)
4094{
4095 u32 mode;
4096
4097 assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
4098
4099 intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
4100
4101 if (INTEL_GEN(engine->i915) >= 11)
4102 mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE);
4103 else
4104 mode = _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE);
4105 ENGINE_WRITE_FW(engine, RING_MODE_GEN7, mode);
4106
4107 ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
4108
4109 ENGINE_WRITE_FW(engine,
4110 RING_HWS_PGA,
4111 i915_ggtt_offset(engine->status_page.vma));
4112 ENGINE_POSTING_READ(engine, RING_HWS_PGA);
4113
4114 enable_error_interrupt(engine);
4115
4116 engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0);
4117}
4118
4119static bool unexpected_starting_state(struct intel_engine_cs *engine)
4120{
4121 bool unexpected = false;
4122
4123 if (ENGINE_READ_FW(engine, RING_MI_MODE) & STOP_RING) {
4124 drm_dbg(&engine->i915->drm,
4125 "STOP_RING still set in RING_MI_MODE\n");
4126 unexpected = true;
4127 }
4128
4129 return unexpected;
4130}
4131
4132static int execlists_resume(struct intel_engine_cs *engine)
4133{
4134 intel_mocs_init_engine(engine);
4135
4136 intel_engine_reset_breadcrumbs(engine);
4137
4138 if (GEM_SHOW_DEBUG() && unexpected_starting_state(engine)) {
4139 struct drm_printer p = drm_debug_printer(__func__);
4140
4141 intel_engine_dump(engine, &p, NULL);
4142 }
4143
4144 enable_execlists(engine);
4145
4146 return 0;
4147}
4148
4149static void execlists_reset_prepare(struct intel_engine_cs *engine)
4150{
4151 struct intel_engine_execlists * const execlists = &engine->execlists;
4152 unsigned long flags;
4153
4154 ENGINE_TRACE(engine, "depth<-%d\n",
4155 atomic_read(&execlists->tasklet.count));
4156
4157 /*
4158 * Prevent request submission to the hardware until we have
4159 * completed the reset in i915_gem_reset_finish(). If a request
4160 * is completed by one engine, it may then queue a request
4161 * to a second via its execlists->tasklet *just* as we are
4162 * calling engine->resume() and also writing the ELSP.
4163 * Turning off the execlists->tasklet until the reset is over
4164 * prevents the race.
4165 */
4166 __tasklet_disable_sync_once(&execlists->tasklet);
4167 GEM_BUG_ON(!reset_in_progress(execlists));
4168
4169 /* And flush any current direct submission. */
4170 spin_lock_irqsave(&engine->active.lock, flags);
4171 spin_unlock_irqrestore(&engine->active.lock, flags);
4172
4173 /*
	 * We stop the engines, otherwise we might get a failed reset and a
	 * dead gpu (on elk). Even a gpu as modern as kbl can suffer
	 * from a system hang if a batchbuffer is progressing when
	 * the reset is issued, regardless of the READY_TO_RESET ack.
	 * Thus assume it is best to stop the engines on all gens
	 * where we have a gpu reset.
4180 *
4181 * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
4182 *
4183 * FIXME: Wa for more modern gens needs to be validated
4184 */
4185 ring_set_paused(engine, 1);
4186 intel_engine_stop_cs(engine);
4187
4188 engine->execlists.reset_ccid = active_ccid(engine);
4189}
4190
4191static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
4192{
4193 int x;
4194
4195 x = lrc_ring_mi_mode(engine);
4196 if (x != -1) {
4197 regs[x + 1] &= ~STOP_RING;
4198 regs[x + 1] |= STOP_RING << 16;
4199 }
4200}
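
/*
 * The MI_MODE slot in the context image follows the usual masked-write
 * layout (bits 31:16 select which of bits 15:0 are written), so clearing
 * STOP_RING in the low half while setting STOP_RING << 16 in the high half
 * makes the restore explicitly clear STOP_RING; it is the register-image
 * equivalent of the _MASKED_BIT_DISABLE(STOP_RING) write in
 * enable_execlists() above.
 */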
4201
4202static void __execlists_reset_reg_state(const struct intel_context *ce,
4203 const struct intel_engine_cs *engine)
4204{
4205 u32 *regs = ce->lrc_reg_state;
4206
4207 __reset_stop_ring(regs, engine);
4208}
4209
4210static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
4211{
4212 struct intel_engine_execlists * const execlists = &engine->execlists;
4213 struct intel_context *ce;
4214 struct i915_request *rq;
4215 u32 head;
4216
4217 mb(); /* paranoia: read the CSB pointers from after the reset */
4218 clflush(execlists->csb_write);
4219 mb();
4220
4221 process_csb(engine); /* drain preemption events */
4222
4223 /* Following the reset, we need to reload the CSB read/write pointers */
4224 reset_csb_pointers(engine);
4225
4226 /*
4227 * Save the currently executing context, even if we completed
4228 * its request, it was still running at the time of the
4229 * reset and will have been clobbered.
4230 */
4231 rq = active_context(engine, engine->execlists.reset_ccid);
4232 if (!rq)
4233 goto unwind;
4234
4235 ce = rq->context;
4236 GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
4237
4238 if (i915_request_completed(rq)) {
4239 /* Idle context; tidy up the ring so we can restart afresh */
4240 head = intel_ring_wrap(ce->ring, rq->tail);
4241 goto out_replay;
4242 }
4243
4244 /* We still have requests in-flight; the engine should be active */
4245 GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
4246
4247 /* Context has requests still in-flight; it should not be idle! */
4248 GEM_BUG_ON(i915_active_is_idle(&ce->active));
4249
4250 rq = active_request(ce->timeline, rq);
4251 head = intel_ring_wrap(ce->ring, rq->head);
4252 GEM_BUG_ON(head == ce->ring->tail);
4253
4254 /*
4255 * If this request hasn't started yet, e.g. it is waiting on a
4256 * semaphore, we need to avoid skipping the request or else we
4257 * break the signaling chain. However, if the context is corrupt
4258 * the request will not restart and we will be stuck with a wedged
	 * device. It is quite often the case that if we issue a reset
	 * while the GPU is loading the context image, the context
	 * image becomes corrupt.
4262 *
4263 * Otherwise, if we have not started yet, the request should replay
4264 * perfectly and we do not need to flag the result as being erroneous.
4265 */
4266 if (!i915_request_started(rq))
4267 goto out_replay;
4268
4269 /*
4270 * If the request was innocent, we leave the request in the ELSP
4271 * and will try to replay it on restarting. The context image may
4272 * have been corrupted by the reset, in which case we may have
4273 * to service a new GPU hang, but more likely we can continue on
4274 * without impact.
4275 *
4276 * If the request was guilty, we presume the context is corrupt
4277 * and have to at least restore the RING register in the context
4278 * image back to the expected values to skip over the guilty request.
4279 */
4280 __i915_request_reset(rq, stalled);
4281
4282 /*
4283 * We want a simple context + ring to execute the breadcrumb update.
4284 * We cannot rely on the context being intact across the GPU hang,
4285 * so clear it and rebuild just what we need for the breadcrumb.
4286 * All pending requests for this context will be zapped, and any
4287 * future request will be after userspace has had the opportunity
4288 * to recreate its own state.
4289 */
4290out_replay:
4291 ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n",
4292 head, ce->ring->tail);
4293 __execlists_reset_reg_state(ce, engine);
4294 __execlists_update_reg_state(ce, engine, head);
4295 ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
4296
4297unwind:
4298 /* Push back any incomplete requests for replay after the reset. */
4299 cancel_port_requests(execlists);
4300 __unwind_incomplete_requests(engine);
4301}
4302
4303static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled)
4304{
4305 unsigned long flags;
4306
4307 ENGINE_TRACE(engine, "\n");
4308
4309 spin_lock_irqsave(&engine->active.lock, flags);
4310
4311 __execlists_reset(engine, stalled);
4312
4313 spin_unlock_irqrestore(&engine->active.lock, flags);
4314}
4315
4316static void nop_submission_tasklet(unsigned long data)
4317{
4318 struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
4319
4320 /* The driver is wedged; don't process any more events. */
4321 WRITE_ONCE(engine->execlists.queue_priority_hint, INT_MIN);
4322}
4323
4324static void execlists_reset_cancel(struct intel_engine_cs *engine)
4325{
4326 struct intel_engine_execlists * const execlists = &engine->execlists;
4327 struct i915_request *rq, *rn;
4328 struct rb_node *rb;
4329 unsigned long flags;
4330
4331 ENGINE_TRACE(engine, "\n");
4332
4333 /*
4334 * Before we call engine->cancel_requests(), we should have exclusive
4335 * access to the submission state. This is arranged for us by the
4336 * caller disabling the interrupt generation, the tasklet and other
4337 * threads that may then access the same state, giving us a free hand
4338 * to reset state. However, we still need to let lockdep be aware that
4339 * we know this state may be accessed in hardirq context, so we
4340 * disable the irq around this manipulation and we want to keep
4341 * the spinlock focused on its duties and not accidentally conflate
4342 * coverage to the submission's irq state. (Similarly, although we
4343 * shouldn't need to disable irq around the manipulation of the
4344 * submission's irq state, we also wish to remind ourselves that
4345 * it is irq state.)
4346 */
4347 spin_lock_irqsave(&engine->active.lock, flags);
4348
4349 __execlists_reset(engine, true);
4350
4351 /* Mark all executing requests as skipped. */
4352 list_for_each_entry(rq, &engine->active.requests, sched.link)
4353 mark_eio(rq);
4354
4355 /* Flush the queued requests to the timeline list (for retiring). */
4356 while ((rb = rb_first_cached(&execlists->queue))) {
4357 struct i915_priolist *p = to_priolist(rb);
4358 int i;
4359
4360 priolist_for_each_request_consume(rq, rn, p, i) {
4361 mark_eio(rq);
4362 __i915_request_submit(rq);
4363 }
4364
4365 rb_erase_cached(&p->node, &execlists->queue);
4366 i915_priolist_free(p);
4367 }
4368
4369 /* On-hold requests will be flushed to timeline upon their release */
4370 list_for_each_entry(rq, &engine->active.hold, sched.link)
4371 mark_eio(rq);
4372
4373 /* Cancel all attached virtual engines */
4374 while ((rb = rb_first_cached(&execlists->virtual))) {
4375 struct virtual_engine *ve =
4376 rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
4377
4378 rb_erase_cached(rb, &execlists->virtual);
4379 RB_CLEAR_NODE(rb);
4380
4381 spin_lock(&ve->base.active.lock);
4382 rq = fetch_and_zero(&ve->request);
4383 if (rq) {
4384 mark_eio(rq);
4385
4386 rq->engine = engine;
4387 __i915_request_submit(rq);
4388 i915_request_put(rq);
4389
4390 ve->base.execlists.queue_priority_hint = INT_MIN;
4391 }
4392 spin_unlock(&ve->base.active.lock);
4393 }
4394
4395 /* Remaining _unready_ requests will be nop'ed when submitted */
4396
4397 execlists->queue_priority_hint = INT_MIN;
4398 execlists->queue = RB_ROOT_CACHED;
4399
4400 GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
4401 execlists->tasklet.func = nop_submission_tasklet;
4402
4403 spin_unlock_irqrestore(&engine->active.lock, flags);
4404}
4405
4406static void execlists_reset_finish(struct intel_engine_cs *engine)
4407{
4408 struct intel_engine_execlists * const execlists = &engine->execlists;
4409
4410 /*
4411 * After a GPU reset, we may have requests to replay. Do so now while
4412 * we still have the forcewake to be sure that the GPU is not allowed
4413 * to sleep before we restart and reload a context.
4414 */
4415 GEM_BUG_ON(!reset_in_progress(execlists));
4416 if (!RB_EMPTY_ROOT(&execlists->queue.rb_root))
4417 execlists->tasklet.func(execlists->tasklet.data);
4418
4419 if (__tasklet_enable(&execlists->tasklet))
4420 /* And kick in case we missed a new request submission. */
4421 tasklet_hi_schedule(&execlists->tasklet);
4422 ENGINE_TRACE(engine, "depth->%d\n",
4423 atomic_read(&execlists->tasklet.count));
4424}
4425
4426static int gen8_emit_bb_start_noarb(struct i915_request *rq,
4427 u64 offset, u32 len,
4428 const unsigned int flags)
4429{
4430 u32 *cs;
4431
4432 cs = intel_ring_begin(rq, 4);
4433 if (IS_ERR(cs))
4434 return PTR_ERR(cs);
4435
4436 /*
4437 * WaDisableCtxRestoreArbitration:bdw,chv
4438 *
	 * We don't need to perform MI_ARB_ENABLE as often as we do (in
	 * particular on all the gens that do not need the w/a at all!); if we
	 * took care to make sure that on every switch into this context
	 * (both ordinary and for preemption) arbitration was enabled, we
	 * would be fine. However, for gen8 there is another w/a that
	 * requires us to not preempt inside GPGPU execution, so we keep
	 * arbitration disabled for gen8 batches. Arbitration will be
	 * re-enabled before we close the request
	 * (engine->emit_fini_breadcrumb).
4448 */
4449 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
4450
4451 /* FIXME(BDW+): Address space and security selectors. */
4452 *cs++ = MI_BATCH_BUFFER_START_GEN8 |
4453 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
4454 *cs++ = lower_32_bits(offset);
4455 *cs++ = upper_32_bits(offset);
4456
4457 intel_ring_advance(rq, cs);
4458
4459 return 0;
4460}
4461
4462static int gen8_emit_bb_start(struct i915_request *rq,
4463 u64 offset, u32 len,
4464 const unsigned int flags)
4465{
4466 u32 *cs;
4467
4468 cs = intel_ring_begin(rq, 6);
4469 if (IS_ERR(cs))
4470 return PTR_ERR(cs);
4471
4472 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4473
4474 *cs++ = MI_BATCH_BUFFER_START_GEN8 |
4475 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
4476 *cs++ = lower_32_bits(offset);
4477 *cs++ = upper_32_bits(offset);
4478
4479 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
4480 *cs++ = MI_NOOP;
4481
4482 intel_ring_advance(rq, cs);
4483
4484 return 0;
4485}
4486
4487static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine)
4488{
4489 ENGINE_WRITE(engine, RING_IMR,
4490 ~(engine->irq_enable_mask | engine->irq_keep_mask));
4491 ENGINE_POSTING_READ(engine, RING_IMR);
4492}
4493
4494static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
4495{
4496 ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
4497}
4498
4499static int gen8_emit_flush(struct i915_request *request, u32 mode)
4500{
4501 u32 cmd, *cs;
4502
4503 cs = intel_ring_begin(request, 4);
4504 if (IS_ERR(cs))
4505 return PTR_ERR(cs);
4506
4507 cmd = MI_FLUSH_DW + 1;
4508
4509 /* We always require a command barrier so that subsequent
4510 * commands, such as breadcrumb interrupts, are strictly ordered
4511 * wrt the contents of the write cache being flushed to memory
4512 * (and thus being coherent from the CPU).
4513 */
4514 cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
4515
4516 if (mode & EMIT_INVALIDATE) {
4517 cmd |= MI_INVALIDATE_TLB;
4518 if (request->engine->class == VIDEO_DECODE_CLASS)
4519 cmd |= MI_INVALIDATE_BSD;
4520 }
4521
4522 *cs++ = cmd;
4523 *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
4524 *cs++ = 0; /* upper addr */
4525 *cs++ = 0; /* value */
4526 intel_ring_advance(request, cs);
4527
4528 return 0;
4529}
4530
4531static int gen8_emit_flush_render(struct i915_request *request,
4532 u32 mode)
4533{
4534 bool vf_flush_wa = false, dc_flush_wa = false;
4535 u32 *cs, flags = 0;
4536 int len;
4537
4538 flags |= PIPE_CONTROL_CS_STALL;
4539
4540 if (mode & EMIT_FLUSH) {
4541 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
4542 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
4543 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
4544 flags |= PIPE_CONTROL_FLUSH_ENABLE;
4545 }
4546
4547 if (mode & EMIT_INVALIDATE) {
4548 flags |= PIPE_CONTROL_TLB_INVALIDATE;
4549 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
4550 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
4551 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
4552 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
4553 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
4554 flags |= PIPE_CONTROL_QW_WRITE;
4555 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4556
4557 /*
4558 * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
4559 * pipe control.
4560 */
4561 if (IS_GEN(request->engine->i915, 9))
4562 vf_flush_wa = true;
4563
4564 /* WaForGAMHang:kbl */
4565 if (IS_KBL_REVID(request->engine->i915, 0, KBL_REVID_B0))
4566 dc_flush_wa = true;
4567 }
4568
4569 len = 6;
4570
4571 if (vf_flush_wa)
4572 len += 6;
4573
4574 if (dc_flush_wa)
4575 len += 12;
4576
4577 cs = intel_ring_begin(request, len);
4578 if (IS_ERR(cs))
4579 return PTR_ERR(cs);
4580
4581 if (vf_flush_wa)
4582 cs = gen8_emit_pipe_control(cs, 0, 0);
4583
4584 if (dc_flush_wa)
4585 cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
4586 0);
4587
4588 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4589
4590 if (dc_flush_wa)
4591 cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);
4592
4593 intel_ring_advance(request, cs);
4594
4595 return 0;
4596}
4597
4598static int gen11_emit_flush_render(struct i915_request *request,
4599 u32 mode)
4600{
4601 if (mode & EMIT_FLUSH) {
4602 u32 *cs;
4603 u32 flags = 0;
4604
4605 flags |= PIPE_CONTROL_CS_STALL;
4606
4607 flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
4608 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
4609 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
4610 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
4611 flags |= PIPE_CONTROL_FLUSH_ENABLE;
4612 flags |= PIPE_CONTROL_QW_WRITE;
4613 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4614
4615 cs = intel_ring_begin(request, 6);
4616 if (IS_ERR(cs))
4617 return PTR_ERR(cs);
4618
4619 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4620 intel_ring_advance(request, cs);
4621 }
4622
4623 if (mode & EMIT_INVALIDATE) {
4624 u32 *cs;
4625 u32 flags = 0;
4626
4627 flags |= PIPE_CONTROL_CS_STALL;
4628
4629 flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
4630 flags |= PIPE_CONTROL_TLB_INVALIDATE;
4631 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
4632 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
4633 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
4634 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
4635 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
4636 flags |= PIPE_CONTROL_QW_WRITE;
4637 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4638
4639 cs = intel_ring_begin(request, 6);
4640 if (IS_ERR(cs))
4641 return PTR_ERR(cs);
4642
4643 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4644 intel_ring_advance(request, cs);
4645 }
4646
4647 return 0;
4648}
4649
4650static u32 preparser_disable(bool state)
4651{
4652 return MI_ARB_CHECK | 1 << 8 | state;
4653}
4654
4655static i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine)
4656{
4657 static const i915_reg_t vd[] = {
4658 GEN12_VD0_AUX_NV,
4659 GEN12_VD1_AUX_NV,
4660 GEN12_VD2_AUX_NV,
4661 GEN12_VD3_AUX_NV,
4662 };
4663
4664 static const i915_reg_t ve[] = {
4665 GEN12_VE0_AUX_NV,
4666 GEN12_VE1_AUX_NV,
4667 };
4668
4669 if (engine->class == VIDEO_DECODE_CLASS)
4670 return vd[engine->instance];
4671
4672 if (engine->class == VIDEO_ENHANCEMENT_CLASS)
4673 return ve[engine->instance];
4674
4675 GEM_BUG_ON("unknown aux_inv_reg\n");
4676
4677 return INVALID_MMIO_REG;
4678}
4679
4680static u32 *
4681gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs)
4682{
4683 *cs++ = MI_LOAD_REGISTER_IMM(1);
4684 *cs++ = i915_mmio_reg_offset(inv_reg);
4685 *cs++ = AUX_INV;
4686 *cs++ = MI_NOOP;
4687
4688 return cs;
4689}
4690
4691static int gen12_emit_flush_render(struct i915_request *request,
4692 u32 mode)
4693{
4694 if (mode & EMIT_FLUSH) {
4695 u32 flags = 0;
4696 u32 *cs;
4697
4698 flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
4699 flags |= PIPE_CONTROL_FLUSH_L3;
4700 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
4701 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
4702 /* Wa_1409600907:tgl */
4703 flags |= PIPE_CONTROL_DEPTH_STALL;
4704 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
4705 flags |= PIPE_CONTROL_FLUSH_ENABLE;
4706
4707 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4708 flags |= PIPE_CONTROL_QW_WRITE;
4709
4710 flags |= PIPE_CONTROL_CS_STALL;
4711
4712 cs = intel_ring_begin(request, 6);
4713 if (IS_ERR(cs))
4714 return PTR_ERR(cs);
4715
4716 cs = gen12_emit_pipe_control(cs,
4717 PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
4718 flags, LRC_PPHWSP_SCRATCH_ADDR);
4719 intel_ring_advance(request, cs);
4720 }
4721
4722 if (mode & EMIT_INVALIDATE) {
4723 u32 flags = 0;
4724 u32 *cs;
4725
4726 flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
4727 flags |= PIPE_CONTROL_TLB_INVALIDATE;
4728 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
4729 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
4730 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
4731 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
4732 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
4733
4734 flags |= PIPE_CONTROL_STORE_DATA_INDEX;
4735 flags |= PIPE_CONTROL_QW_WRITE;
4736
4737 flags |= PIPE_CONTROL_CS_STALL;
4738
4739 cs = intel_ring_begin(request, 8 + 4);
4740 if (IS_ERR(cs))
4741 return PTR_ERR(cs);
4742
4743 /*
4744 * Prevent the pre-parser from skipping past the TLB
4745 * invalidate and loading a stale page for the batch
4746 * buffer / request payload.
4747 */
4748 *cs++ = preparser_disable(true);
4749
4750 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
4751
4752 /* hsdes: 1809175790 */
4753 cs = gen12_emit_aux_table_inv(GEN12_GFX_CCS_AUX_NV, cs);
4754
4755 *cs++ = preparser_disable(false);
4756 intel_ring_advance(request, cs);
4757 }
4758
4759 return 0;
4760}
4761
4762static int gen12_emit_flush(struct i915_request *request, u32 mode)
4763{
4764 intel_engine_mask_t aux_inv = 0;
4765 u32 cmd, *cs;
4766
4767 if (mode & EMIT_INVALIDATE)
4768 aux_inv = request->engine->mask & ~BIT(BCS0);
4769
4770 cs = intel_ring_begin(request,
4771 4 + (aux_inv ? 2 * hweight8(aux_inv) + 2 : 0));
4772 if (IS_ERR(cs))
4773 return PTR_ERR(cs);
4774
4775 cmd = MI_FLUSH_DW + 1;
4776
4777 /* We always require a command barrier so that subsequent
4778 * commands, such as breadcrumb interrupts, are strictly ordered
4779 * wrt the contents of the write cache being flushed to memory
4780 * (and thus being coherent from the CPU).
4781 */
4782 cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
4783
4784 if (mode & EMIT_INVALIDATE) {
4785 cmd |= MI_INVALIDATE_TLB;
4786 if (request->engine->class == VIDEO_DECODE_CLASS)
4787 cmd |= MI_INVALIDATE_BSD;
4788 }
4789
4790 *cs++ = cmd;
4791 *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
4792 *cs++ = 0; /* upper addr */
4793 *cs++ = 0; /* value */
4794
4795 if (aux_inv) { /* hsdes: 1809175790 */
4796 struct intel_engine_cs *engine;
4797 unsigned int tmp;
4798
4799 *cs++ = MI_LOAD_REGISTER_IMM(hweight8(aux_inv));
4800 for_each_engine_masked(engine, request->engine->gt,
4801 aux_inv, tmp) {
4802 *cs++ = i915_mmio_reg_offset(aux_inv_reg(engine));
4803 *cs++ = AUX_INV;
4804 }
4805 *cs++ = MI_NOOP;
4806 }
4807 intel_ring_advance(request, cs);
4808
4809 return 0;
4810}
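
/*
 * Worked example of the intel_ring_begin() sizing above: the MI_FLUSH_DW
 * block always takes 4 dwords; if aux_inv covers, say, two engines, the LRI
 * block adds a header, two (reg, AUX_INV) pairs and a trailing MI_NOOP,
 * i.e. 2 * hweight8(aux_inv) + 2 = 6 dwords, for 10 dwords in total.
 */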
4811
4812static void assert_request_valid(struct i915_request *rq)
4813{
4814 struct intel_ring *ring __maybe_unused = rq->ring;
4815
4816 /* Can we unwind this request without appearing to go forwards? */
4817 GEM_BUG_ON(intel_ring_direction(ring, rq->wa_tail, rq->head) <= 0);
4818}
4819
4820/*
4821 * Reserve space for 2 NOOPs at the end of each request to be
4822 * used as a workaround for not being allowed to do lite
4823 * restore with HEAD==TAIL (WaIdleLiteRestore).
4824 */
4825static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
4826{
4827 /* Ensure there's always at least one preemption point per-request. */
4828 *cs++ = MI_ARB_CHECK;
4829 *cs++ = MI_NOOP;
4830 request->wa_tail = intel_ring_offset(request, cs);
4831
4832 /* Check that entire request is less than half the ring */
4833 assert_request_valid(request);
4834
4835 return cs;
4836}
4837
4838static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
4839{
4840 *cs++ = MI_SEMAPHORE_WAIT |
4841 MI_SEMAPHORE_GLOBAL_GTT |
4842 MI_SEMAPHORE_POLL |
4843 MI_SEMAPHORE_SAD_EQ_SDD;
4844 *cs++ = 0;
4845 *cs++ = intel_hws_preempt_address(request->engine);
4846 *cs++ = 0;
4847
4848 return cs;
4849}
4850
4851static __always_inline u32*
4852gen8_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
4853{
4854 *cs++ = MI_USER_INTERRUPT;
4855
4856 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4857 if (intel_engine_has_semaphores(request->engine))
4858 cs = emit_preempt_busywait(request, cs);
4859
4860 request->tail = intel_ring_offset(request, cs);
4861 assert_ring_tail_valid(request->ring, request->tail);
4862
4863 return gen8_emit_wa_tail(request, cs);
4864}
4865
4866static u32 *emit_xcs_breadcrumb(struct i915_request *request, u32 *cs)
4867{
4868 u32 addr = i915_request_active_timeline(request)->hwsp_offset;
4869
4870 return gen8_emit_ggtt_write(cs, request->fence.seqno, addr, 0);
4871}
4872
4873static u32 *gen8_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
4874{
4875 return gen8_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs));
4876}
4877
4878static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4879{
4880 cs = gen8_emit_pipe_control(cs,
4881 PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4882 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4883 PIPE_CONTROL_DC_FLUSH_ENABLE,
4884 0);
4885
4886 /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
4887 cs = gen8_emit_ggtt_write_rcs(cs,
4888 request->fence.seqno,
4889 i915_request_active_timeline(request)->hwsp_offset,
4890 PIPE_CONTROL_FLUSH_ENABLE |
4891 PIPE_CONTROL_CS_STALL);
4892
4893 return gen8_emit_fini_breadcrumb_tail(request, cs);
4894}
4895
4896static u32 *
4897gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4898{
4899 cs = gen8_emit_ggtt_write_rcs(cs,
4900 request->fence.seqno,
4901 i915_request_active_timeline(request)->hwsp_offset,
4902 PIPE_CONTROL_CS_STALL |
4903 PIPE_CONTROL_TILE_CACHE_FLUSH |
4904 PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4905 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4906 PIPE_CONTROL_DC_FLUSH_ENABLE |
4907 PIPE_CONTROL_FLUSH_ENABLE);
4908
4909 return gen8_emit_fini_breadcrumb_tail(request, cs);
4910}
4911
4912/*
4913 * Note that the CS instruction pre-parser will not stall on the breadcrumb
4914 * flush and will continue pre-fetching the instructions after it before the
4915 * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at
4916 * BB_START/END instructions, so, even though we might pre-fetch the pre-amble
4917 * of the next request before the memory has been flushed, we're guaranteed that
4918 * we won't access the batch itself too early.
4919 * However, on gen12+ the parser can pre-fetch across the BB_START/END commands,
4920 * so, if the current request is modifying an instruction in the next request on
4921 * the same intel_context, we might pre-fetch and then execute the pre-update
4922 * instruction. To avoid this, the users of self-modifying code should either
4923 * disable the parser around the code emitting the memory writes, via a new flag
4924 * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For
4925 * the in-kernel use-cases we've opted to use a separate context, see
4926 * reloc_gpu() as an example.
4927 * All the above applies only to the instructions themselves. Non-inline data
4928 * used by the instructions is not pre-fetched.
4929 */
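
/*
 * As a sketch of the first option above (illustrative only, not a path the
 * driver takes): a caller emitting such self-modifying writes could bracket
 * them with the pre-parser toggle used by gen12_emit_flush_render(),
 *
 *	*cs++ = preparser_disable(true);
 *	... emit the writes that patch the following instructions ...
 *	*cs++ = preparser_disable(false);
 *
 * whereas the in-kernel users opt for the separate-context approach, see
 * reloc_gpu().
 */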
4930
4931static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs)
4932{
4933 *cs++ = MI_SEMAPHORE_WAIT_TOKEN |
4934 MI_SEMAPHORE_GLOBAL_GTT |
4935 MI_SEMAPHORE_POLL |
4936 MI_SEMAPHORE_SAD_EQ_SDD;
4937 *cs++ = 0;
4938 *cs++ = intel_hws_preempt_address(request->engine);
4939 *cs++ = 0;
4940 *cs++ = 0;
4941 *cs++ = MI_NOOP;
4942
4943 return cs;
4944}
4945
4946static __always_inline u32*
4947gen12_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
4948{
4949 *cs++ = MI_USER_INTERRUPT;
4950
4951 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4952 if (intel_engine_has_semaphores(request->engine))
4953 cs = gen12_emit_preempt_busywait(request, cs);
4954
4955 request->tail = intel_ring_offset(request, cs);
4956 assert_ring_tail_valid(request->ring, request->tail);
4957
4958 return gen8_emit_wa_tail(request, cs);
4959}
4960
4961static u32 *gen12_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
4962{
4963 return gen12_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs));
4964}
4965
4966static u32 *
4967gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4968{
4969 cs = gen12_emit_ggtt_write_rcs(cs,
4970 request->fence.seqno,
4971 i915_request_active_timeline(request)->hwsp_offset,
4972 PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
4973 PIPE_CONTROL_CS_STALL |
4974 PIPE_CONTROL_TILE_CACHE_FLUSH |
4975 PIPE_CONTROL_FLUSH_L3 |
4976 PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
4977 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
4978 /* Wa_1409600907:tgl */
4979 PIPE_CONTROL_DEPTH_STALL |
4980 PIPE_CONTROL_DC_FLUSH_ENABLE |
4981 PIPE_CONTROL_FLUSH_ENABLE);
4982
4983 return gen12_emit_fini_breadcrumb_tail(request, cs);
4984}
4985
4986static void execlists_park(struct intel_engine_cs *engine)
4987{
4988 cancel_timer(&engine->execlists.timer);
4989 cancel_timer(&engine->execlists.preempt);
4990}
4991
4992void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
4993{
4994 engine->submit_request = execlists_submit_request;
4995 engine->schedule = i915_schedule;
4996 engine->execlists.tasklet.func = execlists_submission_tasklet;
4997
4998 engine->reset.prepare = execlists_reset_prepare;
4999 engine->reset.rewind = execlists_reset_rewind;
5000 engine->reset.cancel = execlists_reset_cancel;
5001 engine->reset.finish = execlists_reset_finish;
5002
5003 engine->park = execlists_park;
5004 engine->unpark = NULL;
5005
5006 engine->flags |= I915_ENGINE_SUPPORTS_STATS;
5007 if (!intel_vgpu_active(engine->i915)) {
5008 engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
5009 if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) {
5010 engine->flags |= I915_ENGINE_HAS_PREEMPTION;
5011 if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
5012 engine->flags |= I915_ENGINE_HAS_TIMESLICES;
5013 }
5014 }
5015
5016 if (INTEL_GEN(engine->i915) >= 12)
5017 engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
5018
5019 if (intel_engine_has_preemption(engine))
5020 engine->emit_bb_start = gen8_emit_bb_start;
5021 else
5022 engine->emit_bb_start = gen8_emit_bb_start_noarb;
5023}
5024
5025static void execlists_shutdown(struct intel_engine_cs *engine)
5026{
5027 /* Synchronise with residual timers and any softirq they raise */
5028 del_timer_sync(&engine->execlists.timer);
5029 del_timer_sync(&engine->execlists.preempt);
5030 tasklet_kill(&engine->execlists.tasklet);
5031}
5032
5033static void execlists_release(struct intel_engine_cs *engine)
5034{
5035 engine->sanitize = NULL; /* no longer in control, nothing to sanitize */
5036
5037 execlists_shutdown(engine);
5038
5039 intel_engine_cleanup_common(engine);
5040 lrc_destroy_wa_ctx(engine);
5041}
5042
5043static void
5044logical_ring_default_vfuncs(struct intel_engine_cs *engine)
5045{
	/* Default vfuncs which can be overridden by each engine. */
5047
5048 engine->resume = execlists_resume;
5049
5050 engine->cops = &execlists_context_ops;
5051 engine->request_alloc = execlists_request_alloc;
5052
5053 engine->emit_flush = gen8_emit_flush;
5054 engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
5055 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
5056 if (INTEL_GEN(engine->i915) >= 12) {
5057 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb;
5058 engine->emit_flush = gen12_emit_flush;
5059 }
5060 engine->set_default_submission = intel_execlists_set_default_submission;
5061
5062 if (INTEL_GEN(engine->i915) < 11) {
5063 engine->irq_enable = gen8_logical_ring_enable_irq;
5064 engine->irq_disable = gen8_logical_ring_disable_irq;
5065 } else {
5066 /*
		 * TODO: On Gen11 interrupt masks need to be clear
		 * to allow C6 entry. Keep interrupts enabled at all
		 * times and take the hit of generating extra interrupts
		 * until a more refined solution exists.
5071 */
5072 }
5073}
5074
5075static inline void
5076logical_ring_default_irqs(struct intel_engine_cs *engine)
5077{
5078 unsigned int shift = 0;
5079
5080 if (INTEL_GEN(engine->i915) < 11) {
5081 const u8 irq_shifts[] = {
5082 [RCS0] = GEN8_RCS_IRQ_SHIFT,
5083 [BCS0] = GEN8_BCS_IRQ_SHIFT,
5084 [VCS0] = GEN8_VCS0_IRQ_SHIFT,
5085 [VCS1] = GEN8_VCS1_IRQ_SHIFT,
5086 [VECS0] = GEN8_VECS_IRQ_SHIFT,
5087 };
5088
5089 shift = irq_shifts[engine->id];
5090 }
5091
5092 engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
5093 engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
5094 engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift;
5095 engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT << shift;
5096}
5097
5098static void rcs_submission_override(struct intel_engine_cs *engine)
5099{
5100 switch (INTEL_GEN(engine->i915)) {
5101 case 12:
5102 engine->emit_flush = gen12_emit_flush_render;
5103 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
5104 break;
5105 case 11:
5106 engine->emit_flush = gen11_emit_flush_render;
5107 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
5108 break;
5109 default:
5110 engine->emit_flush = gen8_emit_flush_render;
5111 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
5112 break;
5113 }
5114}
5115
5116int intel_execlists_submission_setup(struct intel_engine_cs *engine)
5117{
5118 struct intel_engine_execlists * const execlists = &engine->execlists;
5119 struct drm_i915_private *i915 = engine->i915;
5120 struct intel_uncore *uncore = engine->uncore;
5121 u32 base = engine->mmio_base;
5122
5123 tasklet_init(&engine->execlists.tasklet,
5124 execlists_submission_tasklet, (unsigned long)engine);
5125 timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
5126 timer_setup(&engine->execlists.preempt, execlists_preempt, 0);
5127
5128 logical_ring_default_vfuncs(engine);
5129 logical_ring_default_irqs(engine);
5130
5131 if (engine->class == RENDER_CLASS)
5132 rcs_submission_override(engine);
5133
5134 if (intel_init_workaround_bb(engine))
5135 /*
		 * We continue even if we fail to initialize the WA batch
		 * because we only expect rare glitches, nothing critical
		 * enough to prevent us from using the GPU.
5139 */
5140 drm_err(&i915->drm, "WA batch buffer initialization failed\n");
5141
5142 if (HAS_LOGICAL_RING_ELSQ(i915)) {
5143 execlists->submit_reg = uncore->regs +
5144 i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base));
5145 execlists->ctrl_reg = uncore->regs +
5146 i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base));
5147 } else {
5148 execlists->submit_reg = uncore->regs +
5149 i915_mmio_reg_offset(RING_ELSP(base));
5150 }
5151
5152 execlists->csb_status =
5153 &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
5154
5155 execlists->csb_write =
5156 &engine->status_page.addr[intel_hws_csb_write_index(i915)];
5157
5158 if (INTEL_GEN(i915) < 11)
5159 execlists->csb_size = GEN8_CSB_ENTRIES;
5160 else
5161 execlists->csb_size = GEN11_CSB_ENTRIES;
5162
5163 if (INTEL_GEN(engine->i915) >= 11) {
5164 execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32);
5165 execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32);
5166 }
5167
5168 /* Finally, take ownership and responsibility for cleanup! */
5169 engine->sanitize = execlists_sanitize;
5170 engine->release = execlists_release;
5171
5172 return 0;
5173}
5174
5175static void init_common_reg_state(u32 * const regs,
5176 const struct intel_engine_cs *engine,
5177 const struct intel_ring *ring,
5178 bool inhibit)
5179{
5180 u32 ctl;
5181
5182 ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
5183 ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
5184 if (inhibit)
5185 ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
5186 if (INTEL_GEN(engine->i915) < 11)
5187 ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
5188 CTX_CTRL_RS_CTX_ENABLE);
5189 regs[CTX_CONTEXT_CONTROL] = ctl;
5190
5191 regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
5192 regs[CTX_TIMESTAMP] = 0;
5193}
5194
5195static void init_wa_bb_reg_state(u32 * const regs,
5196 const struct intel_engine_cs *engine)
5197{
5198 const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
5199
5200 if (wa_ctx->per_ctx.size) {
5201 const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
5202
5203 GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
5204 regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
5205 (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
5206 }
5207
5208 if (wa_ctx->indirect_ctx.size) {
5209 lrc_ring_setup_indirect_ctx(regs, engine,
5210 i915_ggtt_offset(wa_ctx->vma) +
5211 wa_ctx->indirect_ctx.offset,
5212 wa_ctx->indirect_ctx.size);
5213 }
5214}
5215
5216static void init_ppgtt_reg_state(u32 *regs, const struct i915_ppgtt *ppgtt)
5217{
5218 if (i915_vm_is_4lvl(&ppgtt->vm)) {
5219 /* 64b PPGTT (48bit canonical)
5220 * PDP0_DESCRIPTOR contains the base address to PML4 and
5221 * other PDP Descriptors are ignored.
5222 */
5223 ASSIGN_CTX_PML4(ppgtt, regs);
5224 } else {
5225 ASSIGN_CTX_PDP(ppgtt, regs, 3);
5226 ASSIGN_CTX_PDP(ppgtt, regs, 2);
5227 ASSIGN_CTX_PDP(ppgtt, regs, 1);
5228 ASSIGN_CTX_PDP(ppgtt, regs, 0);
5229 }
5230}
5231
5232static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
5233{
5234 if (i915_is_ggtt(vm))
5235 return i915_vm_to_ggtt(vm)->alias;
5236 else
5237 return i915_vm_to_ppgtt(vm);
5238}
5239
5240static void execlists_init_reg_state(u32 *regs,
5241 const struct intel_context *ce,
5242 const struct intel_engine_cs *engine,
5243 const struct intel_ring *ring,
5244 bool inhibit)
5245{
5246 /*
5247 * A context is actually a big batch buffer with several
5248 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
5249 * values we are setting here are only for the first context restore:
5250 * on a subsequent save, the GPU will recreate this batchbuffer with new
5251 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
5252 * we are not initializing here).
5253 *
5254 * Must keep consistent with virtual_update_register_offsets().
5255 */
5256 set_offsets(regs, reg_offsets(engine), engine, inhibit);
5257
5258 init_common_reg_state(regs, engine, ring, inhibit);
5259 init_ppgtt_reg_state(regs, vm_alias(ce->vm));
5260
5261 init_wa_bb_reg_state(regs, engine);
5262
5263 __reset_stop_ring(regs, engine);
5264}
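
/*
 * To make the "big batch buffer" analogy above concrete, the register state
 * ends up laid out as runs of
 *
 *	MI_LOAD_REGISTER_IMM(N)
 *	offset(reg0), value0
 *	offset(reg1), value1
 *	...
 *
 * with set_offsets() (from reg_offsets(engine)) providing the command headers
 * and register offsets, and the init_*_reg_state() helpers above filling in
 * the values that matter for the first context restore.
 */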
5265
5266static int
5267populate_lr_context(struct intel_context *ce,
5268 struct drm_i915_gem_object *ctx_obj,
5269 struct intel_engine_cs *engine,
5270 struct intel_ring *ring)
5271{
5272 bool inhibit = true;
5273 void *vaddr;
5274
5275 vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
5276 if (IS_ERR(vaddr)) {
5277 drm_dbg(&engine->i915->drm, "Could not map object pages!\n");
5278 return PTR_ERR(vaddr);
5279 }
5280
5281 set_redzone(vaddr, engine);
5282
5283 if (engine->default_state) {
5284 shmem_read(engine->default_state, 0,
5285 vaddr, engine->context_size);
5286 __set_bit(CONTEXT_VALID_BIT, &ce->flags);
5287 inhibit = false;
5288 }
5289
5290 /* Clear the ppHWSP (inc. per-context counters) */
5291 memset(vaddr, 0, PAGE_SIZE);
5292
5293 /*
5294 * The second page of the context object contains some registers which
5295 * must be set up prior to the first execution.
5296 */
5297 execlists_init_reg_state(vaddr + LRC_STATE_OFFSET,
5298 ce, engine, ring, inhibit);
5299
5300 __i915_gem_object_flush_map(ctx_obj, 0, engine->context_size);
5301 i915_gem_object_unpin_map(ctx_obj);
5302 return 0;
5303}
5304
5305static int __execlists_context_alloc(struct intel_context *ce,
5306 struct intel_engine_cs *engine)
5307{
5308 struct drm_i915_gem_object *ctx_obj;
5309 struct intel_ring *ring;
5310 struct i915_vma *vma;
5311 u32 context_size;
5312 int ret;
5313
5314 GEM_BUG_ON(ce->state);
5315 context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
5316
5317 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
5318 context_size += I915_GTT_PAGE_SIZE; /* for redzone */
5319
5320 if (INTEL_GEN(engine->i915) == 12) {
5321 ce->wa_bb_page = context_size / PAGE_SIZE;
5322 context_size += PAGE_SIZE;
5323 }
5324
5325 ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
5326 if (IS_ERR(ctx_obj))
5327 return PTR_ERR(ctx_obj);
5328
5329 vma = i915_vma_instance(ctx_obj, &engine->gt->ggtt->vm, NULL);
5330 if (IS_ERR(vma)) {
5331 ret = PTR_ERR(vma);
5332 goto error_deref_obj;
5333 }
5334
5335 if (!ce->timeline) {
5336 struct intel_timeline *tl;
5337 struct i915_vma *hwsp;
5338
5339 /*
5340 * Use the static global HWSP for the kernel context, and
5341 * a dynamically allocated cacheline for everyone else.
5342 */
5343 hwsp = NULL;
5344 if (unlikely(intel_context_is_barrier(ce)))
5345 hwsp = engine->status_page.vma;
5346
5347 tl = intel_timeline_create(engine->gt, hwsp);
5348 if (IS_ERR(tl)) {
5349 ret = PTR_ERR(tl);
5350 goto error_deref_obj;
5351 }
5352
5353 ce->timeline = tl;
5354 }
5355
5356 ring = intel_engine_create_ring(engine, (unsigned long)ce->ring);
5357 if (IS_ERR(ring)) {
5358 ret = PTR_ERR(ring);
5359 goto error_deref_obj;
5360 }
5361
5362 ret = populate_lr_context(ce, ctx_obj, engine, ring);
5363 if (ret) {
5364 drm_dbg(&engine->i915->drm,
5365 "Failed to populate LRC: %d\n", ret);
5366 goto error_ring_free;
5367 }
5368
5369 ce->ring = ring;
5370 ce->state = vma;
5371
5372 return 0;
5373
5374error_ring_free:
5375 intel_ring_put(ring);
5376error_deref_obj:
5377 i915_gem_object_put(ctx_obj);
5378 return ret;
5379}
5380
5381static struct list_head *virtual_queue(struct virtual_engine *ve)
5382{
5383 return &ve->base.execlists.default_priolist.requests[0];
5384}
5385
5386static void virtual_context_destroy(struct kref *kref)
5387{
5388 struct virtual_engine *ve =
5389 container_of(kref, typeof(*ve), context.ref);
5390 unsigned int n;
5391
5392 GEM_BUG_ON(!list_empty(virtual_queue(ve)));
5393 GEM_BUG_ON(ve->request);
5394 GEM_BUG_ON(ve->context.inflight);
5395
5396 for (n = 0; n < ve->num_siblings; n++) {
5397 struct intel_engine_cs *sibling = ve->siblings[n];
5398 struct rb_node *node = &ve->nodes[sibling->id].rb;
5399 unsigned long flags;
5400
5401 if (RB_EMPTY_NODE(node))
5402 continue;
5403
5404 spin_lock_irqsave(&sibling->active.lock, flags);
5405
5406 /* Detachment is lazily performed in the execlists tasklet */
5407 if (!RB_EMPTY_NODE(node))
5408 rb_erase_cached(node, &sibling->execlists.virtual);
5409
5410 spin_unlock_irqrestore(&sibling->active.lock, flags);
5411 }
5412 GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
5413
5414 if (ve->context.state)
5415 __execlists_context_fini(&ve->context);
5416 intel_context_fini(&ve->context);
5417
5418 intel_engine_free_request_pool(&ve->base);
5419
5420 kfree(ve->bonds);
5421 kfree(ve);
5422}
5423
5424static void virtual_engine_initial_hint(struct virtual_engine *ve)
5425{
5426 int swp;
5427
5428 /*
5429 * Pick a random sibling on starting to help spread the load around.
5430 *
5431 * New contexts are typically created with exactly the same order
5432 * of siblings, and often started in batches. Due to the way we iterate
	 * the array of siblings when submitting requests, sibling[0] is
5434 * prioritised for dequeuing. If we make sure that sibling[0] is fairly
5435 * randomised across the system, we also help spread the load by the
5436 * first engine we inspect being different each time.
5437 *
5438 * NB This does not force us to execute on this engine, it will just
5439 * typically be the first we inspect for submission.
5440 */
5441 swp = prandom_u32_max(ve->num_siblings);
5442 if (swp)
5443 swap(ve->siblings[swp], ve->siblings[0]);
5444}
5445
5446static int virtual_context_alloc(struct intel_context *ce)
5447{
5448 struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
5449
5450 return __execlists_context_alloc(ce, ve->siblings[0]);
5451}
5452
5453static int virtual_context_pin(struct intel_context *ce)
5454{
5455 struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
5456
5457 /* Note: we must use a real engine class for setting up reg state */
5458 return __execlists_context_pin(ce, ve->siblings[0]);
5459}
5460
5461static void virtual_context_enter(struct intel_context *ce)
5462{
5463 struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
5464 unsigned int n;
5465
5466 for (n = 0; n < ve->num_siblings; n++)
5467 intel_engine_pm_get(ve->siblings[n]);
5468
5469 intel_timeline_enter(ce->timeline);
5470}
5471
5472static void virtual_context_exit(struct intel_context *ce)
5473{
5474 struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
5475 unsigned int n;
5476
5477 intel_timeline_exit(ce->timeline);
5478
5479 for (n = 0; n < ve->num_siblings; n++)
5480 intel_engine_pm_put(ve->siblings[n]);
5481}
5482
5483static const struct intel_context_ops virtual_context_ops = {
5484 .alloc = virtual_context_alloc,
5485
5486 .pin = virtual_context_pin,
5487 .unpin = execlists_context_unpin,
5488
5489 .enter = virtual_context_enter,
5490 .exit = virtual_context_exit,
5491
5492 .destroy = virtual_context_destroy,
5493};
5494
5495static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
5496{
5497 struct i915_request *rq;
5498 intel_engine_mask_t mask;
5499
5500 rq = READ_ONCE(ve->request);
5501 if (!rq)
5502 return 0;
5503
5504 /* The rq is ready for submission; rq->execution_mask is now stable. */
5505 mask = rq->execution_mask;
5506 if (unlikely(!mask)) {
5507 /* Invalid selection, submit to a random engine in error */
                i915_request_set_error_once(rq, -ENODEV);
                mask = ve->siblings[0]->mask;
        }

        ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n",
                     rq->fence.context, rq->fence.seqno,
                     mask, ve->base.execlists.queue_priority_hint);

        return mask;
}

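/*
 * Distribute the single pending virtual request into the virtual rb-tree of
 * each allowed sibling so that whichever physical engine is ready first can
 * claim it. Nodes left behind on the other siblings are detached lazily by
 * their execlists tasklets once the request has been taken.
 */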
static void virtual_submission_tasklet(unsigned long data)
{
        struct virtual_engine * const ve = (struct virtual_engine *)data;
        const int prio = READ_ONCE(ve->base.execlists.queue_priority_hint);
        intel_engine_mask_t mask;
        unsigned int n;

        rcu_read_lock();
        mask = virtual_submission_mask(ve);
        rcu_read_unlock();
        if (unlikely(!mask))
                return;

        local_irq_disable();
        for (n = 0; n < ve->num_siblings; n++) {
                struct intel_engine_cs *sibling = READ_ONCE(ve->siblings[n]);
                struct ve_node * const node = &ve->nodes[sibling->id];
                struct rb_node **parent, *rb;
                bool first;

                if (!READ_ONCE(ve->request))
                        break; /* already handled by a sibling's tasklet */

                if (unlikely(!(mask & sibling->mask))) {
                        if (!RB_EMPTY_NODE(&node->rb)) {
                                spin_lock(&sibling->active.lock);
                                rb_erase_cached(&node->rb,
                                                &sibling->execlists.virtual);
                                RB_CLEAR_NODE(&node->rb);
                                spin_unlock(&sibling->active.lock);
                        }
                        continue;
                }

                spin_lock(&sibling->active.lock);

                if (!RB_EMPTY_NODE(&node->rb)) {
                        /*
                         * Cheat and avoid rebalancing the tree if we can
                         * reuse this node in situ.
                         */
                        first = rb_first_cached(&sibling->execlists.virtual) ==
                                &node->rb;
                        if (prio == node->prio || (prio > node->prio && first))
                                goto submit_engine;

                        rb_erase_cached(&node->rb, &sibling->execlists.virtual);
                }

                rb = NULL;
                first = true;
                parent = &sibling->execlists.virtual.rb_root.rb_node;
                while (*parent) {
                        struct ve_node *other;

                        rb = *parent;
                        other = rb_entry(rb, typeof(*other), rb);
                        if (prio > other->prio) {
                                parent = &rb->rb_left;
                        } else {
                                parent = &rb->rb_right;
                                first = false;
                        }
                }

                rb_link_node(&node->rb, rb, parent);
                rb_insert_color_cached(&node->rb,
                                       &sibling->execlists.virtual,
                                       first);

submit_engine:
                GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
                node->prio = prio;
                if (first && prio > sibling->execlists.queue_priority_hint)
                        tasklet_hi_schedule(&sibling->execlists.tasklet);

                spin_unlock(&sibling->active.lock);
        }
        local_irq_enable();
}

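/*
 * Submission hook for the virtual engine: record the request as the single
 * pending request and kick the virtual tasklet so that it is offered to the
 * siblings. An old, already completed request may still be attached (a
 * background completion from preempt-to-busy) and is flushed out first.
 */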
static void virtual_submit_request(struct i915_request *rq)
{
        struct virtual_engine *ve = to_virtual_engine(rq->engine);
        struct i915_request *old;
        unsigned long flags;

        ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n",
                     rq->fence.context,
                     rq->fence.seqno);

        GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);

        spin_lock_irqsave(&ve->base.active.lock, flags);

        old = ve->request;
        if (old) { /* background completion event from preempt-to-busy */
                GEM_BUG_ON(!i915_request_completed(old));
                __i915_request_submit(old);
                i915_request_put(old);
        }

        if (i915_request_completed(rq)) {
                __i915_request_submit(rq);

                ve->base.execlists.queue_priority_hint = INT_MIN;
                ve->request = NULL;
        } else {
                ve->base.execlists.queue_priority_hint = rq_prio(rq);
                ve->request = i915_request_get(rq);

                GEM_BUG_ON(!list_empty(virtual_queue(ve)));
                list_move_tail(&rq->sched.link, virtual_queue(ve));

                tasklet_hi_schedule(&ve->base.execlists.tasklet);
        }

        spin_unlock_irqrestore(&ve->base.active.lock, flags);
}

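/* Find the bond, if any, previously attached for the given master engine. */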
static struct ve_bond *
virtual_find_bond(struct virtual_engine *ve,
                  const struct intel_engine_cs *master)
{
        int i;

        for (i = 0; i < ve->num_bonds; i++) {
                if (ve->bonds[i].master == master)
                        return &ve->bonds[i];
        }

        return NULL;
}

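/*
 * Bond-execute hook: once the master request has been submitted to a
 * physical engine, narrow this request's execution_mask to the siblings
 * bonded to that engine (always excluding the master's own engine), and
 * conversely stop the master from being rerun on those bonded engines.
 */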
static void
virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
{
        struct virtual_engine *ve = to_virtual_engine(rq->engine);
        intel_engine_mask_t allowed, exec;
        struct ve_bond *bond;

        allowed = ~to_request(signal)->engine->mask;

        bond = virtual_find_bond(ve, to_request(signal)->engine);
        if (bond)
                allowed &= bond->sibling_mask;

        /* Restrict the bonded request to run on only the available engines */
        exec = READ_ONCE(rq->execution_mask);
        while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed))
                ;

        /* Prevent the master from being re-run on the bonded engines */
        to_request(signal)->execution_mask &= ~allowed;
}

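/*
 * Create a virtual engine that load-balances its requests across the given
 * set of sibling physical engines, returning its intel_context. A single
 * sibling degenerates into an ordinary context on that engine.
 */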
struct intel_context *
intel_execlists_create_virtual(struct intel_engine_cs **siblings,
                               unsigned int count)
{
        struct virtual_engine *ve;
        unsigned int n;
        int err;

        if (count == 0)
                return ERR_PTR(-EINVAL);

        if (count == 1)
                return intel_context_create(siblings[0]);

        ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
        if (!ve)
                return ERR_PTR(-ENOMEM);

        ve->base.i915 = siblings[0]->i915;
        ve->base.gt = siblings[0]->gt;
        ve->base.uncore = siblings[0]->uncore;
        ve->base.id = -1;

        ve->base.class = OTHER_CLASS;
        ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
        ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
        ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;

        /*
         * The decision on whether to submit a request using semaphores
         * depends on the saturated state of the engine. We only compute
         * this during HW submission of the request, and we need this
         * state to be applied globally to all requests being submitted
         * to this engine. Virtual engines encompass more than one physical
         * engine, so we cannot accurately tell in advance whether one of
         * those engines is already saturated and thus cannot afford to use
         * a semaphore (and be pessimized in priority for doing so) -- if we
         * are the only context using semaphores after all other clients have
         * stopped, we will be starved on the saturated system. Such a global
         * switch for semaphores is less than ideal, but alas is the current
         * compromise.
         */
        ve->base.saturated = ALL_ENGINES;

        snprintf(ve->base.name, sizeof(ve->base.name), "virtual");

        intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
        intel_engine_init_breadcrumbs(&ve->base);
        intel_engine_init_execlists(&ve->base);
        ve->base.breadcrumbs.irq_armed = true; /* fake HW, used for irq_work */

        ve->base.cops = &virtual_context_ops;
        ve->base.request_alloc = execlists_request_alloc;

        ve->base.schedule = i915_schedule;
        ve->base.submit_request = virtual_submit_request;
        ve->base.bond_execute = virtual_bond_execute;

        INIT_LIST_HEAD(virtual_queue(ve));
        ve->base.execlists.queue_priority_hint = INT_MIN;
        tasklet_init(&ve->base.execlists.tasklet,
                     virtual_submission_tasklet,
                     (unsigned long)ve);

        intel_context_init(&ve->context, &ve->base);

        for (n = 0; n < count; n++) {
                struct intel_engine_cs *sibling = siblings[n];

                GEM_BUG_ON(!is_power_of_2(sibling->mask));
                if (sibling->mask & ve->base.mask) {
                        DRM_DEBUG("duplicate %s entry in load balancer\n",
                                  sibling->name);
                        err = -EINVAL;
                        goto err_put;
                }

                /*
                 * The virtual engine implementation is tightly coupled to
                 * the execlists backend -- we push requests directly
                 * into a tree inside each physical engine. We could support
                 * layering if we handled cloning of the requests and
                 * submitted a copy into each backend.
                 */
                if (sibling->execlists.tasklet.func !=
                    execlists_submission_tasklet) {
                        err = -ENODEV;
                        goto err_put;
                }

                GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
                RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);

                ve->siblings[ve->num_siblings++] = sibling;
                ve->base.mask |= sibling->mask;

                /*
                 * All physical engines must be compatible for their emission
                 * functions (as we build the instructions during request
                 * construction and do not alter them before submission
                 * on the physical engine). We use the engine class as a guide
                 * here, although that could be refined.
                 */
                if (ve->base.class != OTHER_CLASS) {
                        if (ve->base.class != sibling->class) {
                                DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
                                          sibling->class, ve->base.class);
                                err = -EINVAL;
                                goto err_put;
                        }
                        continue;
                }

                ve->base.class = sibling->class;
                ve->base.uabi_class = sibling->uabi_class;
                snprintf(ve->base.name, sizeof(ve->base.name),
                         "v%dx%d", ve->base.class, count);
                ve->base.context_size = sibling->context_size;

                ve->base.emit_bb_start = sibling->emit_bb_start;
                ve->base.emit_flush = sibling->emit_flush;
                ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
                ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
                ve->base.emit_fini_breadcrumb_dw =
                        sibling->emit_fini_breadcrumb_dw;

                ve->base.flags = sibling->flags;
        }

        ve->base.flags |= I915_ENGINE_IS_VIRTUAL;

        virtual_engine_initial_hint(ve);
        return &ve->context;

err_put:
        intel_context_put(&ve->context);
        return ERR_PTR(err);
}

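/*
 * Create a new virtual engine backed by the same set of siblings as @src,
 * copying across any bonds that have been attached.
 */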
struct intel_context *
intel_execlists_clone_virtual(struct intel_engine_cs *src)
{
        struct virtual_engine *se = to_virtual_engine(src);
        struct intel_context *dst;

        dst = intel_execlists_create_virtual(se->siblings,
                                             se->num_siblings);
        if (IS_ERR(dst))
                return dst;

        if (se->num_bonds) {
                struct virtual_engine *de = to_virtual_engine(dst->engine);

                de->bonds = kmemdup(se->bonds,
                                    sizeof(*se->bonds) * se->num_bonds,
                                    GFP_KERNEL);
                if (!de->bonds) {
                        intel_context_put(dst);
                        return ERR_PTR(-ENOMEM);
                }

                de->num_bonds = se->num_bonds;
        }

        return dst;
}

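/*
 * Record that @sibling may execute a bonded request whose master has been
 * submitted to @master, accumulating the allowed siblings into a per-master
 * mask (see virtual_bond_execute()).
 */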
int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
                                     const struct intel_engine_cs *master,
                                     const struct intel_engine_cs *sibling)
{
        struct virtual_engine *ve = to_virtual_engine(engine);
        struct ve_bond *bond;
        int n;

        /* Sanity check the sibling is part of the virtual engine */
        for (n = 0; n < ve->num_siblings; n++)
                if (sibling == ve->siblings[n])
                        break;
        if (n == ve->num_siblings)
                return -EINVAL;

        bond = virtual_find_bond(ve, master);
        if (bond) {
                bond->sibling_mask |= sibling->mask;
                return 0;
        }

        bond = krealloc(ve->bonds,
                        sizeof(*bond) * (ve->num_bonds + 1),
                        GFP_KERNEL);
        if (!bond)
                return -ENOMEM;

        bond[ve->num_bonds].master = master;
        bond[ve->num_bonds].sibling_mask = sibling->mask;

        ve->bonds = bond;
        ve->num_bonds++;

        return 0;
}

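/* Look up the nth physical engine backing this virtual engine, or NULL. */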
struct intel_engine_cs *
intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
                                 unsigned int sibling)
{
        struct virtual_engine *ve = to_virtual_engine(engine);

        if (sibling >= ve->num_siblings)
                return NULL;

        return ve->siblings[sibling];
}

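/*
 * Print the requests tracked by this engine via @show_request: those
 * currently executing (E), those queued awaiting execution (Q) and any
 * requests pending on virtual engines attached to this engine (V). Each
 * list is truncated to at most @max entries.
 */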
void intel_execlists_show_requests(struct intel_engine_cs *engine,
                                   struct drm_printer *m,
                                   void (*show_request)(struct drm_printer *m,
                                                        struct i915_request *rq,
                                                        const char *prefix),
                                   unsigned int max)
{
        const struct intel_engine_execlists *execlists = &engine->execlists;
        struct i915_request *rq, *last;
        unsigned long flags;
        unsigned int count;
        struct rb_node *rb;

        spin_lock_irqsave(&engine->active.lock, flags);

        last = NULL;
        count = 0;
        list_for_each_entry(rq, &engine->active.requests, sched.link) {
                if (count++ < max - 1)
                        show_request(m, rq, "\t\tE ");
                else
                        last = rq;
        }
        if (last) {
                if (count > max) {
                        drm_printf(m,
                                   "\t\t...skipping %d executing requests...\n",
                                   count - max);
                }
                show_request(m, last, "\t\tE ");
        }

        if (execlists->switch_priority_hint != INT_MIN)
                drm_printf(m, "\t\tSwitch priority hint: %d\n",
                           READ_ONCE(execlists->switch_priority_hint));
        if (execlists->queue_priority_hint != INT_MIN)
                drm_printf(m, "\t\tQueue priority hint: %d\n",
                           READ_ONCE(execlists->queue_priority_hint));

        last = NULL;
        count = 0;
        for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
                struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
                int i;

                priolist_for_each_request(rq, p, i) {
                        if (count++ < max - 1)
                                show_request(m, rq, "\t\tQ ");
                        else
                                last = rq;
                }
        }
        if (last) {
                if (count > max) {
                        drm_printf(m,
                                   "\t\t...skipping %d queued requests...\n",
                                   count - max);
                }
                show_request(m, last, "\t\tQ ");
        }

        last = NULL;
        count = 0;
        for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
                struct virtual_engine *ve =
                        rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
                struct i915_request *rq = READ_ONCE(ve->request);

                if (rq) {
                        if (count++ < max - 1)
                                show_request(m, rq, "\t\tV ");
                        else
                                last = rq;
                }
        }
        if (last) {
                if (count > max) {
                        drm_printf(m,
                                   "\t\t...skipping %d virtual requests...\n",
                                   count - max);
                }
                show_request(m, last, "\t\tV ");
        }

        spin_unlock_irqrestore(&engine->active.lock, flags);
}

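/*
 * Reset a pinned logical ring context after a GPU hang: optionally scrub
 * the context image back to the default state, then rewind the ring to
 * @head so that the request can be rerun.
 */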
void intel_lr_context_reset(struct intel_engine_cs *engine,
                            struct intel_context *ce,
                            u32 head,
                            bool scrub)
{
        GEM_BUG_ON(!intel_context_is_pinned(ce));

        /*
         * We want a simple context + ring to execute the breadcrumb update.
         * We cannot rely on the context being intact across the GPU hang,
         * so clear it and rebuild just what we need for the breadcrumb.
         * All pending requests for this context will be zapped, and any
         * future request will be after userspace has had the opportunity
         * to recreate its own state.
         */
        if (scrub)
                restore_default_state(ce, engine);

        /* Rerun the request; its payload has been neutered (if guilty). */
        __execlists_update_reg_state(ce, engine, head);
}

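/*
 * Report whether the engine is using the execlists submission backend,
 * identified by its set_default_submission vfunc.
 */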
bool
intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine)
{
        return engine->set_default_submission ==
               intel_execlists_set_default_submission;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_lrc.c"
#endif