// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_execlist.h"

#include <drm/drm_managed.h>

#include "instructions/xe_mi_commands.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_lrc_layout.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_gt.h"
#include "xe_hw_fence.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_mmio.h"
#include "xe_mocs.h"
#include "xe_ring_ops_types.h"
#include "xe_sched_job.h"

#define XE_EXECLIST_HANG_LIMIT 1

#define SW_CTX_ID_SHIFT 37
#define SW_CTX_ID_WIDTH 11
#define XEHP_SW_CTX_ID_SHIFT  39
#define XEHP_SW_CTX_ID_WIDTH  16

#define SW_CTX_ID \
	GENMASK_ULL(SW_CTX_ID_WIDTH + SW_CTX_ID_SHIFT - 1, \
		    SW_CTX_ID_SHIFT)

#define XEHP_SW_CTX_ID \
	GENMASK_ULL(XEHP_SW_CTX_ID_WIDTH + XEHP_SW_CTX_ID_SHIFT - 1, \
		    XEHP_SW_CTX_ID_SHIFT)

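/*
 * Point the engine at @lrc: tag the context descriptor with @ctx_id, write it
 * into the ExecList Submit Queue registers and trigger a load via
 * RING_EXECLIST_CONTROL.
 */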
static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
			u32 ctx_id)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_mmio *mmio = &gt->mmio;
	struct xe_device *xe = gt_to_xe(gt);
	u64 lrc_desc;

	lrc_desc = xe_lrc_descriptor(lrc);

	if (GRAPHICS_VERx100(xe) >= 1250) {
		xe_gt_assert(hwe->gt, FIELD_FIT(XEHP_SW_CTX_ID, ctx_id));
		lrc_desc |= FIELD_PREP(XEHP_SW_CTX_ID, ctx_id);
	} else {
		xe_gt_assert(hwe->gt, FIELD_FIT(SW_CTX_ID, ctx_id));
		lrc_desc |= FIELD_PREP(SW_CTX_ID, ctx_id);
	}

	if (hwe->class == XE_ENGINE_CLASS_COMPUTE)
		xe_mmio_write32(mmio, RCU_MODE,
				_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));

	xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
	lrc->ring.old_tail = lrc->ring.tail;

	/*
	 * Make sure the context image is complete before we submit it to HW.
	 *
	 * Ostensibly, writes (including the WCB) should be flushed prior to
	 * an uncached write such as our mmio register access, but the
	 * empirical evidence (esp. on Braswell) suggests that the WC write
	 * into memory may not be visible to the HW prior to the completion of
	 * the UC register write and that we may begin execution from the
	 * context before its image is complete, leading to invalid PD chasing.
	 */
	wmb();

	xe_mmio_write32(mmio, RING_HWS_PGA(hwe->mmio_base),
			xe_bo_ggtt_addr(hwe->hwsp));
	xe_mmio_read32(mmio, RING_HWS_PGA(hwe->mmio_base));
	xe_mmio_write32(mmio, RING_MODE(hwe->mmio_base),
			_MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));

	xe_mmio_write32(mmio, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base),
			lower_32_bits(lrc_desc));
	xe_mmio_write32(mmio, RING_EXECLIST_SQ_CONTENTS_HI(hwe->mmio_base),
			upper_32_bits(lrc_desc));
	xe_mmio_write32(mmio, RING_EXECLIST_CONTROL(hwe->mmio_base),
			EL_CTRL_LOAD);
}

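/*
 * Start (or restart) @exl on @port. A new software context ID is assigned
 * whenever the port switches to a different queue or the queue has never run;
 * ID 0 stays reserved for the kernel context.
 */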
static void __xe_execlist_port_start(struct xe_execlist_port *port,
				     struct xe_execlist_exec_queue *exl)
{
	struct xe_device *xe = gt_to_xe(port->hwe->gt);
	int max_ctx = FIELD_MAX(SW_CTX_ID);

	if (GRAPHICS_VERx100(xe) >= 1250)
		max_ctx = FIELD_MAX(XEHP_SW_CTX_ID);

	xe_execlist_port_assert_held(port);

	if (port->running_exl != exl || !exl->has_run) {
		port->last_ctx_id++;

		/* 0 is reserved for the kernel context */
		if (port->last_ctx_id > max_ctx)
			port->last_ctx_id = 1;
	}

	__start_lrc(port->hwe, exl->q->lrc[0], port->last_ctx_id);
	port->running_exl = exl;
	exl->has_run = true;
}

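/*
 * Park the port on its own no-op LRC (context ID 0, reserved for the kernel
 * context) once there is nothing left to run.
 */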
static void __xe_execlist_port_idle(struct xe_execlist_port *port)
{
	u32 noop[2] = { MI_NOOP, MI_NOOP };

	xe_execlist_port_assert_held(port);

	if (!port->running_exl)
		return;

	xe_lrc_write_ring(port->lrc, noop, sizeof(noop));
	__start_lrc(port->hwe, port->lrc, 0);
	port->running_exl = NULL;
}

static bool xe_execlist_is_idle(struct xe_execlist_exec_queue *exl)
{
	struct xe_lrc *lrc = exl->q->lrc[0];

	return lrc->ring.tail == lrc->ring.old_tail;
}

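/*
 * Walk the per-priority active lists from highest to lowest priority and
 * start the first queue that still has ring contents to execute; queues that
 * have gone idle are dropped along the way, and the port is idled if nothing
 * is runnable.
 */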
static void __xe_execlist_port_start_next_active(struct xe_execlist_port *port)
{
	struct xe_execlist_exec_queue *exl = NULL;
	int i;

	xe_execlist_port_assert_held(port);

	for (i = ARRAY_SIZE(port->active) - 1; i >= 0; i--) {
		while (!list_empty(&port->active[i])) {
			exl = list_first_entry(&port->active[i],
					       struct xe_execlist_exec_queue,
					       active_link);
			list_del(&exl->active_link);

			if (xe_execlist_is_idle(exl)) {
				exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
				continue;
			}

			list_add_tail(&exl->active_link, &port->active[i]);
			__xe_execlist_port_start(port, exl);
			return;
		}
	}

	__xe_execlist_port_idle(port);
}

static u64 read_execlist_status(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	u32 hi, lo;

	lo = xe_mmio_read32(&gt->mmio, RING_EXECLIST_STATUS_LO(hwe->mmio_base));
	hi = xe_mmio_read32(&gt->mmio, RING_EXECLIST_STATUS_HI(hwe->mmio_base));

	return lo | (u64)hi << 32;
}

static void xe_execlist_port_irq_handler_locked(struct xe_execlist_port *port)
{
	u64 status;

	xe_execlist_port_assert_held(port);

	status = read_execlist_status(port->hwe);
	if (status & BIT(7))
		return;

	__xe_execlist_port_start_next_active(port);
}

static void xe_execlist_port_irq_handler(struct xe_hw_engine *hwe,
					 u16 intr_vec)
{
	struct xe_execlist_port *port = hwe->exl_port;

	spin_lock(&port->lock);
	xe_execlist_port_irq_handler_locked(port);
	spin_unlock(&port->lock);
}

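/*
 * Re-evaluate what the port should be running, but only if the waking queue
 * could outrank whatever is currently executing (or the port is idle).
 */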
static void xe_execlist_port_wake_locked(struct xe_execlist_port *port,
					 enum xe_exec_queue_priority priority)
{
	xe_execlist_port_assert_held(port);

	if (port->running_exl && port->running_exl->active_priority >= priority)
		return;

	__xe_execlist_port_start_next_active(port);
}

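/*
 * Put @exl on its port's active list for the queue's current scheduling
 * priority, moving it between lists if the priority has changed, then poke
 * the port.
 */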
static void xe_execlist_make_active(struct xe_execlist_exec_queue *exl)
{
	struct xe_execlist_port *port = exl->port;
	enum xe_exec_queue_priority priority = exl->q->sched_props.priority;

	XE_WARN_ON(priority == XE_EXEC_QUEUE_PRIORITY_UNSET);
	XE_WARN_ON(priority < 0);
	XE_WARN_ON(priority >= ARRAY_SIZE(exl->port->active));

	spin_lock_irq(&port->lock);

	if (exl->active_priority != priority &&
	    exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET) {
		/* Priority changed, move it to the right list */
		list_del(&exl->active_link);
		exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
	}

	if (exl->active_priority == XE_EXEC_QUEUE_PRIORITY_UNSET) {
		exl->active_priority = priority;
		list_add_tail(&exl->active_link, &port->active[priority]);
	}

	xe_execlist_port_wake_locked(exl->port, priority);

	spin_unlock_irq(&port->lock);
}

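/*
 * Fallback 1 Hz poll of the port in case an execlist interrupt was missed;
 * see the TODO in xe_execlist_port_create().
 */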
static void xe_execlist_port_irq_fail_timer(struct timer_list *timer)
{
	struct xe_execlist_port *port =
		container_of(timer, struct xe_execlist_port, irq_fail);

	spin_lock_irq(&port->lock);
	xe_execlist_port_irq_handler_locked(port);
	spin_unlock_irq(&port->lock);

	port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
	add_timer(&port->irq_fail);
}

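/*
 * Create the execlist submission port for @hwe: a drmm-managed allocation
 * holding the idle LRC, the per-priority run lists, the port lock and the
 * fallback poll timer. Also installs the execlist IRQ handler on the engine.
 * Returns the port or an ERR_PTR() on failure.
 */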
struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe,
						 struct xe_hw_engine *hwe)
{
	struct drm_device *drm = &xe->drm;
	struct xe_execlist_port *port;
	int i, err;

	port = drmm_kzalloc(drm, sizeof(*port), GFP_KERNEL);
	if (!port) {
		err = -ENOMEM;
		goto err;
	}

	port->hwe = hwe;

	port->lrc = xe_lrc_create(hwe, NULL, SZ_16K);
	if (IS_ERR(port->lrc)) {
		err = PTR_ERR(port->lrc);
		goto err;
	}

	spin_lock_init(&port->lock);
	for (i = 0; i < ARRAY_SIZE(port->active); i++)
		INIT_LIST_HEAD(&port->active[i]);

	port->last_ctx_id = 1;
	port->running_exl = NULL;

	hwe->irq_handler = xe_execlist_port_irq_handler;

	/* TODO: Fix the interrupt code so it doesn't race like mad */
	timer_setup(&port->irq_fail, xe_execlist_port_irq_fail_timer, 0);
	port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
	add_timer(&port->irq_fail);

	return port;

err:
	return ERR_PTR(err);
}

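/*
 * Tear down @port: stop the fallback timer, detach the engine IRQ handler
 * under the device interrupt lock and drop the idle LRC. The port allocation
 * itself is drmm-managed.
 */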
void xe_execlist_port_destroy(struct xe_execlist_port *port)
{
	del_timer(&port->irq_fail);

	/* Prevent an interrupt while we're destroying */
	spin_lock_irq(&gt_to_xe(port->hwe->gt)->irq.lock);
	port->hwe->irq_handler = NULL;
	spin_unlock_irq(&gt_to_xe(port->hwe->gt)->irq.lock);

	xe_lrc_put(port->lrc);
}

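/* drm_sched backend: emit the job's ring commands and make its queue active */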
static struct dma_fence *
execlist_run_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct xe_exec_queue *q = job->q;
	struct xe_execlist_exec_queue *exl = job->q->execlist;

	q->ring_ops->emit_job(job);
	xe_execlist_make_active(exl);

	return job->fence;
}

static void execlist_job_free(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);

	xe_exec_queue_update_run_ticks(job->q);
	xe_sched_job_put(job);
}

static const struct drm_sched_backend_ops drm_sched_ops = {
	.run_job = execlist_run_job,
	.free_job = execlist_job_free,
};

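/*
 * Create a drm_gpu_scheduler and scheduler entity for the queue and attach it
 * to the engine's execlist port. Only used when GuC submission is disabled.
 */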
static int execlist_exec_queue_init(struct xe_exec_queue *q)
{
	struct drm_gpu_scheduler *sched;
	struct xe_execlist_exec_queue *exl;
	struct xe_device *xe = gt_to_xe(q->gt);
	int err;

	xe_assert(xe, !xe_device_uc_enabled(xe));

	drm_info(&xe->drm, "Enabling execlist submission (GuC submission disabled)\n");

	exl = kzalloc(sizeof(*exl), GFP_KERNEL);
	if (!exl)
		return -ENOMEM;

	exl->q = q;

	err = drm_sched_init(&exl->sched, &drm_sched_ops, NULL, 1,
			     q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES,
			     XE_SCHED_HANG_LIMIT, XE_SCHED_JOB_TIMEOUT,
			     NULL, NULL, q->hwe->name,
			     gt_to_xe(q->gt)->drm.dev);
	if (err)
		goto err_free;

	sched = &exl->sched;
	err = drm_sched_entity_init(&exl->entity, 0, &sched, 1, NULL);
	if (err)
		goto err_sched;

	exl->port = q->hwe->exl_port;
	exl->has_run = false;
	exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
	q->execlist = exl;
	q->entity = &exl->entity;

	xe_exec_queue_assign_name(q, ffs(q->logical_mask) - 1);

	return 0;

err_sched:
	drm_sched_fini(&exl->sched);
err_free:
	kfree(exl);
	return err;
}

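/*
 * Deferred teardown done from a worker: warn and unlink if the queue is
 * unexpectedly still on a port active list, tear down its scheduler entity
 * and scheduler, then finish the exec queue itself.
 */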
static void execlist_exec_queue_fini_async(struct work_struct *w)
{
	struct xe_execlist_exec_queue *ee =
		container_of(w, struct xe_execlist_exec_queue, fini_async);
	struct xe_exec_queue *q = ee->q;
	struct xe_execlist_exec_queue *exl = q->execlist;
	struct xe_device *xe = gt_to_xe(q->gt);
	unsigned long flags;

	xe_assert(xe, !xe_device_uc_enabled(xe));

	spin_lock_irqsave(&exl->port->lock, flags);
	if (WARN_ON(exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET))
		list_del(&exl->active_link);
	spin_unlock_irqrestore(&exl->port->lock, flags);

	drm_sched_entity_fini(&exl->entity);
	drm_sched_fini(&exl->sched);
	kfree(exl);

	xe_exec_queue_fini(q);
}

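/*
 * Most of the remaining exec queue ops are not implemented yet for execlist
 * submission (NIY); for now they are no-ops that report success.
 */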
static void execlist_exec_queue_kill(struct xe_exec_queue *q)
{
	/* NIY */
}

static void execlist_exec_queue_fini(struct xe_exec_queue *q)
{
	INIT_WORK(&q->execlist->fini_async, execlist_exec_queue_fini_async);
	queue_work(system_unbound_wq, &q->execlist->fini_async);
}

static int execlist_exec_queue_set_priority(struct xe_exec_queue *q,
					    enum xe_exec_queue_priority priority)
{
	/* NIY */
	return 0;
}

static int execlist_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us)
{
	/* NIY */
	return 0;
}

static int execlist_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
						   u32 preempt_timeout_us)
{
	/* NIY */
	return 0;
}

static int execlist_exec_queue_suspend(struct xe_exec_queue *q)
{
	/* NIY */
	return 0;
}

static int execlist_exec_queue_suspend_wait(struct xe_exec_queue *q)
{
	/* NIY */
	return 0;
}

static void execlist_exec_queue_resume(struct xe_exec_queue *q)
{
	/* NIY */
}

static bool execlist_exec_queue_reset_status(struct xe_exec_queue *q)
{
	/* NIY */
	return false;
}

static const struct xe_exec_queue_ops execlist_exec_queue_ops = {
	.init = execlist_exec_queue_init,
	.kill = execlist_exec_queue_kill,
	.fini = execlist_exec_queue_fini,
	.set_priority = execlist_exec_queue_set_priority,
	.set_timeslice = execlist_exec_queue_set_timeslice,
	.set_preempt_timeout = execlist_exec_queue_set_preempt_timeout,
	.suspend = execlist_exec_queue_suspend,
	.suspend_wait = execlist_exec_queue_suspend_wait,
	.resume = execlist_exec_queue_resume,
	.reset_status = execlist_exec_queue_reset_status,
};

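/*
 * Install the execlist exec queue ops on @gt; nothing to do when GuC
 * submission is enabled.
 */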
int xe_execlist_init(struct xe_gt *gt)
{
	/* GuC submission enabled, nothing to do */
	if (xe_device_uc_enabled(gt_to_xe(gt)))
		return 0;

	gt->exec_queue_ops = &execlist_exec_queue_ops;

	return 0;
}