/*
 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Zhiyuan Lv <zhiyuan.lv@intel.com>
 *    Zhi Wang <zhi.a.wang@intel.com>
 *
 * Contributors:
 *    Min He <min.he@intel.com>
 *    Bing Niu <bing.niu@intel.com>
 *    Ping Gao <ping.a.gao@intel.com>
 *    Tina Zhang <tina.zhang@intel.com>
 *
 */

#include "i915_drv.h"
#include "gvt.h"

#define _EL_OFFSET_STATUS       0x234
#define _EL_OFFSET_STATUS_BUF   0x370
#define _EL_OFFSET_STATUS_PTR   0x3A0

#define execlist_ring_mmio(gvt, ring_id, offset) \
	(gvt->dev_priv->engine[ring_id]->mmio_base + (offset))

#define valid_context(ctx) ((ctx)->valid)
#define same_context(a, b) (((a)->context_id == (b)->context_id) && \
		((a)->lrca == (b)->lrca))

static int context_switch_events[] = {
	[RCS] = RCS_AS_CONTEXT_SWITCH,
	[BCS] = BCS_AS_CONTEXT_SWITCH,
	[VCS] = VCS_AS_CONTEXT_SWITCH,
	[VCS2] = VCS2_AS_CONTEXT_SWITCH,
	[VECS] = VECS_AS_CONTEXT_SWITCH,
};

static int ring_id_to_context_switch_event(int ring_id)
{
	if (WARN_ON(ring_id < RCS ||
		    ring_id >= ARRAY_SIZE(context_switch_events)))
		return -EINVAL;

	return context_switch_events[ring_id];
}

static void switch_virtual_execlist_slot(struct intel_vgpu_execlist *execlist)
{
	gvt_dbg_el("[before] running slot %d/context %x pending slot %d\n",
			execlist->running_slot ?
			execlist->running_slot->index : -1,
			execlist->running_context ?
			execlist->running_context->context_id : 0,
			execlist->pending_slot ?
			execlist->pending_slot->index : -1);

	execlist->running_slot = execlist->pending_slot;
	execlist->pending_slot = NULL;
	execlist->running_context = execlist->running_context ?
		&execlist->running_slot->ctx[0] : NULL;

	gvt_dbg_el("[after] running slot %d/context %x pending slot %d\n",
			execlist->running_slot ?
			execlist->running_slot->index : -1,
			execlist->running_context ?
			execlist->running_context->context_id : 0,
			execlist->pending_slot ?
			execlist->pending_slot->index : -1);
}

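/*
 * Refresh the vGPU's EXECLIST_STATUS vreg so the guest reads back a status
 * word that matches the emulated execlist state: the pointer/active/valid
 * bits track the running slot index, the context ID comes from the running
 * context, and the queue-full bit reflects whether a pending slot exists.
 */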
static void emulate_execlist_status(struct intel_vgpu_execlist *execlist)
{
	struct intel_vgpu_execlist_slot *running = execlist->running_slot;
	struct intel_vgpu_execlist_slot *pending = execlist->pending_slot;
	struct execlist_ctx_descriptor_format *desc = execlist->running_context;
	struct intel_vgpu *vgpu = execlist->vgpu;
	struct execlist_status_format status;
	int ring_id = execlist->ring_id;
	u32 status_reg = execlist_ring_mmio(vgpu->gvt,
			ring_id, _EL_OFFSET_STATUS);

	status.ldw = vgpu_vreg(vgpu, status_reg);
	status.udw = vgpu_vreg(vgpu, status_reg + 4);

	if (running) {
		status.current_execlist_pointer = !!running->index;
		status.execlist_write_pointer = !!!running->index;
		status.execlist_0_active = status.execlist_0_valid =
			!!!(running->index);
		status.execlist_1_active = status.execlist_1_valid =
			!!(running->index);
	} else {
		status.context_id = 0;
		status.execlist_0_active = status.execlist_0_valid = 0;
		status.execlist_1_active = status.execlist_1_valid = 0;
	}

	status.context_id = desc ? desc->context_id : 0;
	status.execlist_queue_full = !!(pending);

	vgpu_vreg(vgpu, status_reg) = status.ldw;
	vgpu_vreg(vgpu, status_reg + 4) = status.udw;

	gvt_dbg_el("vgpu%d: status reg offset %x ldw %x udw %x\n",
		vgpu->id, status_reg, status.ldw, status.udw);
}

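/*
 * Emit one context status event into the vGPU's context status buffer
 * (CSB), advance the virtual CSB write pointer and, unless the caller asks
 * to defer it, raise the context-switch interrupt for this ring.
 */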
static void emulate_csb_update(struct intel_vgpu_execlist *execlist,
		struct execlist_context_status_format *status,
		bool trigger_interrupt_later)
{
	struct intel_vgpu *vgpu = execlist->vgpu;
	int ring_id = execlist->ring_id;
	struct execlist_context_status_pointer_format ctx_status_ptr;
	u32 write_pointer;
	u32 ctx_status_ptr_reg, ctx_status_buf_reg, offset;

	ctx_status_ptr_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
			_EL_OFFSET_STATUS_PTR);
	ctx_status_buf_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
			_EL_OFFSET_STATUS_BUF);

	ctx_status_ptr.dw = vgpu_vreg(vgpu, ctx_status_ptr_reg);

	write_pointer = ctx_status_ptr.write_ptr;

	if (write_pointer == 0x7)
		write_pointer = 0;
	else {
		++write_pointer;
		write_pointer %= 0x6;
	}

	offset = ctx_status_buf_reg + write_pointer * 8;

	vgpu_vreg(vgpu, offset) = status->ldw;
	vgpu_vreg(vgpu, offset + 4) = status->udw;

	ctx_status_ptr.write_ptr = write_pointer;
	vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw;

	gvt_dbg_el("vgpu%d: w pointer %u reg %x csb l %x csb h %x\n",
		vgpu->id, write_pointer, offset, status->ldw, status->udw);

	if (trigger_interrupt_later)
		return;

	intel_vgpu_trigger_virtual_event(vgpu,
			ring_id_to_context_switch_event(execlist->ring_id));
}

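/*
 * Emulate the hardware response to a context being scheduled out: either an
 * element switch to the second context of the running slot, or completion
 * of the last element (optionally switching to a pending slot), generating
 * the matching CSB events.
 */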
static int emulate_execlist_ctx_schedule_out(
		struct intel_vgpu_execlist *execlist,
		struct execlist_ctx_descriptor_format *ctx)
{
	struct intel_vgpu_execlist_slot *running = execlist->running_slot;
	struct intel_vgpu_execlist_slot *pending = execlist->pending_slot;
	struct execlist_ctx_descriptor_format *ctx0 = &running->ctx[0];
	struct execlist_ctx_descriptor_format *ctx1 = &running->ctx[1];
	struct execlist_context_status_format status;

	memset(&status, 0, sizeof(status));

	gvt_dbg_el("schedule out context id %x\n", ctx->context_id);

	if (WARN_ON(!same_context(ctx, execlist->running_context))) {
		gvt_err("schedule out context is not running context, "
				"ctx id %x running ctx id %x\n",
				ctx->context_id,
				execlist->running_context->context_id);
		return -EINVAL;
	}

	/* ctx1 is valid, ctx0/ctx is scheduled-out -> element switch */
	if (valid_context(ctx1) && same_context(ctx0, ctx)) {
		gvt_dbg_el("ctx 1 valid, ctx/ctx 0 is scheduled-out\n");

		execlist->running_context = ctx1;

		emulate_execlist_status(execlist);

		status.context_complete = status.element_switch = 1;
		status.context_id = ctx->context_id;

		emulate_csb_update(execlist, &status, false);
		/*
		 * ctx1 is not valid, ctx == ctx0
		 * ctx1 is valid, ctx1 == ctx
		 *	--> last element is finished
		 * emulate:
		 *	active-to-idle if there is *no* pending execlist
		 *	context-complete if there *is* pending execlist
		 */
	} else if ((!valid_context(ctx1) && same_context(ctx0, ctx))
			|| (valid_context(ctx1) && same_context(ctx1, ctx))) {
		gvt_dbg_el("need to switch virtual execlist slot\n");

		switch_virtual_execlist_slot(execlist);

		emulate_execlist_status(execlist);

		status.context_complete = status.active_to_idle = 1;
		status.context_id = ctx->context_id;

		if (!pending) {
			emulate_csb_update(execlist, &status, false);
		} else {
			emulate_csb_update(execlist, &status, true);

			memset(&status, 0, sizeof(status));

			status.idle_to_active = 1;
			status.context_id = 0;

			emulate_csb_update(execlist, &status, false);
		}
	} else {
		WARN_ON(1);
		return -EINVAL;
	}

	return 0;
}

static struct intel_vgpu_execlist_slot *get_next_execlist_slot(
		struct intel_vgpu_execlist *execlist)
{
	struct intel_vgpu *vgpu = execlist->vgpu;
	int ring_id = execlist->ring_id;
	u32 status_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
			_EL_OFFSET_STATUS);
	struct execlist_status_format status;

	status.ldw = vgpu_vreg(vgpu, status_reg);
	status.udw = vgpu_vreg(vgpu, status_reg + 4);

	if (status.execlist_queue_full) {
		gvt_err("virtual execlist slots are full\n");
		return NULL;
	}

	return &execlist->slot[status.execlist_write_pointer];
}

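/*
 * Emulate an ELSP submission: place the two submitted context descriptors
 * into a free virtual execlist slot and generate the status/CSB updates the
 * guest expects (idle-to-active, lite-restore + preempted, or a queued
 * pending slot).
 */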
static int emulate_execlist_schedule_in(struct intel_vgpu_execlist *execlist,
		struct execlist_ctx_descriptor_format ctx[2])
{
	struct intel_vgpu_execlist_slot *running = execlist->running_slot;
	struct intel_vgpu_execlist_slot *slot =
		get_next_execlist_slot(execlist);

	struct execlist_ctx_descriptor_format *ctx0, *ctx1;
	struct execlist_context_status_format status;

	gvt_dbg_el("emulate schedule-in\n");

	if (!slot) {
		gvt_err("no available execlist slot\n");
		return -EINVAL;
	}

	memset(&status, 0, sizeof(status));
	memset(slot->ctx, 0, sizeof(slot->ctx));

	slot->ctx[0] = ctx[0];
	slot->ctx[1] = ctx[1];

	gvt_dbg_el("alloc slot index %d ctx 0 %x ctx 1 %x\n",
			slot->index, ctx[0].context_id,
			ctx[1].context_id);

	/*
	 * no running execlist, make this write bundle as running execlist
	 * -> idle-to-active
	 */
	if (!running) {
		gvt_dbg_el("no current running execlist\n");

		execlist->running_slot = slot;
		execlist->pending_slot = NULL;
		execlist->running_context = &slot->ctx[0];

		gvt_dbg_el("running slot index %d running context %x\n",
				execlist->running_slot->index,
				execlist->running_context->context_id);

		emulate_execlist_status(execlist);

		status.idle_to_active = 1;
		status.context_id = 0;

		emulate_csb_update(execlist, &status, false);
		return 0;
	}

	ctx0 = &running->ctx[0];
	ctx1 = &running->ctx[1];

	gvt_dbg_el("current running slot index %d ctx 0 %x ctx 1 %x\n",
		running->index, ctx0->context_id, ctx1->context_id);

	/*
	 * already has a running execlist
	 *	a. running ctx1 is valid,
	 *	   ctx0 is finished, and running ctx1 == new execlist ctx[0]
	 *	b. running ctx1 is not valid,
	 *	   ctx0 == new execlist ctx[0]
	 * ----> lite-restore + preempted
	 */
	if ((valid_context(ctx1) && same_context(ctx1, &slot->ctx[0]) &&
		/* condition a */
		(!same_context(ctx0, execlist->running_context))) ||
			(!valid_context(ctx1) &&
			 same_context(ctx0, &slot->ctx[0]))) { /* condition b */
		gvt_dbg_el("need to switch virtual execlist slot\n");

		execlist->pending_slot = slot;
		switch_virtual_execlist_slot(execlist);

		emulate_execlist_status(execlist);

		status.lite_restore = status.preempted = 1;
		status.context_id = ctx[0].context_id;

		emulate_csb_update(execlist, &status, false);
	} else {
		gvt_dbg_el("emulate as pending slot\n");
		/*
		 * otherwise
		 * --> emulate pending execlist exist + but no preemption case
		 */
		execlist->pending_slot = slot;
		emulate_execlist_status(execlist);
	}
	return 0;
}

static void free_workload(struct intel_vgpu_workload *workload)
{
	intel_vgpu_unpin_mm(workload->shadow_mm);
	intel_gvt_mm_unreference(workload->shadow_mm);
	kmem_cache_free(workload->vgpu->workloads, workload);
}

#define get_desc_from_elsp_dwords(ed, i) \
	((struct execlist_ctx_descriptor_format *)&((ed)->data[i * 2]))

static void prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
{
	const int gmadr_bytes = workload->vgpu->gvt->device_info.gmadr_bytes_in_cmd;
	struct intel_shadow_bb_entry *entry_obj;

	/* pin the gem object to ggtt */
	list_for_each_entry(entry_obj, &workload->shadow_bb, list) {
		struct i915_vma *vma;

		vma = i915_gem_object_ggtt_pin(entry_obj->obj, NULL, 0, 4, 0);
		if (IS_ERR(vma)) {
			gvt_err("Cannot pin\n");
			return;
		}

		/* FIXME: we are not tracking our pinned VMA leaving it
		 * up to the core to fix up the stray pin_count upon
		 * free.
		 */

		/* update the relocated gma with the shadow batch buffer */
		entry_obj->bb_start_cmd_va[1] = i915_ggtt_offset(vma);
		if (gmadr_bytes == 8)
			entry_obj->bb_start_cmd_va[2] = 0;
	}
}

static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx)
{
	int ring_id = wa_ctx->workload->ring_id;
	struct i915_gem_context *shadow_ctx =
		wa_ctx->workload->vgpu->shadow_ctx;
	struct drm_i915_gem_object *ctx_obj =
		shadow_ctx->engine[ring_id].state->obj;
	struct execlist_ring_context *shadow_ring_context;
	struct page *page;

	page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
	shadow_ring_context = kmap_atomic(page);

	shadow_ring_context->bb_per_ctx_ptr.val =
		(shadow_ring_context->bb_per_ctx_ptr.val &
		(~PER_CTX_ADDR_MASK)) | wa_ctx->per_ctx.shadow_gma;
	shadow_ring_context->rcs_indirect_ctx.val =
		(shadow_ring_context->rcs_indirect_ctx.val &
		(~INDIRECT_CTX_ADDR_MASK)) | wa_ctx->indirect_ctx.shadow_gma;

	kunmap_atomic(shadow_ring_context);
	return 0;
}

static void prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
{
	struct i915_vma *vma;
	unsigned char *per_ctx_va =
		(unsigned char *)wa_ctx->indirect_ctx.shadow_va +
		wa_ctx->indirect_ctx.size;

	if (wa_ctx->indirect_ctx.size == 0)
		return;

	vma = i915_gem_object_ggtt_pin(wa_ctx->indirect_ctx.obj, NULL,
				       0, CACHELINE_BYTES, 0);
	if (IS_ERR(vma)) {
		gvt_err("Cannot pin indirect ctx obj\n");
		return;
	}

	/* FIXME: we are not tracking our pinned VMA leaving it
	 * up to the core to fix up the stray pin_count upon
	 * free.
	 */

	wa_ctx->indirect_ctx.shadow_gma = i915_ggtt_offset(vma);

	wa_ctx->per_ctx.shadow_gma = *((unsigned int *)per_ctx_va + 1);
	memset(per_ctx_va, 0, CACHELINE_BYTES);

	update_wa_ctx_2_shadow_ctx(wa_ctx);
}

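/*
 * Prepare an execlist workload for dispatch: pin the shadow mm, sync
 * out-of-sync pages, flush post-shadow updates, pin the shadow batch and
 * workaround-context buffers, and emulate the schedule-in for the first
 * workload of an ELSP write.
 */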
static int prepare_execlist_workload(struct intel_vgpu_workload *workload)
{
	struct intel_vgpu *vgpu = workload->vgpu;
	struct execlist_ctx_descriptor_format ctx[2];
	int ring_id = workload->ring_id;

	intel_vgpu_pin_mm(workload->shadow_mm);
	intel_vgpu_sync_oos_pages(workload->vgpu);
	intel_vgpu_flush_post_shadow(workload->vgpu);
	prepare_shadow_batch_buffer(workload);
	prepare_shadow_wa_ctx(&workload->wa_ctx);
	if (!workload->emulate_schedule_in)
		return 0;

	ctx[0] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 1);
	ctx[1] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 0);

	return emulate_execlist_schedule_in(&vgpu->execlist[ring_id], ctx);
}

static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
{
	/* release all the shadow batch buffers */
	if (!list_empty(&workload->shadow_bb)) {
		struct intel_shadow_bb_entry *entry_obj =
			list_first_entry(&workload->shadow_bb,
					 struct intel_shadow_bb_entry,
					 list);
		struct intel_shadow_bb_entry *temp;

		list_for_each_entry_safe(entry_obj, temp, &workload->shadow_bb,
					 list) {
			i915_gem_object_unpin_map(entry_obj->obj);
			i915_gem_object_put(entry_obj->obj);
			list_del(&entry_obj->list);
			kfree(entry_obj);
		}
	}
}

static void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
{
	if (wa_ctx->indirect_ctx.size == 0)
		return;

	i915_gem_object_unpin_map(wa_ctx->indirect_ctx.obj);
	i915_gem_object_put(wa_ctx->indirect_ctx.obj);
}

static int complete_execlist_workload(struct intel_vgpu_workload *workload)
{
	struct intel_vgpu *vgpu = workload->vgpu;
	struct intel_vgpu_execlist *execlist =
		&vgpu->execlist[workload->ring_id];
	struct intel_vgpu_workload *next_workload;
	struct list_head *next = workload_q_head(vgpu, workload->ring_id)->next;
	bool lite_restore = false;
	int ret;

	gvt_dbg_el("complete workload %p status %d\n", workload,
			workload->status);

	release_shadow_batch_buffer(workload);
	release_shadow_wa_ctx(&workload->wa_ctx);

	if (workload->status || vgpu->resetting)
		goto out;

	if (!list_empty(workload_q_head(vgpu, workload->ring_id))) {
		struct execlist_ctx_descriptor_format *this_desc, *next_desc;

		next_workload = container_of(next,
				struct intel_vgpu_workload, list);
		this_desc = &workload->ctx_desc;
		next_desc = &next_workload->ctx_desc;

		lite_restore = same_context(this_desc, next_desc);
	}

	if (lite_restore) {
		gvt_dbg_el("next context == current - no schedule-out\n");
		free_workload(workload);
		return 0;
	}

	ret = emulate_execlist_ctx_schedule_out(execlist, &workload->ctx_desc);
	if (ret)
		goto err;
out:
	free_workload(workload);
	return 0;
err:
	free_workload(workload);
	return ret;
}

#define RING_CTX_OFF(x) \
	offsetof(struct execlist_ring_context, x)

static void read_guest_pdps(struct intel_vgpu *vgpu,
		u64 ring_context_gpa, u32 pdp[8])
{
	u64 gpa;
	int i;

	gpa = ring_context_gpa + RING_CTX_OFF(pdp3_UDW.val);

	for (i = 0; i < 8; i++)
		intel_gvt_hypervisor_read_gpa(vgpu,
				gpa + i * 8, &pdp[7 - i], 4);
}

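/*
 * Find or create the shadow PPGTT mm that matches the page-directory
 * pointers carried in the guest ring context of this workload.
 */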
static int prepare_mm(struct intel_vgpu_workload *workload)
{
	struct execlist_ctx_descriptor_format *desc = &workload->ctx_desc;
	struct intel_vgpu_mm *mm;
	int page_table_level;
	u32 pdp[8];

	if (desc->addressing_mode == 1) { /* legacy 32-bit */
		page_table_level = 3;
	} else if (desc->addressing_mode == 3) { /* legacy 64 bit */
		page_table_level = 4;
	} else {
		gvt_err("Advanced Context mode(SVM) is not supported!\n");
		return -EINVAL;
	}

	read_guest_pdps(workload->vgpu, workload->ring_context_gpa, pdp);

	mm = intel_vgpu_find_ppgtt_mm(workload->vgpu, page_table_level, pdp);
	if (mm) {
		intel_gvt_mm_reference(mm);
	} else {
		mm = intel_vgpu_create_mm(workload->vgpu, INTEL_GVT_MM_PPGTT,
				pdp, page_table_level, 0);
		if (IS_ERR(mm)) {
			gvt_err("fail to create mm object.\n");
			return PTR_ERR(mm);
		}
	}
	workload->shadow_mm = mm;
	return 0;
}

#define get_last_workload(q) \
	(list_empty(q) ? NULL : container_of(q->prev, \
	struct intel_vgpu_workload, list))

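/*
 * Build a workload from the guest ring context referenced by @desc (ring
 * head/tail, ring buffer control, workaround-context info for RCS), attach
 * the shadow mm and queue it on the per-ring workload queue.
 */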
static int submit_context(struct intel_vgpu *vgpu, int ring_id,
		struct execlist_ctx_descriptor_format *desc,
		bool emulate_schedule_in)
{
	struct list_head *q = workload_q_head(vgpu, ring_id);
	struct intel_vgpu_workload *last_workload = get_last_workload(q);
	struct intel_vgpu_workload *workload = NULL;
	u64 ring_context_gpa;
	u32 head, tail, start, ctl, ctx_ctl, per_ctx, indirect_ctx;
	int ret;

	ring_context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
			(u32)((desc->lrca + 1) << GTT_PAGE_SHIFT));
	if (ring_context_gpa == INTEL_GVT_INVALID_ADDR) {
		gvt_err("invalid guest context LRCA: %x\n", desc->lrca);
		return -EINVAL;
	}

	intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
			RING_CTX_OFF(ring_header.val), &head, 4);

	intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
			RING_CTX_OFF(ring_tail.val), &tail, 4);

	head &= RB_HEAD_OFF_MASK;
	tail &= RB_TAIL_OFF_MASK;

	if (last_workload && same_context(&last_workload->ctx_desc, desc)) {
		gvt_dbg_el("ring id %d cur workload == last\n", ring_id);
		gvt_dbg_el("ctx head %x real head %lx\n", head,
				last_workload->rb_tail);
		/*
		 * cannot use guest context head pointer here,
		 * as it might not be updated at this time
		 */
		head = last_workload->rb_tail;
	}

	gvt_dbg_el("ring id %d begin a new workload\n", ring_id);

	workload = kmem_cache_zalloc(vgpu->workloads, GFP_KERNEL);
	if (!workload)
		return -ENOMEM;

	/* record some ring buffer register values for scan and shadow */
	intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
			RING_CTX_OFF(rb_start.val), &start, 4);
	intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
			RING_CTX_OFF(rb_ctrl.val), &ctl, 4);
	intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
			RING_CTX_OFF(ctx_ctrl.val), &ctx_ctl, 4);

	INIT_LIST_HEAD(&workload->list);
	INIT_LIST_HEAD(&workload->shadow_bb);

	init_waitqueue_head(&workload->shadow_ctx_status_wq);
	atomic_set(&workload->shadow_ctx_active, 0);

	workload->vgpu = vgpu;
	workload->ring_id = ring_id;
	workload->ctx_desc = *desc;
	workload->ring_context_gpa = ring_context_gpa;
	workload->rb_head = head;
	workload->rb_tail = tail;
	workload->rb_start = start;
	workload->rb_ctl = ctl;
	workload->prepare = prepare_execlist_workload;
	workload->complete = complete_execlist_workload;
	workload->status = -EINPROGRESS;
	workload->emulate_schedule_in = emulate_schedule_in;

	if (ring_id == RCS) {
		intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
			RING_CTX_OFF(bb_per_ctx_ptr.val), &per_ctx, 4);
		intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
			RING_CTX_OFF(rcs_indirect_ctx.val), &indirect_ctx, 4);

		workload->wa_ctx.indirect_ctx.guest_gma =
			indirect_ctx & INDIRECT_CTX_ADDR_MASK;
		workload->wa_ctx.indirect_ctx.size =
			(indirect_ctx & INDIRECT_CTX_SIZE_MASK) *
			CACHELINE_BYTES;
		workload->wa_ctx.per_ctx.guest_gma =
			per_ctx & PER_CTX_ADDR_MASK;
		workload->wa_ctx.workload = workload;

		WARN_ON(workload->wa_ctx.indirect_ctx.size && !(per_ctx & 0x1));
	}

	if (emulate_schedule_in)
		memcpy(&workload->elsp_dwords,
				&vgpu->execlist[ring_id].elsp_dwords,
				sizeof(workload->elsp_dwords));

	gvt_dbg_el("workload %p ring id %d head %x tail %x start %x ctl %x\n",
			workload, ring_id, head, tail, start, ctl);

	gvt_dbg_el("workload %p emulate schedule_in %d\n", workload,
			emulate_schedule_in);

	ret = prepare_mm(workload);
	if (ret) {
		kmem_cache_free(vgpu->workloads, workload);
		return ret;
	}

	queue_workload(workload);
	return 0;
}

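/*
 * Entry point for a guest ELSP write: validate the two submitted context
 * descriptors and create a workload for each valid one; only the first
 * workload of the write emulates the schedule-in.
 */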
int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id)
{
	struct intel_vgpu_execlist *execlist = &vgpu->execlist[ring_id];
	struct execlist_ctx_descriptor_format *desc[2], valid_desc[2];
	unsigned long valid_desc_bitmap = 0;
	bool emulate_schedule_in = true;
	int ret;
	int i;

	memset(valid_desc, 0, sizeof(valid_desc));

	desc[0] = get_desc_from_elsp_dwords(&execlist->elsp_dwords, 1);
	desc[1] = get_desc_from_elsp_dwords(&execlist->elsp_dwords, 0);

	for (i = 0; i < 2; i++) {
		if (!desc[i]->valid)
			continue;

		if (!desc[i]->privilege_access) {
			gvt_err("vgpu%d: unexpected GGTT elsp submission\n",
					vgpu->id);
			return -EINVAL;
		}

		/* TODO: add more guest context checks here. */
		set_bit(i, &valid_desc_bitmap);
		valid_desc[i] = *desc[i];
	}

	if (!valid_desc_bitmap) {
		gvt_err("vgpu%d: no valid desc in an elsp submission\n",
				vgpu->id);
		return -EINVAL;
	}

	if (!test_bit(0, (void *)&valid_desc_bitmap) &&
			test_bit(1, (void *)&valid_desc_bitmap)) {
		gvt_err("vgpu%d: weird elsp submission, desc 0 is not valid\n",
				vgpu->id);
		return -EINVAL;
	}

	/* submit workload */
	for_each_set_bit(i, (void *)&valid_desc_bitmap, 2) {
		ret = submit_context(vgpu, ring_id, &valid_desc[i],
				emulate_schedule_in);
		if (ret) {
			gvt_err("vgpu%d: fail to schedule workload\n",
					vgpu->id);
			return ret;
		}
		emulate_schedule_in = false;
	}
	return 0;
}

static void init_vgpu_execlist(struct intel_vgpu *vgpu, int ring_id)
{
	struct intel_vgpu_execlist *execlist = &vgpu->execlist[ring_id];
	struct execlist_context_status_pointer_format ctx_status_ptr;
	u32 ctx_status_ptr_reg;

	memset(execlist, 0, sizeof(*execlist));

	execlist->vgpu = vgpu;
	execlist->ring_id = ring_id;
	execlist->slot[0].index = 0;
	execlist->slot[1].index = 1;

	ctx_status_ptr_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
			_EL_OFFSET_STATUS_PTR);

	ctx_status_ptr.dw = vgpu_vreg(vgpu, ctx_status_ptr_reg);
	ctx_status_ptr.read_ptr = ctx_status_ptr.write_ptr = 0x7;
	vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw;
}

void intel_vgpu_clean_execlist(struct intel_vgpu *vgpu)
{
	kmem_cache_destroy(vgpu->workloads);
}

int intel_vgpu_init_execlist(struct intel_vgpu *vgpu)
{
	enum intel_engine_id i;
	struct intel_engine_cs *engine;

	/* each ring has a virtual execlist engine */
	for_each_engine(engine, vgpu->gvt->dev_priv, i) {
		init_vgpu_execlist(vgpu, i);
		INIT_LIST_HEAD(&vgpu->workload_q_head[i]);
	}

	vgpu->workloads = kmem_cache_create("gvt-g_vgpu_workload",
			sizeof(struct intel_vgpu_workload), 0,
			SLAB_HWCACHE_ALIGN,
			NULL);

	if (!vgpu->workloads)
		return -ENOMEM;

	return 0;
}

void intel_vgpu_reset_execlist(struct intel_vgpu *vgpu,
		unsigned long engine_mask)
{
	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
	struct intel_engine_cs *engine;
	struct intel_vgpu_workload *pos, *n;
	unsigned int tmp;

	for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
		/* free the unsubmitted workloads in the queue */
		list_for_each_entry_safe(pos, n,
			&vgpu->workload_q_head[engine->id], list) {
			list_del_init(&pos->list);
			free_workload(pos);
		}

		init_vgpu_execlist(vgpu, engine->id);
	}
}