// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include "gen2_engine_cs.h"
#include "i915_drv.h"
#include "intel_engine.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_gt_irq.h"
#include "intel_ring.h"

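/*
 * Emit an MI_FLUSH, then a run of dummy scratch writes interleaved with
 * light (no-write) flushes that act as a delay, and a final flush. The
 * padding gives the slow, unqualified flush time to complete before any
 * following commands execute.
 */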
int gen2_emit_flush(struct i915_request *rq, u32 mode)
{
	unsigned int num_store_dw = 12;
	u32 cmd, *cs;

	cmd = MI_FLUSH;
	if (mode & EMIT_INVALIDATE)
		cmd |= MI_READ_FLUSH;

	cs = intel_ring_begin(rq, 2 + 4 * num_store_dw);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = cmd;
	while (num_store_dw--) {
		*cs++ = MI_STORE_DWORD_INDEX;
		*cs++ = I915_GEM_HWS_SCRATCH * sizeof(u32);
		*cs++ = 0;
		*cs++ = MI_FLUSH | MI_NO_WRITE_FLUSH;
	}
	*cs++ = cmd;

	intel_ring_advance(rq, cs);

	return 0;
}

int gen4_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
	u32 cmd, *cs;
	int i;

	/*
	 * read/write caches:
	 *
	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
	 * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
	 * also flushed at 2d versus 3d pipeline switches.
	 *
	 * read-only caches:
	 *
	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
	 * MI_READ_FLUSH is set, and is always flushed on 965.
	 *
	 * I915_GEM_DOMAIN_COMMAND may not exist?
	 *
	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
	 * invalidated when MI_EXE_FLUSH is set.
	 *
	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
	 * invalidated with every MI_FLUSH.
	 *
	 * TLBs:
	 *
	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
	 * and I915_GEM_DOMAIN_CPU are invalidated at PTE write, and
	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
	 * are flushed at any MI_FLUSH.
	 */

	cmd = MI_FLUSH;
	if (mode & EMIT_INVALIDATE) {
		cmd |= MI_EXE_FLUSH;
		if (IS_G4X(rq->engine->i915) || GRAPHICS_VER(rq->engine->i915) == 5)
			cmd |= MI_INVALIDATE_ISP;
	}

	i = 2;
	if (mode & EMIT_INVALIDATE)
		i += 20;

	cs = intel_ring_begin(rq, i);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = cmd;

	/*
	 * A random delay to let the CS invalidate take effect? Without this
	 * delay, the GPU relocation path fails as the CS does not see
	 * the updated contents. Just as important, if we apply the flushes
	 * to the EMIT_FLUSH branch (i.e. immediately after the relocation
	 * write and before the invalidate on the next batch), the relocations
	 * still fail. This implies that it is a delay following invalidation
	 * that is required to reset the caches, as opposed to a delay to
	 * ensure the memory is written.
	 */
	if (mode & EMIT_INVALIDATE) {
		*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
		*cs++ = intel_gt_scratch_offset(rq->engine->gt,
						INTEL_GT_SCRATCH_FIELD_DEFAULT) |
			PIPE_CONTROL_GLOBAL_GTT;
		*cs++ = 0;
		*cs++ = 0;

		for (i = 0; i < 12; i++)
			*cs++ = MI_FLUSH;

		*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
		*cs++ = intel_gt_scratch_offset(rq->engine->gt,
						INTEL_GT_SCRATCH_FIELD_DEFAULT) |
			PIPE_CONTROL_GLOBAL_GTT;
		*cs++ = 0;
		*cs++ = 0;
	}

	*cs++ = cmd;

	intel_ring_advance(rq, cs);

	return 0;
}

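/*
 * The video (BSD) engine only needs a plain MI_FLUSH, padded out to a
 * qword with an MI_NOOP.
 */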
int gen4_emit_flush_vcs(struct i915_request *rq, u32 mode)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_FLUSH;
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	return 0;
}

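/*
 * Emit the breadcrumb: an MI_FLUSH, 'flush' writes of the seqno to a
 * scratch slot (acting as a delay to let the flush land), 'post' writes
 * of the seqno into the status page, and finally a user interrupt to
 * signal completion.
 */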
static u32 *__gen2_emit_breadcrumb(struct i915_request *rq, u32 *cs,
				   int flush, int post)
{
	GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
	GEM_BUG_ON(offset_in_page(rq->hwsp_seqno) != I915_GEM_HWS_SEQNO_ADDR);

	*cs++ = MI_FLUSH;

	while (flush--) {
		*cs++ = MI_STORE_DWORD_INDEX;
		*cs++ = I915_GEM_HWS_SCRATCH * sizeof(u32);
		*cs++ = rq->fence.seqno;
	}

	while (post--) {
		*cs++ = MI_STORE_DWORD_INDEX;
		*cs++ = I915_GEM_HWS_SEQNO_ADDR;
		*cs++ = rq->fence.seqno;
	}

	*cs++ = MI_USER_INTERRUPT;

	rq->tail = intel_ring_offset(rq, cs);
	assert_ring_tail_valid(rq->ring, rq->tail);

	return cs;
}

u32 *gen3_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
	return __gen2_emit_breadcrumb(rq, cs, 16, 8);
}

u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
	return __gen2_emit_breadcrumb(rq, cs, 8, 8);
}

/* Just userspace ABI convention to limit the wa batch bo to a reasonable size */
#define I830_BATCH_LIMIT SZ_256K
#define I830_TLB_ENTRIES (2)
#define I830_WA_SIZE max(I830_TLB_ENTRIES * SZ_4K, I830_BATCH_LIMIT)
int i830_emit_bb_start(struct i915_request *rq,
		       u64 offset, u32 len,
		       unsigned int dispatch_flags)
{
	u32 *cs, cs_offset =
		intel_gt_scratch_offset(rq->engine->gt,
					INTEL_GT_SCRATCH_FIELD_DEFAULT);

	GEM_BUG_ON(rq->engine->gt->scratch->size < I830_WA_SIZE);

	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* Evict the invalid PTE TLBs */
	*cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA;
	*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096;
	*cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */
	*cs++ = cs_offset;
	*cs++ = 0xdeadbeef;
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
		if (len > I830_BATCH_LIMIT)
			return -ENOSPC;

		cs = intel_ring_begin(rq, 6 + 2);
		if (IS_ERR(cs))
			return PTR_ERR(cs);

		/*
		 * Blit the batch (which now has all relocs applied) to the
		 * stable batch scratch bo area (so that the CS never
		 * stumbles over its tlb invalidation bug) ...
		 */
		*cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
		*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096;
		*cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096;
		*cs++ = cs_offset;
		*cs++ = 4096;
		*cs++ = offset;

		*cs++ = MI_FLUSH;
		*cs++ = MI_NOOP;
		intel_ring_advance(rq, cs);

		/* ... and execute it. */
		offset = cs_offset;
	}

	if (!(dispatch_flags & I915_DISPATCH_SECURE))
		offset |= MI_BATCH_NON_SECURE;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
	*cs++ = offset;
	intel_ring_advance(rq, cs);

	return 0;
}

int gen3_emit_bb_start(struct i915_request *rq,
		       u64 offset, u32 len,
		       unsigned int dispatch_flags)
{
	u32 *cs;

	if (!(dispatch_flags & I915_DISPATCH_SECURE))
		offset |= MI_BATCH_NON_SECURE;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
	*cs++ = offset;
	intel_ring_advance(rq, cs);

	return 0;
}

int gen4_emit_bb_start(struct i915_request *rq,
		       u64 offset, u32 length,
		       unsigned int dispatch_flags)
{
	u32 security;
	u32 *cs;

	security = MI_BATCH_NON_SECURE_I965;
	if (dispatch_flags & I915_DISPATCH_SECURE)
		security = 0;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | security;
	*cs++ = offset;
	intel_ring_advance(rq, cs);

	return 0;
}

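/*
 * Gen2 interrupt mask registers are only 16 bits wide, hence the 16-bit
 * MMIO accessors here; gen3 onwards uses the full 32-bit register below.
 */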
void gen2_irq_enable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	i915->irq_mask &= ~engine->irq_enable_mask;
	intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
	ENGINE_POSTING_READ16(engine, RING_IMR);
}

void gen2_irq_disable(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	i915->irq_mask |= engine->irq_enable_mask;
	intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
}

void gen3_irq_enable(struct intel_engine_cs *engine)
{
	engine->i915->irq_mask &= ~engine->irq_enable_mask;
	intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
	intel_uncore_posting_read_fw(engine->uncore, GEN2_IMR);
}

void gen3_irq_disable(struct intel_engine_cs *engine)
{
	engine->i915->irq_mask |= engine->irq_enable_mask;
	intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->irq_mask);
}

void gen5_irq_enable(struct intel_engine_cs *engine)
{
	gen5_gt_enable_irq(engine->gt, engine->irq_enable_mask);
}

void gen5_irq_disable(struct intel_engine_cs *engine)
{
	gen5_gt_disable_irq(engine->gt, engine->irq_enable_mask);
}