// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include "i915_drv.h"
#include "gt/intel_context.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_buffer_pool.h"
#include "gt/intel_ring.h"
#include "i915_gem_clflush.h"
#include "i915_gem_object_blt.h"

struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
					 struct i915_vma *vma,
					 u32 value)
{
	struct drm_i915_private *i915 = ce->vm->i915;
	const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */
	struct intel_gt_buffer_pool_node *pool;
	struct i915_vma *batch;
	u64 offset;
	u64 count;
	u64 rem;
	u32 size;
	u32 *cmd;
	int err;

	GEM_BUG_ON(intel_engine_is_virtual(ce->engine));
	intel_engine_pm_get(ce->engine);

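	/*
	 * Reserve the worst-case batch size: 8 dwords per block (a gen8+
	 * XY_COLOR_BLT plus MI_ARB_CHECK) and a trailing MI_BATCH_BUFFER_END.
	 */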
	count = div_u64(round_up(vma->size, block_size), block_size);
	size = (1 + 8 * count) * sizeof(u32);
	size = round_up(size, PAGE_SIZE);
	pool = intel_gt_get_buffer_pool(ce->engine->gt, size);
	if (IS_ERR(pool)) {
		err = PTR_ERR(pool);
		goto out_pm;
	}

	cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto out_put;
	}

	rem = vma->size;
	offset = vma->node.start;

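	/*
	 * Emit one XY_COLOR_BLT per block, treating the block as a
	 * PAGE_SIZE-wide, 32bpp surface that is (size >> PAGE_SHIFT) rows
	 * tall.
	 */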
	do {
		u32 size = min_t(u64, rem, block_size);

		GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);

		if (INTEL_GEN(i915) >= 8) {
			*cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = lower_32_bits(offset);
			*cmd++ = upper_32_bits(offset);
			*cmd++ = value;
		} else {
			*cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = offset;
			*cmd++ = value;
		}

		/* Allow ourselves to be preempted in between blocks. */
		*cmd++ = MI_ARB_CHECK;

		offset += size;
		rem -= size;
	} while (rem);

	*cmd = MI_BATCH_BUFFER_END;

	i915_gem_object_flush_map(pool->obj);
	i915_gem_object_unpin_map(pool->obj);

	intel_gt_chipset_flush(ce->vm->gt);

	batch = i915_vma_instance(pool->obj, ce->vm, NULL);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_put;
	}

	err = i915_vma_pin(batch, 0, 0, PIN_USER);
	if (unlikely(err))
		goto out_put;

	batch->private = pool;
	return batch;

out_put:
	intel_gt_buffer_pool_put(pool);
out_pm:
	intel_engine_pm_put(ce->engine);
	return ERR_PTR(err);
}

int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq)
{
	int err;

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, false);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, 0);
	i915_vma_unlock(vma);
	if (unlikely(err))
		return err;

	return intel_gt_buffer_pool_mark_active(vma->private, rq);
}

void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma)
{
	i915_vma_unpin(vma);
	intel_gt_buffer_pool_put(vma->private);
	intel_engine_pm_put(ce->engine);
}

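/*
 * Flush dirty CPU cachelines for objects that are not coherent with the GPU
 * before the blitter touches them, then order this request after any prior
 * work on the object.
 */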
static int
move_obj_to_gpu(struct drm_i915_gem_object *obj,
		struct i915_request *rq,
		bool write)
{
	if (obj->cache_dirty & ~obj->cache_coherent)
		i915_gem_clflush_object(obj, 0);

	return i915_request_await_object(rq, obj, write);
}

int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
			     struct intel_context *ce,
			     u32 value)
{
	struct i915_request *rq;
	struct i915_vma *batch;
	struct i915_vma *vma;
	int err;

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (unlikely(err))
		return err;

	batch = intel_emit_vma_fill_blt(ce, vma, value);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_unpin;
	}

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_batch;
	}

	err = intel_emit_vma_mark_active(batch, rq);
	if (unlikely(err))
		goto out_request;

	i915_vma_lock(vma);
	err = move_obj_to_gpu(vma->obj, rq, true);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(vma);
	if (unlikely(err))
		goto out_request;

	if (ce->engine->emit_init_breadcrumb)
		err = ce->engine->emit_init_breadcrumb(rq);

	if (likely(!err))
		err = ce->engine->emit_bb_start(rq,
						batch->node.start,
						batch->node.size,
						0);
out_request:
	if (unlikely(err))
		i915_request_set_error_once(rq, err);

	i915_request_add(rq);
out_batch:
	intel_emit_vma_release(ce, batch);
out_unpin:
	i915_vma_unpin(vma);
	return err;
}

/* Wa_1209644611:icl,ehl */
static bool wa_1209644611_applies(struct drm_i915_private *i915, u32 size)
{
	u32 height = size >> PAGE_SHIFT;

	if (!IS_GEN(i915, 11))
		return false;

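	/* The erratum is only hit for surface heights of 3 or 7 pages. */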
	return height % 4 == 3 && height <= 8;
}

struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
					 struct i915_vma *src,
					 struct i915_vma *dst)
{
	struct drm_i915_private *i915 = ce->vm->i915;
	const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */
	struct intel_gt_buffer_pool_node *pool;
	struct i915_vma *batch;
	u64 src_offset, dst_offset;
	u64 count, rem;
	u32 size, *cmd;
	int err;

	GEM_BUG_ON(src->size != dst->size);

	GEM_BUG_ON(intel_engine_is_virtual(ce->engine));
	intel_engine_pm_get(ce->engine);

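	/*
	 * Reserve the worst-case batch size: 11 dwords per block (a 10-dword
	 * copy command plus MI_ARB_CHECK) and a trailing MI_BATCH_BUFFER_END.
	 */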
	count = div_u64(round_up(dst->size, block_size), block_size);
	size = (1 + 11 * count) * sizeof(u32);
	size = round_up(size, PAGE_SIZE);
	pool = intel_gt_get_buffer_pool(ce->engine->gt, size);
	if (IS_ERR(pool)) {
		err = PTR_ERR(pool);
		goto out_pm;
	}

	cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto out_put;
	}

	rem = src->size;
	src_offset = src->node.start;
	dst_offset = dst->node.start;

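	/*
	 * Per block, prefer the gen9 XY_FAST_COPY_BLT unless Wa_1209644611
	 * applies, falling back to XY_SRC_COPY_BLT (gen8+, 48b addresses) or
	 * the legacy SRC_COPY_BLT, again describing each block as a
	 * PAGE_SIZE-wide, 32bpp surface.
	 */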
	do {
		size = min_t(u64, rem, block_size);
		GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);

		if (INTEL_GEN(i915) >= 9 &&
		    !wa_1209644611_applies(i915, size)) {
			*cmd++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2);
			*cmd++ = BLT_DEPTH_32 | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = lower_32_bits(dst_offset);
			*cmd++ = upper_32_bits(dst_offset);
			*cmd++ = 0;
			*cmd++ = PAGE_SIZE;
			*cmd++ = lower_32_bits(src_offset);
			*cmd++ = upper_32_bits(src_offset);
		} else if (INTEL_GEN(i915) >= 8) {
			*cmd++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = lower_32_bits(dst_offset);
			*cmd++ = upper_32_bits(dst_offset);
			*cmd++ = 0;
			*cmd++ = PAGE_SIZE;
			*cmd++ = lower_32_bits(src_offset);
			*cmd++ = upper_32_bits(src_offset);
		} else {
			*cmd++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE;
			*cmd++ = dst_offset;
			*cmd++ = PAGE_SIZE;
			*cmd++ = src_offset;
		}

		/* Allow ourselves to be preempted in between blocks. */
		*cmd++ = MI_ARB_CHECK;

		src_offset += size;
		dst_offset += size;
		rem -= size;
	} while (rem);

	*cmd = MI_BATCH_BUFFER_END;

	i915_gem_object_flush_map(pool->obj);
	i915_gem_object_unpin_map(pool->obj);

	intel_gt_chipset_flush(ce->vm->gt);

	batch = i915_vma_instance(pool->obj, ce->vm, NULL);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_put;
	}

	err = i915_vma_pin(batch, 0, 0, PIN_USER);
	if (unlikely(err))
		goto out_put;

	batch->private = pool;
	return batch;

out_put:
	intel_gt_buffer_pool_put(pool);
out_pm:
	intel_engine_pm_put(ce->engine);
	return ERR_PTR(err);
}

int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
			     struct drm_i915_gem_object *dst,
			     struct intel_context *ce)
{
	struct drm_gem_object *objs[] = { &src->base, &dst->base };
	struct i915_address_space *vm = ce->vm;
	struct i915_vma *vma[2], *batch;
	struct ww_acquire_ctx acquire;
	struct i915_request *rq;
	int err, i;

	vma[0] = i915_vma_instance(src, vm, NULL);
	if (IS_ERR(vma[0]))
		return PTR_ERR(vma[0]);

	err = i915_vma_pin(vma[0], 0, 0, PIN_USER);
	if (unlikely(err))
		return err;

	vma[1] = i915_vma_instance(dst, vm, NULL);
	if (IS_ERR(vma[1])) {
		err = PTR_ERR(vma[1]);
		goto out_unpin_src;
	}

	err = i915_vma_pin(vma[1], 0, 0, PIN_USER);
	if (unlikely(err))
		goto out_unpin_src;

	batch = intel_emit_vma_copy_blt(ce, vma[0], vma[1]);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_unpin_dst;
	}

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_batch;
	}

	err = intel_emit_vma_mark_active(batch, rq);
	if (unlikely(err))
		goto out_request;

	err = drm_gem_lock_reservations(objs, ARRAY_SIZE(objs), &acquire);
	if (unlikely(err))
		goto out_request;

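	/* vma[0] is the read-only source, vma[1] the written destination. */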
	for (i = 0; i < ARRAY_SIZE(vma); i++) {
		err = move_obj_to_gpu(vma[i]->obj, rq, i);
		if (unlikely(err))
			goto out_unlock;
	}

	for (i = 0; i < ARRAY_SIZE(vma); i++) {
		unsigned int flags = i ? EXEC_OBJECT_WRITE : 0;

		err = i915_vma_move_to_active(vma[i], rq, flags);
		if (unlikely(err))
			goto out_unlock;
	}

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (unlikely(err))
			goto out_unlock;
	}

	err = rq->engine->emit_bb_start(rq,
					batch->node.start, batch->node.size,
					0);
out_unlock:
	drm_gem_unlock_reservations(objs, ARRAY_SIZE(objs), &acquire);
out_request:
	if (unlikely(err))
		i915_request_set_error_once(rq, err);

	i915_request_add(rq);
out_batch:
	intel_emit_vma_release(ce, batch);
out_unpin_dst:
	i915_vma_unpin(vma[1]);
out_unpin_src:
	i915_vma_unpin(vma[0]);
	return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_gem_object_blt.c"
#endif