// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include "i915_drv.h"
#include "gt/intel_context.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_pool.h"
#include "gt/intel_gt.h"
#include "i915_gem_clflush.h"
#include "i915_gem_object_blt.h"

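/*
 * Build a batch buffer that fills @vma with @value using the blitter.
 * The fill is emitted as a series of XY_COLOR_BLT commands, each covering
 * at most S16_MAX pages, and the pinned batch vma is returned with its
 * engine pool node stashed in vma->private for later release.
 */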
struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
					 struct i915_vma *vma,
					 u32 value)
{
	struct drm_i915_private *i915 = ce->vm->i915;
	const u32 block_size = S16_MAX * PAGE_SIZE;
	struct intel_engine_pool_node *pool;
	struct i915_vma *batch;
	u64 offset;
	u64 count;
	u64 rem;
	u32 size;
	u32 *cmd;
	int err;

	GEM_BUG_ON(intel_engine_is_virtual(ce->engine));
	intel_engine_pm_get(ce->engine);

	count = div_u64(vma->size, block_size);
	size = (1 + 8 * count) * sizeof(u32);
	size = round_up(size, PAGE_SIZE);
	pool = intel_engine_pool_get(&ce->engine->pool, size);
	if (IS_ERR(pool)) {
		err = PTR_ERR(pool);
		goto out_pm;
	}

	cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto out_put;
	}

	rem = vma->size;
	offset = vma->node.start;

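	/*
	 * Each block is emitted as a 2D colour blit of 32bpp pixels:
	 * PAGE_SIZE / 4 pixels wide with a PAGE_SIZE pitch, so every row
	 * covers exactly one page, and the height selects how many pages
	 * of the range this block fills.
	 */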
	do {
		u32 size = min_t(u64, rem, block_size);

		GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);

		if (INTEL_GEN(i915) >= 8) {
			*cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = lower_32_bits(offset);
			*cmd++ = upper_32_bits(offset);
			*cmd++ = value;
		} else {
			*cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = offset;
			*cmd++ = value;
		}

		/* Allow ourselves to be preempted in between blocks. */
		*cmd++ = MI_ARB_CHECK;

		offset += size;
		rem -= size;
	} while (rem);

	*cmd = MI_BATCH_BUFFER_END;
	intel_gt_chipset_flush(ce->vm->gt);

	i915_gem_object_unpin_map(pool->obj);

	batch = i915_vma_instance(pool->obj, ce->vm, NULL);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_put;
	}

	err = i915_vma_pin(batch, 0, 0, PIN_USER);
	if (unlikely(err))
		goto out_put;

	batch->private = pool;
	return batch;

out_put:
	intel_engine_pool_put(pool);
out_pm:
	intel_engine_pm_put(ce->engine);
	return ERR_PTR(err);
}

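/*
 * Record the batch vma and its backing pool node as busy on @rq so that
 * neither is reused before the request has completed.
 */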
int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq)
{
	int err;

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, false);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, 0);
	i915_vma_unlock(vma);
	if (unlikely(err))
		return err;

	return intel_engine_pool_mark_active(vma->private, rq);
}

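/*
 * Undo intel_emit_vma_*_blt(): unpin the batch, return its pool node and
 * drop the engine wakeref taken when the batch was built.
 */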
void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma)
{
	i915_vma_unpin(vma);
	intel_engine_pool_put(vma->private);
	intel_engine_pm_put(ce->engine);
}

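/*
 * Fill every page of @obj with @value on the GPU: pin the object into the
 * context's address space, build the fill batch, then submit a request
 * that runs it with the object tracked as written.
 */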
int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
			     struct intel_context *ce,
			     u32 value)
{
	struct i915_request *rq;
	struct i915_vma *batch;
	struct i915_vma *vma;
	int err;

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (unlikely(err))
		return err;

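	/*
	 * Flush dirty CPU cachelines that are not coherent with the GPU
	 * before the blitter writes the pages, so a later cacheline
	 * writeback cannot overwrite the freshly blitted contents.
	 */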
	if (obj->cache_dirty & ~obj->cache_coherent) {
		i915_gem_object_lock(obj);
		i915_gem_clflush_object(obj, 0);
		i915_gem_object_unlock(obj);
	}

	batch = intel_emit_vma_fill_blt(ce, vma, value);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_unpin;
	}

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_batch;
	}

	err = intel_emit_vma_mark_active(batch, rq);
	if (unlikely(err))
		goto out_request;

	err = i915_request_await_object(rq, obj, true);
	if (unlikely(err))
		goto out_request;

	if (ce->engine->emit_init_breadcrumb) {
		err = ce->engine->emit_init_breadcrumb(rq);
		if (unlikely(err))
			goto out_request;
	}

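	/*
	 * Wait for any existing users of the object, then publish this
	 * request as the exclusive (write) fence on the vma.
	 */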
	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, true);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(vma);
	if (unlikely(err))
		goto out_request;

	err = ce->engine->emit_bb_start(rq,
					batch->node.start, batch->node.size,
					0);
out_request:
	if (unlikely(err))
		i915_request_skip(rq, err);

	i915_request_add(rq);
out_batch:
	intel_emit_vma_release(ce, batch);
out_unpin:
	i915_vma_unpin(vma);
	return err;
}

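/*
 * Build a batch buffer that copies @src to @dst with the blitter:
 * GEN9_XY_FAST_COPY_BLT_CMD on gen9+, XY_SRC_COPY_BLT_CMD on gen8 and
 * SRC_COPY_BLT_CMD before that, again split into blocks of at most
 * S16_MAX pages.
 */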
struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
					 struct i915_vma *src,
					 struct i915_vma *dst)
{
	struct drm_i915_private *i915 = ce->vm->i915;
	const u32 block_size = S16_MAX * PAGE_SIZE;
	struct intel_engine_pool_node *pool;
	struct i915_vma *batch;
	u64 src_offset, dst_offset;
	u64 count, rem;
	u32 size, *cmd;
	int err;

	GEM_BUG_ON(src->size != dst->size);

	GEM_BUG_ON(intel_engine_is_virtual(ce->engine));
	intel_engine_pm_get(ce->engine);

	count = div_u64(dst->size, block_size);
	size = (1 + 11 * count) * sizeof(u32);
	size = round_up(size, PAGE_SIZE);
	pool = intel_engine_pool_get(&ce->engine->pool, size);
	if (IS_ERR(pool)) {
		err = PTR_ERR(pool);
		goto out_pm;
	}

	cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto out_put;
	}

	rem = src->size;
	src_offset = src->node.start;
	dst_offset = dst->node.start;

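	/*
	 * As for the fill, each block copies a rectangle of 32bpp pixels
	 * one page per row, with a PAGE_SIZE pitch on both source and
	 * destination, advancing both offsets block by block.
	 */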
	do {
		size = min_t(u64, rem, block_size);
		GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);

		if (INTEL_GEN(i915) >= 9) {
			*cmd++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2);
			*cmd++ = BLT_DEPTH_32 | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = lower_32_bits(dst_offset);
			*cmd++ = upper_32_bits(dst_offset);
			*cmd++ = 0;
			*cmd++ = PAGE_SIZE;
			*cmd++ = lower_32_bits(src_offset);
			*cmd++ = upper_32_bits(src_offset);
		} else if (INTEL_GEN(i915) >= 8) {
			*cmd++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = lower_32_bits(dst_offset);
			*cmd++ = upper_32_bits(dst_offset);
			*cmd++ = 0;
			*cmd++ = PAGE_SIZE;
			*cmd++ = lower_32_bits(src_offset);
			*cmd++ = upper_32_bits(src_offset);
		} else {
			*cmd++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE;
			*cmd++ = dst_offset;
			*cmd++ = PAGE_SIZE;
			*cmd++ = src_offset;
		}

		/* Allow ourselves to be preempted in between blocks. */
		*cmd++ = MI_ARB_CHECK;

		src_offset += size;
		dst_offset += size;
		rem -= size;
	} while (rem);

	*cmd = MI_BATCH_BUFFER_END;
	intel_gt_chipset_flush(ce->vm->gt);

	i915_gem_object_unpin_map(pool->obj);

	batch = i915_vma_instance(pool->obj, ce->vm, NULL);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_put;
	}

	err = i915_vma_pin(batch, 0, 0, PIN_USER);
	if (unlikely(err))
		goto out_put;

	batch->private = pool;
	return batch;

out_put:
	intel_engine_pool_put(pool);
out_pm:
	intel_engine_pm_put(ce->engine);
	return ERR_PTR(err);
}

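/*
 * Flush non-coherent CPU caches for the vma's object and make @rq wait
 * for its current users, either as a read (source) or a write
 * (destination).
 */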
static int move_to_gpu(struct i915_vma *vma, struct i915_request *rq, bool write)
{
	struct drm_i915_gem_object *obj = vma->obj;

	if (obj->cache_dirty & ~obj->cache_coherent)
		i915_gem_clflush_object(obj, 0);

	return i915_request_await_object(rq, obj, write);
}

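/*
 * Copy the contents of @src into @dst on the GPU: pin both objects into
 * the context's address space, build the copy batch, lock both
 * reservations and submit a request that reads @src and writes @dst.
 */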
int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
			     struct drm_i915_gem_object *dst,
			     struct intel_context *ce)
{
	struct drm_gem_object *objs[] = { &src->base, &dst->base };
	struct i915_address_space *vm = ce->vm;
	struct i915_vma *vma[2], *batch;
	struct ww_acquire_ctx acquire;
	struct i915_request *rq;
	int err, i;

	vma[0] = i915_vma_instance(src, vm, NULL);
	if (IS_ERR(vma[0]))
		return PTR_ERR(vma[0]);

	err = i915_vma_pin(vma[0], 0, 0, PIN_USER);
	if (unlikely(err))
		return err;

	vma[1] = i915_vma_instance(dst, vm, NULL);
	if (IS_ERR(vma[1])) {
		err = PTR_ERR(vma[1]);
		goto out_unpin_src;
	}

	err = i915_vma_pin(vma[1], 0, 0, PIN_USER);
	if (unlikely(err))
		goto out_unpin_src;

	batch = intel_emit_vma_copy_blt(ce, vma[0], vma[1]);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_unpin_dst;
	}

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_batch;
	}

	err = intel_emit_vma_mark_active(batch, rq);
	if (unlikely(err))
		goto out_request;

	err = drm_gem_lock_reservations(objs, ARRAY_SIZE(objs), &acquire);
	if (unlikely(err))
		goto out_request;

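	/*
	 * vma[0] is the read-only source and vma[1] the written
	 * destination; the loop index doubles as the write flag for
	 * move_to_gpu() and as EXEC_OBJECT_WRITE below.
	 */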
	for (i = 0; i < ARRAY_SIZE(vma); i++) {
		err = move_to_gpu(vma[i], rq, i);
		if (unlikely(err))
			goto out_unlock;
	}

	for (i = 0; i < ARRAY_SIZE(vma); i++) {
		unsigned int flags = i ? EXEC_OBJECT_WRITE : 0;

		err = i915_vma_move_to_active(vma[i], rq, flags);
		if (unlikely(err))
			goto out_unlock;
	}

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (unlikely(err))
			goto out_unlock;
	}

	err = rq->engine->emit_bb_start(rq,
					batch->node.start, batch->node.size,
					0);
out_unlock:
	drm_gem_unlock_reservations(objs, ARRAY_SIZE(objs), &acquire);
out_request:
	if (unlikely(err))
		i915_request_skip(rq, err);

	i915_request_add(rq);
out_batch:
	intel_emit_vma_release(ce, batch);
out_unpin_dst:
	i915_vma_unpin(vma[1]);
out_unpin_src:
	i915_vma_unpin(vma[0]);
	return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_gem_object_blt.c"
#endif