// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include "i915_drv.h"
#include "gt/intel_context.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_buffer_pool.h"
#include "gt/intel_ring.h"
#include "i915_gem_clflush.h"
#include "i915_gem_object_blt.h"

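/*
 * Build a batch buffer that fills @vma with @value using the blitter.
 * The fill is emitted as one XY_COLOR_BLT per block_size chunk, with an
 * MI_ARB_CHECK between chunks so the request stays preemptible. The
 * returned batch carries a buffer-pool node (batch->private) and an
 * engine-pm reference; both are released by intel_emit_vma_release().
 */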
struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
					 struct i915_vma *vma,
					 u32 value)
{
	struct drm_i915_private *i915 = ce->vm->i915;
	const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */
	struct intel_gt_buffer_pool_node *pool;
	struct i915_vma *batch;
	u64 offset;
	u64 count;
	u64 rem;
	u32 size;
	u32 *cmd;
	int err;

	GEM_BUG_ON(intel_engine_is_virtual(ce->engine));
	intel_engine_pm_get(ce->engine);

	count = div_u64(round_up(vma->size, block_size), block_size);
	size = (1 + 8 * count) * sizeof(u32);
	size = round_up(size, PAGE_SIZE);
	pool = intel_gt_get_buffer_pool(ce->engine->gt, size);
	if (IS_ERR(pool)) {
		err = PTR_ERR(pool);
		goto out_pm;
	}

	cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto out_put;
	}

	rem = vma->size;
	offset = vma->node.start;

	do {
		u32 size = min_t(u64, rem, block_size);

		GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);

		if (INTEL_GEN(i915) >= 8) {
			*cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = lower_32_bits(offset);
			*cmd++ = upper_32_bits(offset);
			*cmd++ = value;
		} else {
			*cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = offset;
			*cmd++ = value;
		}

		/* Allow ourselves to be preempted in between blocks. */
		*cmd++ = MI_ARB_CHECK;

		offset += size;
		rem -= size;
	} while (rem);

	*cmd = MI_BATCH_BUFFER_END;

	i915_gem_object_flush_map(pool->obj);
	i915_gem_object_unpin_map(pool->obj);

	intel_gt_chipset_flush(ce->vm->gt);

	batch = i915_vma_instance(pool->obj, ce->vm, NULL);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_put;
	}

	err = i915_vma_pin(batch, 0, 0, PIN_USER);
	if (unlikely(err))
		goto out_put;

	batch->private = pool;
	return batch;

out_put:
	intel_gt_buffer_pool_put(pool);
out_pm:
	intel_engine_pm_put(ce->engine);
	return ERR_PTR(err);
}

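/*
 * Serialise the batch against prior fences on its backing object, mark
 * it active for the lifetime of @rq and keep its buffer-pool node alive
 * until the request is retired.
 */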
int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq)
{
	int err;

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, false);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, 0);
	i915_vma_unlock(vma);
	if (unlikely(err))
		return err;

	return intel_gt_buffer_pool_mark_active(vma->private, rq);
}

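/*
 * Undo intel_emit_vma_{fill,copy}_blt(): unpin the batch, return its
 * buffer-pool node and drop the engine-pm reference.
 */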
void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma)
{
	i915_vma_unpin(vma);
	intel_gt_buffer_pool_put(vma->private);
	intel_engine_pm_put(ce->engine);
}

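/*
 * Flush stale CPU caches if the object is not coherent with the GPU,
 * then order @rq after any prior work on the object.
 */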
static int
move_obj_to_gpu(struct drm_i915_gem_object *obj,
		struct i915_request *rq,
		bool write)
{
	if (obj->cache_dirty & ~obj->cache_coherent)
		i915_gem_clflush_object(obj, 0);

	return i915_request_await_object(rq, obj, write);
}

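/*
 * Fill the entire object with a 32-bit value using the blitter: bind the
 * object into the context's address space, emit the fill batch and
 * submit it as a single request on @ce.
 */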
int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
			     struct intel_context *ce,
			     u32 value)
{
	struct i915_request *rq;
	struct i915_vma *batch;
	struct i915_vma *vma;
	int err;

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (unlikely(err))
		return err;

	batch = intel_emit_vma_fill_blt(ce, vma, value);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_unpin;
	}

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_batch;
	}

	err = intel_emit_vma_mark_active(batch, rq);
	if (unlikely(err))
		goto out_request;

	i915_vma_lock(vma);
	err = move_obj_to_gpu(vma->obj, rq, true);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(vma);
	if (unlikely(err))
		goto out_request;

	if (ce->engine->emit_init_breadcrumb)
		err = ce->engine->emit_init_breadcrumb(rq);

	if (likely(!err))
		err = ce->engine->emit_bb_start(rq,
						batch->node.start,
						batch->node.size,
						0);
out_request:
	if (unlikely(err))
		i915_request_set_error_once(rq, err);

	i915_request_add(rq);
out_batch:
	intel_emit_vma_release(ce, batch);
out_unpin:
	i915_vma_unpin(vma);
	return err;
}

/*
 * Wa_1209644611:icl,ehl
 *
 * Avoid the gen11 fast-copy blit for transfers whose height in pages
 * is 3 or 7; intel_emit_vma_copy_blt() falls back to the legacy
 * XY_SRC_COPY blit for those sizes.
 */
static bool wa_1209644611_applies(struct drm_i915_private *i915, u32 size)
{
	u32 height = size >> PAGE_SHIFT;

	if (!IS_GEN(i915, 11))
		return false;

	return height % 4 == 3 && height <= 8;
}

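/*
 * Build a batch buffer that copies @src to @dst with the blitter. Gen9+
 * uses XY_FAST_COPY_BLT unless Wa_1209644611 applies, in which case (as
 * on gen8) XY_SRC_COPY_BLT is used instead; older platforms use the
 * legacy SRC_COPY_BLT. As with the fill batch, the copy is split into
 * block_size chunks separated by MI_ARB_CHECK so it remains preemptible.
 */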
struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
					 struct i915_vma *src,
					 struct i915_vma *dst)
{
	struct drm_i915_private *i915 = ce->vm->i915;
	const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */
	struct intel_gt_buffer_pool_node *pool;
	struct i915_vma *batch;
	u64 src_offset, dst_offset;
	u64 count, rem;
	u32 size, *cmd;
	int err;

	GEM_BUG_ON(src->size != dst->size);

	GEM_BUG_ON(intel_engine_is_virtual(ce->engine));
	intel_engine_pm_get(ce->engine);

	count = div_u64(round_up(dst->size, block_size), block_size);
	size = (1 + 11 * count) * sizeof(u32);
	size = round_up(size, PAGE_SIZE);
	pool = intel_gt_get_buffer_pool(ce->engine->gt, size);
	if (IS_ERR(pool)) {
		err = PTR_ERR(pool);
		goto out_pm;
	}

	cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto out_put;
	}

	rem = src->size;
	src_offset = src->node.start;
	dst_offset = dst->node.start;

	do {
		size = min_t(u64, rem, block_size);
		GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);

		if (INTEL_GEN(i915) >= 9 &&
		    !wa_1209644611_applies(i915, size)) {
			*cmd++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2);
			*cmd++ = BLT_DEPTH_32 | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = lower_32_bits(dst_offset);
			*cmd++ = upper_32_bits(dst_offset);
			*cmd++ = 0;
			*cmd++ = PAGE_SIZE;
			*cmd++ = lower_32_bits(src_offset);
			*cmd++ = upper_32_bits(src_offset);
		} else if (INTEL_GEN(i915) >= 8) {
			*cmd++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = lower_32_bits(dst_offset);
			*cmd++ = upper_32_bits(dst_offset);
			*cmd++ = 0;
			*cmd++ = PAGE_SIZE;
			*cmd++ = lower_32_bits(src_offset);
			*cmd++ = upper_32_bits(src_offset);
		} else {
			*cmd++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE;
			*cmd++ = dst_offset;
			*cmd++ = PAGE_SIZE;
			*cmd++ = src_offset;
		}

		/* Allow ourselves to be preempted in between blocks. */
		*cmd++ = MI_ARB_CHECK;

		src_offset += size;
		dst_offset += size;
		rem -= size;
	} while (rem);

	*cmd = MI_BATCH_BUFFER_END;

	i915_gem_object_flush_map(pool->obj);
	i915_gem_object_unpin_map(pool->obj);

	intel_gt_chipset_flush(ce->vm->gt);

	batch = i915_vma_instance(pool->obj, ce->vm, NULL);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_put;
	}

	err = i915_vma_pin(batch, 0, 0, PIN_USER);
	if (unlikely(err))
		goto out_put;

	batch->private = pool;
	return batch;

out_put:
	intel_gt_buffer_pool_put(pool);
out_pm:
	intel_engine_pm_put(ce->engine);
	return ERR_PTR(err);
}

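/*
 * Copy the contents of @src into @dst using the blitter: pin both
 * objects into the context's address space, emit the copy batch and
 * submit it as a single request on @ce, holding both object
 * reservations locked while the request is built.
 */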
int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
			     struct drm_i915_gem_object *dst,
			     struct intel_context *ce)
{
	struct drm_gem_object *objs[] = { &src->base, &dst->base };
	struct i915_address_space *vm = ce->vm;
	struct i915_vma *vma[2], *batch;
	struct ww_acquire_ctx acquire;
	struct i915_request *rq;
	int err, i;

	vma[0] = i915_vma_instance(src, vm, NULL);
	if (IS_ERR(vma[0]))
		return PTR_ERR(vma[0]);

	err = i915_vma_pin(vma[0], 0, 0, PIN_USER);
	if (unlikely(err))
		return err;

	vma[1] = i915_vma_instance(dst, vm, NULL);
	if (IS_ERR(vma[1])) {
		err = PTR_ERR(vma[1]);
		goto out_unpin_src;
	}

	err = i915_vma_pin(vma[1], 0, 0, PIN_USER);
	if (unlikely(err))
		goto out_unpin_src;

	batch = intel_emit_vma_copy_blt(ce, vma[0], vma[1]);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_unpin_dst;
	}

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_batch;
	}

	err = intel_emit_vma_mark_active(batch, rq);
	if (unlikely(err))
		goto out_request;

	err = drm_gem_lock_reservations(objs, ARRAY_SIZE(objs), &acquire);
	if (unlikely(err))
		goto out_request;

	for (i = 0; i < ARRAY_SIZE(vma); i++) {
		err = move_obj_to_gpu(vma[i]->obj, rq, i);
		if (unlikely(err))
			goto out_unlock;
	}

	for (i = 0; i < ARRAY_SIZE(vma); i++) {
		unsigned int flags = i ? EXEC_OBJECT_WRITE : 0;

		err = i915_vma_move_to_active(vma[i], rq, flags);
		if (unlikely(err))
			goto out_unlock;
	}

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (unlikely(err))
			goto out_unlock;
	}

	err = rq->engine->emit_bb_start(rq,
					batch->node.start, batch->node.size,
					0);
out_unlock:
	drm_gem_unlock_reservations(objs, ARRAY_SIZE(objs), &acquire);
out_request:
	if (unlikely(err))
		i915_request_set_error_once(rq, err);

	i915_request_add(rq);
out_batch:
	intel_emit_vma_release(ce, batch);
out_unpin_dst:
	i915_vma_unpin(vma[1]);
out_unpin_src:
	i915_vma_unpin(vma[0]);
	return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_gem_object_blt.c"
#endif