// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include "i915_drv.h"
#include "gt/intel_context.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_buffer_pool.h"
#include "gt/intel_ring.h"
#include "i915_gem_clflush.h"
#include "i915_gem_object_blt.h"

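/*
 * Build a batch buffer that fills @vma with @value using the blitter.
 * The fill is emitted as a series of XY_COLOR_BLT commands, one per
 * block_size chunk, with an MI_ARB_CHECK between blocks so the request
 * remains preemptible. The returned batch vma is pinned and carries its
 * buffer-pool node in batch->private; release it with
 * intel_emit_vma_release().
 */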
struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
					 struct i915_vma *vma,
					 struct i915_gem_ww_ctx *ww,
					 u32 value)
{
	struct drm_i915_private *i915 = ce->vm->i915;
	const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */
	struct intel_gt_buffer_pool_node *pool;
	struct i915_vma *batch;
	u64 offset;
	u64 count;
	u64 rem;
	u32 size;
	u32 *cmd;
	int err;

	GEM_BUG_ON(intel_engine_is_virtual(ce->engine));
	intel_engine_pm_get(ce->engine);

	count = div_u64(round_up(vma->size, block_size), block_size);
	size = (1 + 8 * count) * sizeof(u32);
	size = round_up(size, PAGE_SIZE);
	pool = intel_gt_get_buffer_pool(ce->engine->gt, size, I915_MAP_WC);
	if (IS_ERR(pool)) {
		err = PTR_ERR(pool);
		goto out_pm;
	}

	err = i915_gem_object_lock(pool->obj, ww);
	if (err)
		goto out_put;

	batch = i915_vma_instance(pool->obj, ce->vm, NULL);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_put;
	}

	err = i915_vma_pin_ww(batch, ww, 0, 0, PIN_USER);
	if (unlikely(err))
		goto out_put;

	/* we pinned the pool, mark it as such */
	intel_gt_buffer_pool_mark_used(pool);

	cmd = i915_gem_object_pin_map(pool->obj, pool->type);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto out_unpin;
	}

	rem = vma->size;
	offset = vma->node.start;

	do {
		u32 size = min_t(u64, rem, block_size);

		GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);

		if (GRAPHICS_VER(i915) >= 8) {
			*cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = lower_32_bits(offset);
			*cmd++ = upper_32_bits(offset);
			*cmd++ = value;
		} else {
			*cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = offset;
			*cmd++ = value;
		}

		/* Allow ourselves to be preempted in between blocks. */
		*cmd++ = MI_ARB_CHECK;

		offset += size;
		rem -= size;
	} while (rem);

	*cmd = MI_BATCH_BUFFER_END;

	i915_gem_object_flush_map(pool->obj);
	i915_gem_object_unpin_map(pool->obj);

	intel_gt_chipset_flush(ce->vm->gt);

	batch->private = pool;
	return batch;

out_unpin:
	i915_vma_unpin(batch);
out_put:
	intel_gt_buffer_pool_put(pool);
out_pm:
	intel_engine_pm_put(ce->engine);
	return ERR_PTR(err);
}

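/*
 * Keep the batch (and its backing pool node) alive until @rq has executed:
 * serialise against prior users of the batch object and track both the vma
 * and the buffer-pool node as active on the request.
 */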
int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq)
{
	int err;

	err = i915_request_await_object(rq, vma->obj, false);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, 0);
	if (unlikely(err))
		return err;

	return intel_gt_buffer_pool_mark_active(vma->private, rq);
}

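/*
 * Undo intel_emit_vma_fill_blt()/intel_emit_vma_copy_blt(): unpin the batch
 * vma and drop the buffer-pool and engine-pm references taken when the
 * batch was built.
 */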
void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma)
{
	i915_vma_unpin(vma);
	intel_gt_buffer_pool_put(vma->private);
	intel_engine_pm_put(ce->engine);
}

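/*
 * Flush any CPU cachelines the GPU cannot snoop before it touches the
 * object, then order the new request after any previous work on the object.
 */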
static int
move_obj_to_gpu(struct drm_i915_gem_object *obj,
		struct i915_request *rq,
		bool write)
{
	if (obj->cache_dirty & ~obj->cache_coherent)
		i915_gem_clflush_object(obj, 0);

	return i915_request_await_object(rq, obj, write);
}

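/*
 * Fill @obj with @value using the blitter on @ce: bind the object into
 * ce->vm, emit a fill batch and submit it as a new request, taking the
 * object locks under a ww context with -EDEADLK backoff. Returns 0 on
 * successful submission; the fill itself completes asynchronously when
 * the request retires.
 */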
int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
			     struct intel_context *ce,
			     u32 value)
{
	struct i915_gem_ww_ctx ww;
	struct i915_request *rq;
	struct i915_vma *batch;
	struct i915_vma *vma;
	int err;

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	i915_gem_ww_ctx_init(&ww, true);
	intel_engine_pm_get(ce->engine);
retry:
	err = i915_gem_object_lock(obj, &ww);
	if (err)
		goto out;

	err = intel_context_pin_ww(ce, &ww);
	if (err)
		goto out;

	err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
	if (err)
		goto out_ctx;

	batch = intel_emit_vma_fill_blt(ce, vma, &ww, value);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_vma;
	}

	rq = i915_request_create(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_batch;
	}

	err = intel_emit_vma_mark_active(batch, rq);
	if (unlikely(err))
		goto out_request;

	err = move_obj_to_gpu(vma->obj, rq, true);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	if (unlikely(err))
		goto out_request;

	if (ce->engine->emit_init_breadcrumb)
		err = ce->engine->emit_init_breadcrumb(rq);

	if (likely(!err))
		err = ce->engine->emit_bb_start(rq,
						batch->node.start,
						batch->node.size,
						0);
out_request:
	if (unlikely(err))
		i915_request_set_error_once(rq, err);

	i915_request_add(rq);
out_batch:
	intel_emit_vma_release(ce, batch);
out_vma:
	i915_vma_unpin(vma);
out_ctx:
	intel_context_unpin(ce);
out:
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	intel_engine_pm_put(ce->engine);
	return err;
}

/* Wa_1209644611:icl,ehl */
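/*
 * On graphics version 11, the fast-copy blit is avoided for blocks whose
 * height in pages satisfies height % 4 == 3 and height <= 8; such copies
 * fall back to the XY_SRC_COPY_BLT path in intel_emit_vma_copy_blt() below.
 */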
static bool wa_1209644611_applies(struct drm_i915_private *i915, u32 size)
{
	u32 height = size >> PAGE_SHIFT;

	if (GRAPHICS_VER(i915) != 11)
		return false;

	return height % 4 == 3 && height <= 8;
}

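/*
 * Build a batch buffer that copies @src to @dst (which must be the same
 * size) using the blitter. Each block_size chunk is emitted as a
 * GEN9_XY_FAST_COPY_BLT, XY_SRC_COPY_BLT or SRC_COPY_BLT command depending
 * on the graphics version, with an MI_ARB_CHECK between blocks to keep the
 * batch preemptible. As with the fill path, the returned batch vma carries
 * its buffer-pool node in batch->private.
 */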
struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
					 struct i915_gem_ww_ctx *ww,
					 struct i915_vma *src,
					 struct i915_vma *dst)
{
	struct drm_i915_private *i915 = ce->vm->i915;
	const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */
	struct intel_gt_buffer_pool_node *pool;
	struct i915_vma *batch;
	u64 src_offset, dst_offset;
	u64 count, rem;
	u32 size, *cmd;
	int err;

	GEM_BUG_ON(src->size != dst->size);

	GEM_BUG_ON(intel_engine_is_virtual(ce->engine));
	intel_engine_pm_get(ce->engine);

	count = div_u64(round_up(dst->size, block_size), block_size);
	size = (1 + 11 * count) * sizeof(u32);
	size = round_up(size, PAGE_SIZE);
	pool = intel_gt_get_buffer_pool(ce->engine->gt, size, I915_MAP_WC);
	if (IS_ERR(pool)) {
		err = PTR_ERR(pool);
		goto out_pm;
	}

	err = i915_gem_object_lock(pool->obj, ww);
	if (err)
		goto out_put;

	batch = i915_vma_instance(pool->obj, ce->vm, NULL);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_put;
	}

	err = i915_vma_pin_ww(batch, ww, 0, 0, PIN_USER);
	if (unlikely(err))
		goto out_put;

	/* we pinned the pool, mark it as such */
	intel_gt_buffer_pool_mark_used(pool);

	cmd = i915_gem_object_pin_map(pool->obj, pool->type);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto out_unpin;
	}

	rem = src->size;
	src_offset = src->node.start;
	dst_offset = dst->node.start;

	do {
		size = min_t(u64, rem, block_size);
		GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);

		if (GRAPHICS_VER(i915) >= 9 &&
		    !wa_1209644611_applies(i915, size)) {
			*cmd++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2);
			*cmd++ = BLT_DEPTH_32 | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = lower_32_bits(dst_offset);
			*cmd++ = upper_32_bits(dst_offset);
			*cmd++ = 0;
			*cmd++ = PAGE_SIZE;
			*cmd++ = lower_32_bits(src_offset);
			*cmd++ = upper_32_bits(src_offset);
		} else if (GRAPHICS_VER(i915) >= 8) {
			*cmd++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = lower_32_bits(dst_offset);
			*cmd++ = upper_32_bits(dst_offset);
			*cmd++ = 0;
			*cmd++ = PAGE_SIZE;
			*cmd++ = lower_32_bits(src_offset);
			*cmd++ = upper_32_bits(src_offset);
		} else {
			*cmd++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE;
			*cmd++ = dst_offset;
			*cmd++ = PAGE_SIZE;
			*cmd++ = src_offset;
		}

		/* Allow ourselves to be preempted in between blocks. */
		*cmd++ = MI_ARB_CHECK;

		src_offset += size;
		dst_offset += size;
		rem -= size;
	} while (rem);

	*cmd = MI_BATCH_BUFFER_END;

	i915_gem_object_flush_map(pool->obj);
	i915_gem_object_unpin_map(pool->obj);

	intel_gt_chipset_flush(ce->vm->gt);
	batch->private = pool;
	return batch;

out_unpin:
	i915_vma_unpin(batch);
out_put:
	intel_gt_buffer_pool_put(pool);
out_pm:
	intel_engine_pm_put(ce->engine);
	return ERR_PTR(err);
}

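/*
 * Copy @src to @dst using the blitter on @ce: bind both objects into
 * ce->vm, emit a copy batch and submit it as a new request, with ww
 * locking and -EDEADLK backoff across both objects. vma[0] is the
 * read-only source and vma[1] the written destination. Returns 0 on
 * successful submission; the copy completes when the request retires.
 */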
int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
			     struct drm_i915_gem_object *dst,
			     struct intel_context *ce)
{
	struct i915_address_space *vm = ce->vm;
	struct i915_vma *vma[2], *batch;
	struct i915_gem_ww_ctx ww;
	struct i915_request *rq;
	int err, i;

	vma[0] = i915_vma_instance(src, vm, NULL);
	if (IS_ERR(vma[0]))
		return PTR_ERR(vma[0]);

	vma[1] = i915_vma_instance(dst, vm, NULL);
	if (IS_ERR(vma[1]))
		return PTR_ERR(vma[1]);

	i915_gem_ww_ctx_init(&ww, true);
	intel_engine_pm_get(ce->engine);
retry:
	err = i915_gem_object_lock(src, &ww);
	if (!err)
		err = i915_gem_object_lock(dst, &ww);
	if (!err)
		err = intel_context_pin_ww(ce, &ww);
	if (err)
		goto out;

	err = i915_vma_pin_ww(vma[0], &ww, 0, 0, PIN_USER);
	if (err)
		goto out_ctx;

	err = i915_vma_pin_ww(vma[1], &ww, 0, 0, PIN_USER);
	if (unlikely(err))
		goto out_unpin_src;

	batch = intel_emit_vma_copy_blt(ce, &ww, vma[0], vma[1]);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_unpin_dst;
	}

	rq = i915_request_create(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_batch;
	}

	err = intel_emit_vma_mark_active(batch, rq);
	if (unlikely(err))
		goto out_request;

	for (i = 0; i < ARRAY_SIZE(vma); i++) {
		err = move_obj_to_gpu(vma[i]->obj, rq, i);
		if (unlikely(err))
			goto out_request;
	}

	for (i = 0; i < ARRAY_SIZE(vma); i++) {
		unsigned int flags = i ? EXEC_OBJECT_WRITE : 0;

		err = i915_vma_move_to_active(vma[i], rq, flags);
		if (unlikely(err))
			goto out_request;
	}

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (unlikely(err))
			goto out_request;
	}

	err = rq->engine->emit_bb_start(rq,
					batch->node.start, batch->node.size,
					0);

out_request:
	if (unlikely(err))
		i915_request_set_error_once(rq, err);

	i915_request_add(rq);
out_batch:
	intel_emit_vma_release(ce, batch);
out_unpin_dst:
	i915_vma_unpin(vma[1]);
out_unpin_src:
	i915_vma_unpin(vma[0]);
out_ctx:
	intel_context_unpin(ce);
out:
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	intel_engine_pm_put(ce->engine);
	return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_gem_object_blt.c"
#endif