1// SPDX-License-Identifier: MIT
2/*
3 * Copyright © 2021 Intel Corporation
4 */
5
6#include "xe_vm.h"
7
8#include <linux/dma-fence-array.h>
9#include <linux/nospec.h>
10
11#include <drm/drm_exec.h>
12#include <drm/drm_print.h>
13#include <drm/ttm/ttm_execbuf_util.h>
14#include <drm/ttm/ttm_tt.h>
15#include <uapi/drm/xe_drm.h>
16#include <linux/ascii85.h>
17#include <linux/delay.h>
18#include <linux/kthread.h>
19#include <linux/mm.h>
20#include <linux/swap.h>
21
22#include <generated/xe_wa_oob.h>
23
24#include "regs/xe_gtt_defs.h"
25#include "xe_assert.h"
26#include "xe_bo.h"
27#include "xe_device.h"
28#include "xe_drm_client.h"
29#include "xe_exec_queue.h"
30#include "xe_gt_pagefault.h"
31#include "xe_gt_tlb_invalidation.h"
32#include "xe_migrate.h"
33#include "xe_pat.h"
34#include "xe_pm.h"
35#include "xe_preempt_fence.h"
36#include "xe_pt.h"
37#include "xe_res_cursor.h"
38#include "xe_sync.h"
39#include "xe_trace_bo.h"
40#include "xe_wa.h"
41#include "xe_hmm.h"
42
43static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
44{
45 return vm->gpuvm.r_obj;
46}
47
48/**
49 * xe_vma_userptr_check_repin() - Advisory check for repin needed
50 * @uvma: The userptr vma
51 *
52 * Check if the userptr vma has been invalidated since last successful
53 * repin. The check is advisory only and the function can be called
54 * without the vm->userptr.notifier_lock held. There is no guarantee that the
55 * vma userptr will remain valid after a lockless check, so typically
56 * the call needs to be followed by a proper check under the notifier_lock.
57 *
58 * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
59 */
60int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma)
61{
62 return mmu_interval_check_retry(&uvma->userptr.notifier,
63 uvma->userptr.notifier_seq) ?
64 -EAGAIN : 0;
65}
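/*
 * A minimal sketch of the lockless-check-then-recheck pattern the kernel-doc
 * above describes, assuming the caller already holds vm->lock.
 * example_revalidate_userptr() is a hypothetical helper, not part of this
 * file.
 *
 *	static int example_revalidate_userptr(struct xe_vm *vm,
 *					      struct xe_userptr_vma *uvma)
 *	{
 *		int err;
 *
 *		// Advisory check without the notifier lock; may race.
 *		if (xe_vma_userptr_check_repin(uvma)) {
 *			err = xe_vma_userptr_pin_pages(uvma);
 *			if (err)
 *				return err;
 *		}
 *
 *		// Authoritative re-check under the notifier lock.
 *		down_read(&vm->userptr.notifier_lock);
 *		err = __xe_vm_userptr_needs_repin(vm);
 *		up_read(&vm->userptr.notifier_lock);
 *
 *		return err;	// -EAGAIN means another repin pass is needed
 *	}
 */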
66
67int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
68{
69 struct xe_vma *vma = &uvma->vma;
70 struct xe_vm *vm = xe_vma_vm(vma);
71 struct xe_device *xe = vm->xe;
72
73 lockdep_assert_held(&vm->lock);
74 xe_assert(xe, xe_vma_is_userptr(vma));
75
76 return xe_hmm_userptr_populate_range(uvma, false);
77}
78
79static bool preempt_fences_waiting(struct xe_vm *vm)
80{
81 struct xe_exec_queue *q;
82
83 lockdep_assert_held(&vm->lock);
84 xe_vm_assert_held(vm);
85
86 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
87 if (!q->lr.pfence ||
88 test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
89 &q->lr.pfence->flags)) {
90 return true;
91 }
92 }
93
94 return false;
95}
96
97static void free_preempt_fences(struct list_head *list)
98{
99 struct list_head *link, *next;
100
101 list_for_each_safe(link, next, list)
102 xe_preempt_fence_free(to_preempt_fence_from_link(link));
103}
104
105static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
106 unsigned int *count)
107{
108 lockdep_assert_held(&vm->lock);
109 xe_vm_assert_held(vm);
110
111 if (*count >= vm->preempt.num_exec_queues)
112 return 0;
113
114 for (; *count < vm->preempt.num_exec_queues; ++(*count)) {
115 struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();
116
117 if (IS_ERR(pfence))
118 return PTR_ERR(pfence);
119
120 list_move_tail(xe_preempt_fence_link(pfence), list);
121 }
122
123 return 0;
124}
125
126static int wait_for_existing_preempt_fences(struct xe_vm *vm)
127{
128 struct xe_exec_queue *q;
129
130 xe_vm_assert_held(vm);
131
132 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
133 if (q->lr.pfence) {
134 long timeout = dma_fence_wait(q->lr.pfence, false);
135
136 /* Only -ETIME on fence indicates VM needs to be killed */
137 if (timeout < 0 || q->lr.pfence->error == -ETIME)
138 return -ETIME;
139
140 dma_fence_put(q->lr.pfence);
141 q->lr.pfence = NULL;
142 }
143 }
144
145 return 0;
146}
147
148static bool xe_vm_is_idle(struct xe_vm *vm)
149{
150 struct xe_exec_queue *q;
151
152 xe_vm_assert_held(vm);
153 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
154 if (!xe_exec_queue_is_idle(q))
155 return false;
156 }
157
158 return true;
159}
160
161static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
162{
163 struct list_head *link;
164 struct xe_exec_queue *q;
165
166 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
167 struct dma_fence *fence;
168
169 link = list->next;
170 xe_assert(vm->xe, link != list);
171
172 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
173 q, q->lr.context,
174 ++q->lr.seqno);
175 dma_fence_put(q->lr.pfence);
176 q->lr.pfence = fence;
177 }
178}
179
180static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
181{
182 struct xe_exec_queue *q;
183 int err;
184
185 xe_bo_assert_held(bo);
186
187 if (!vm->preempt.num_exec_queues)
188 return 0;
189
190 err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues);
191 if (err)
192 return err;
193
194 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
195 if (q->lr.pfence) {
196 dma_resv_add_fence(bo->ttm.base.resv,
197 q->lr.pfence,
198 DMA_RESV_USAGE_BOOKKEEP);
199 }
200
201 return 0;
202}
203
204static void resume_and_reinstall_preempt_fences(struct xe_vm *vm,
205 struct drm_exec *exec)
206{
207 struct xe_exec_queue *q;
208
209 lockdep_assert_held(&vm->lock);
210 xe_vm_assert_held(vm);
211
212 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
213 q->ops->resume(q);
214
215 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence,
216 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
217 }
218}
219
220int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
221{
222 struct drm_gpuvm_exec vm_exec = {
223 .vm = &vm->gpuvm,
224 .flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
225 .num_fences = 1,
226 };
227 struct drm_exec *exec = &vm_exec.exec;
228 struct dma_fence *pfence;
229 int err;
230 bool wait;
231
232 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
233
234 down_write(&vm->lock);
235 err = drm_gpuvm_exec_lock(&vm_exec);
236 if (err)
237 goto out_up_write;
238
239 pfence = xe_preempt_fence_create(q, q->lr.context,
240 ++q->lr.seqno);
241 if (!pfence) {
242 err = -ENOMEM;
243 goto out_fini;
244 }
245
246 list_add(&q->lr.link, &vm->preempt.exec_queues);
247 ++vm->preempt.num_exec_queues;
248 q->lr.pfence = pfence;
249
250 down_read(&vm->userptr.notifier_lock);
251
252 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
253 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
254
255 /*
256 * Check to see if a preemption on the VM or a userptr invalidation is
257 * in flight; if so, trigger this preempt fence to sync state with the
258 * other preempt fences on the VM.
259 */
260 wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
261 if (wait)
262 dma_fence_enable_sw_signaling(pfence);
263
264 up_read(&vm->userptr.notifier_lock);
265
266out_fini:
267 drm_exec_fini(exec);
268out_up_write:
269 up_write(&vm->lock);
270
271 return err;
272}
273
274/**
275 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
276 * @vm: The VM.
277 * @q: The exec_queue
278 *
279 * Note that this function might be called multiple times on the same queue.
280 */
281void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
282{
283 if (!xe_vm_in_preempt_fence_mode(vm))
284 return;
285
286 down_write(&vm->lock);
287 if (!list_empty(&q->lr.link)) {
288 list_del_init(&q->lr.link);
289 --vm->preempt.num_exec_queues;
290 }
291 if (q->lr.pfence) {
292 dma_fence_enable_sw_signaling(q->lr.pfence);
293 dma_fence_put(q->lr.pfence);
294 q->lr.pfence = NULL;
295 }
296 up_write(&vm->lock);
297}
298
299/**
300 * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs
301 * that need repinning.
302 * @vm: The VM.
303 *
304 * This function checks for whether the VM has userptrs that need repinning,
305 * and provides a release-type barrier on the userptr.notifier_lock after
306 * checking.
307 *
308 * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
309 */
310int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
311{
312 lockdep_assert_held_read(&vm->userptr.notifier_lock);
313
314 return (list_empty(&vm->userptr.repin_list) &&
315 list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
316}
317
318#define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
319
320/**
321 * xe_vm_kill() - VM Kill
322 * @vm: The VM.
323 * @unlocked: Flag indicating the VM's dma-resv is not held
324 *
325 * Kill the VM by setting the banned flag, indicating the VM is no longer available for
326 * use. If in preempt fence mode, also kill all exec queues attached to the VM.
327 */
328void xe_vm_kill(struct xe_vm *vm, bool unlocked)
329{
330 struct xe_exec_queue *q;
331
332 lockdep_assert_held(&vm->lock);
333
334 if (unlocked)
335 xe_vm_lock(vm, false);
336
337 vm->flags |= XE_VM_FLAG_BANNED;
338 trace_xe_vm_kill(vm);
339
340 list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
341 q->ops->kill(q);
342
343 if (unlocked)
344 xe_vm_unlock(vm);
345
346 /* TODO: Inform user the VM is banned */
347}
348
349/**
350 * xe_vm_validate_should_retry() - Whether to retry after a validate error.
351 * @exec: The drm_exec object used for locking before validation.
352 * @err: The error returned from ttm_bo_validate().
353 * @end: A ktime_t cookie that should be set to 0 before first use and
354 * that should be reused on subsequent calls.
355 *
356 * With multiple active VMs, under memory pressure, it is possible that
357 * ttm_bo_validate() runs into -EDEADLK and in such a case returns -ENOMEM.
358 * Until ttm properly handles locking in such scenarios, the best thing the
359 * driver can do is retry with a timeout. Check if that is necessary, and
360 * if so unlock the drm_exec's objects while keeping the ticket to prepare
361 * for a rerun.
362 *
363 * Return: true if a retry after drm_exec_init() is recommended;
364 * false otherwise.
365 */
366bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end)
367{
368 ktime_t cur;
369
370 if (err != -ENOMEM)
371 return false;
372
373 cur = ktime_get();
374 *end = *end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS);
375 if (!ktime_before(cur, *end))
376 return false;
377
378 msleep(20);
379 return true;
380}
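/*
 * A minimal sketch of the retry pattern described above, assuming a caller
 * that owns its own drm_exec loop; lock_and_validate() is a hypothetical
 * placeholder for the caller's locking/validation step. This mirrors how
 * preempt_rebind_work_func() below reuses the same 'end' cookie across
 * retries so the total retry time stays bounded.
 *
 *	ktime_t end = 0;
 *	int err;
 *
 * retry:
 *	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 *	drm_exec_until_all_locked(&exec) {
 *		err = lock_and_validate(&exec);
 *		drm_exec_retry_on_contention(&exec);
 *	}
 *	drm_exec_fini(&exec);
 *	if (err && xe_vm_validate_should_retry(&exec, err, &end))
 *		goto retry;
 */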
381
382static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
383{
384 struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
385 struct drm_gpuva *gpuva;
386 int ret;
387
388 lockdep_assert_held(&vm->lock);
389 drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
390 list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
391 &vm->rebind_list);
392
393 ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false);
394 if (ret)
395 return ret;
396
397 vm_bo->evicted = false;
398 return 0;
399}
400
401/**
402 * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
403 * @vm: The vm for which we are rebinding.
404 * @exec: The struct drm_exec with the locked GEM objects.
405 * @num_fences: The number of fences to reserve for the operation, not
406 * including rebinds and validations.
407 *
408 * Validates all evicted gem objects and rebinds their vmas. Note that
409 * rebindings may cause evictions and hence the validation-rebind
410 * sequence is rerun until there are no more objects to validate.
411 *
412 * Return: 0 on success, negative error code on error. In particular,
413 * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
414 * the drm_exec transaction needs to be restarted.
415 */
416int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
417 unsigned int num_fences)
418{
419 struct drm_gem_object *obj;
420 unsigned long index;
421 int ret;
422
423 do {
424 ret = drm_gpuvm_validate(&vm->gpuvm, exec);
425 if (ret)
426 return ret;
427
428 ret = xe_vm_rebind(vm, false);
429 if (ret)
430 return ret;
431 } while (!list_empty(&vm->gpuvm.evict.list));
432
433 drm_exec_for_each_locked_object(exec, index, obj) {
434 ret = dma_resv_reserve_fences(obj->resv, num_fences);
435 if (ret)
436 return ret;
437 }
438
439 return 0;
440}
441
442static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
443 bool *done)
444{
445 int err;
446
447 err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
448 if (err)
449 return err;
450
451 if (xe_vm_is_idle(vm)) {
452 vm->preempt.rebind_deactivated = true;
453 *done = true;
454 return 0;
455 }
456
457 if (!preempt_fences_waiting(vm)) {
458 *done = true;
459 return 0;
460 }
461
462 err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
463 if (err)
464 return err;
465
466 err = wait_for_existing_preempt_fences(vm);
467 if (err)
468 return err;
469
470 /*
471 * Add validation and rebinding to the locking loop since both can
472 * cause evictions which may require blocking dma_resv locks.
473 * The fence reservation here is intended for the new preempt fences
474 * we attach at the end of the rebind work.
475 */
476 return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
477}
478
479static void preempt_rebind_work_func(struct work_struct *w)
480{
481 struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
482 struct drm_exec exec;
483 unsigned int fence_count = 0;
484 LIST_HEAD(preempt_fences);
485 ktime_t end = 0;
486 int err = 0;
487 long wait;
488 int __maybe_unused tries = 0;
489
490 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
491 trace_xe_vm_rebind_worker_enter(vm);
492
493 down_write(&vm->lock);
494
495 if (xe_vm_is_closed_or_banned(vm)) {
496 up_write(&vm->lock);
497 trace_xe_vm_rebind_worker_exit(vm);
498 return;
499 }
500
501retry:
502 if (xe_vm_userptr_check_repin(vm)) {
503 err = xe_vm_userptr_pin(vm);
504 if (err)
505 goto out_unlock_outer;
506 }
507
508 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
509
510 drm_exec_until_all_locked(&exec) {
511 bool done = false;
512
513 err = xe_preempt_work_begin(&exec, vm, &done);
514 drm_exec_retry_on_contention(&exec);
515 if (err || done) {
516 drm_exec_fini(&exec);
517 if (err && xe_vm_validate_should_retry(&exec, err, &end))
518 err = -EAGAIN;
519
520 goto out_unlock_outer;
521 }
522 }
523
524 err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
525 if (err)
526 goto out_unlock;
527
528 err = xe_vm_rebind(vm, true);
529 if (err)
530 goto out_unlock;
531
532 /* Wait on rebinds and munmap style VM unbinds */
533 wait = dma_resv_wait_timeout(xe_vm_resv(vm),
534 DMA_RESV_USAGE_KERNEL,
535 false, MAX_SCHEDULE_TIMEOUT);
536 if (wait <= 0) {
537 err = -ETIME;
538 goto out_unlock;
539 }
540
541#define retry_required(__tries, __vm) \
542 (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
543 (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
544 __xe_vm_userptr_needs_repin(__vm))
545
546 down_read(&vm->userptr.notifier_lock);
547 if (retry_required(tries, vm)) {
548 up_read(&vm->userptr.notifier_lock);
549 err = -EAGAIN;
550 goto out_unlock;
551 }
552
553#undef retry_required
554
555 spin_lock(&vm->xe->ttm.lru_lock);
556 ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
557 spin_unlock(&vm->xe->ttm.lru_lock);
558
559 /* Point of no return. */
560 arm_preempt_fences(vm, &preempt_fences);
561 resume_and_reinstall_preempt_fences(vm, &exec);
562 up_read(&vm->userptr.notifier_lock);
563
564out_unlock:
565 drm_exec_fini(&exec);
566out_unlock_outer:
567 if (err == -EAGAIN) {
568 trace_xe_vm_rebind_worker_retry(vm);
569 goto retry;
570 }
571
572 if (err) {
573 drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
574 xe_vm_kill(vm, true);
575 }
576 up_write(&vm->lock);
577
578 free_preempt_fences(&preempt_fences);
579
580 trace_xe_vm_rebind_worker_exit(vm);
581}
582
583static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uvma)
584{
585 struct xe_userptr *userptr = &uvma->userptr;
586 struct xe_vma *vma = &uvma->vma;
587 struct dma_resv_iter cursor;
588 struct dma_fence *fence;
589 long err;
590
591 /*
592 * Tell exec and rebind worker they need to repin and rebind this
593 * userptr.
594 */
595 if (!xe_vm_in_fault_mode(vm) &&
596 !(vma->gpuva.flags & XE_VMA_DESTROYED)) {
597 spin_lock(&vm->userptr.invalidated_lock);
598 list_move_tail(&userptr->invalidate_link,
599 &vm->userptr.invalidated);
600 spin_unlock(&vm->userptr.invalidated_lock);
601 }
602
603 /*
604 * Preempt fences turn into schedule disables, pipeline these.
605 * Note that even in fault mode, we need to wait for binds and
606 * unbinds to complete, and those are attached as BOOKKEEP fences
607 * to the vm.
608 */
609 dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
610 DMA_RESV_USAGE_BOOKKEEP);
611 dma_resv_for_each_fence_unlocked(&cursor, fence)
612 dma_fence_enable_sw_signaling(fence);
613 dma_resv_iter_end(&cursor);
614
615 err = dma_resv_wait_timeout(xe_vm_resv(vm),
616 DMA_RESV_USAGE_BOOKKEEP,
617 false, MAX_SCHEDULE_TIMEOUT);
618 XE_WARN_ON(err <= 0);
619
620 if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) {
621 err = xe_vm_invalidate_vma(vma);
622 XE_WARN_ON(err);
623 }
624
625 xe_hmm_userptr_unmap(uvma);
626}
627
628static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
629 const struct mmu_notifier_range *range,
630 unsigned long cur_seq)
631{
632 struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier);
633 struct xe_vma *vma = &uvma->vma;
634 struct xe_vm *vm = xe_vma_vm(vma);
635
636 xe_assert(vm->xe, xe_vma_is_userptr(vma));
637 trace_xe_vma_userptr_invalidate(vma);
638
639 if (!mmu_notifier_range_blockable(range))
640 return false;
641
642 vm_dbg(&xe_vma_vm(vma)->xe->drm,
643 "NOTIFIER: addr=0x%016llx, range=0x%016llx",
644 xe_vma_start(vma), xe_vma_size(vma));
645
646 down_write(&vm->userptr.notifier_lock);
647 mmu_interval_set_seq(mni, cur_seq);
648
649 __vma_userptr_invalidate(vm, uvma);
650 up_write(&vm->userptr.notifier_lock);
651 trace_xe_vma_userptr_invalidate_complete(vma);
652
653 return true;
654}
655
656static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
657 .invalidate = vma_userptr_invalidate,
658};
659
660#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
661/**
662 * xe_vma_userptr_force_invalidate() - force invalidate a userptr
663 * @uvma: The userptr vma to invalidate
664 *
665 * Perform a forced userptr invalidation for testing purposes.
666 */
667void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma)
668{
669 struct xe_vm *vm = xe_vma_vm(&uvma->vma);
670
671 /* Protect against concurrent userptr pinning */
672 lockdep_assert_held(&vm->lock);
673 /* Protect against concurrent notifiers */
674 lockdep_assert_held(&vm->userptr.notifier_lock);
675 /*
676 * Protect against concurrent instances of this function and
677 * the critical exec sections
678 */
679 xe_vm_assert_held(vm);
680
681 if (!mmu_interval_read_retry(&uvma->userptr.notifier,
682 uvma->userptr.notifier_seq))
683 uvma->userptr.notifier_seq -= 2;
684 __vma_userptr_invalidate(vm, uvma);
685}
686#endif
687
688int xe_vm_userptr_pin(struct xe_vm *vm)
689{
690 struct xe_userptr_vma *uvma, *next;
691 int err = 0;
692 LIST_HEAD(tmp_evict);
693
694 xe_assert(vm->xe, !xe_vm_in_fault_mode(vm));
695 lockdep_assert_held_write(&vm->lock);
696
697 /* Collect invalidated userptrs */
698 spin_lock(&vm->userptr.invalidated_lock);
699 xe_assert(vm->xe, list_empty(&vm->userptr.repin_list));
700 list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated,
701 userptr.invalidate_link) {
702 list_del_init(&uvma->userptr.invalidate_link);
703 list_add_tail(&uvma->userptr.repin_link,
704 &vm->userptr.repin_list);
705 }
706 spin_unlock(&vm->userptr.invalidated_lock);
707
708 /* Pin and move to bind list */
709 list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
710 userptr.repin_link) {
711 err = xe_vma_userptr_pin_pages(uvma);
712 if (err == -EFAULT) {
713 list_del_init(&uvma->userptr.repin_link);
714 /*
715 * We might have already done the pin once already, but
716 * then had to retry before the re-bind happened, due to
717 * some other condition in the caller, but in the
718 * meantime the userptr got dinged by the notifier such
719 * that we need to revalidate here, but this time we hit
720 * the EFAULT. In such a case make sure we remove
721 * ourselves from the rebind list to avoid going down in
722 * flames.
723 */
724 if (!list_empty(&uvma->vma.combined_links.rebind))
725 list_del_init(&uvma->vma.combined_links.rebind);
726
727 /* Wait for pending binds */
728 xe_vm_lock(vm, false);
729 dma_resv_wait_timeout(xe_vm_resv(vm),
730 DMA_RESV_USAGE_BOOKKEEP,
731 false, MAX_SCHEDULE_TIMEOUT);
732
733 err = xe_vm_invalidate_vma(&uvma->vma);
734 xe_vm_unlock(vm);
735 if (err)
736 break;
737 } else {
738 if (err)
739 break;
740
741 list_del_init(&uvma->userptr.repin_link);
742 list_move_tail(&uvma->vma.combined_links.rebind,
743 &vm->rebind_list);
744 }
745 }
746
747 if (err) {
748 down_write(&vm->userptr.notifier_lock);
749 spin_lock(&vm->userptr.invalidated_lock);
750 list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
751 userptr.repin_link) {
752 list_del_init(&uvma->userptr.repin_link);
753 list_move_tail(&uvma->userptr.invalidate_link,
754 &vm->userptr.invalidated);
755 }
756 spin_unlock(&vm->userptr.invalidated_lock);
757 up_write(&vm->userptr.notifier_lock);
758 }
759 return err;
760}
761
762/**
763 * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs
764 * that need repinning.
765 * @vm: The VM.
766 *
767 * This function does an advisory check for whether the VM has userptrs that
768 * need repinning.
769 *
770 * Return: 0 if there are no indications of userptrs needing repinning,
771 * -EAGAIN if there are.
772 */
773int xe_vm_userptr_check_repin(struct xe_vm *vm)
774{
775 return (list_empty_careful(&vm->userptr.repin_list) &&
776 list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
777}
778
779static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds)
780{
781 int i;
782
783 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) {
784 if (!vops->pt_update_ops[i].num_ops)
785 continue;
786
787 vops->pt_update_ops[i].ops =
788 kmalloc_array(vops->pt_update_ops[i].num_ops,
789 sizeof(*vops->pt_update_ops[i].ops),
790 GFP_KERNEL);
791 if (!vops->pt_update_ops[i].ops)
792 return array_of_binds ? -ENOBUFS : -ENOMEM;
793 }
794
795 return 0;
796}
797
798static void xe_vma_ops_fini(struct xe_vma_ops *vops)
799{
800 int i;
801
802 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
803 kfree(vops->pt_update_ops[i].ops);
804}
805
806static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask)
807{
808 int i;
809
810 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
811 if (BIT(i) & tile_mask)
812 ++vops->pt_update_ops[i].num_ops;
813}
814
815static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma,
816 u8 tile_mask)
817{
818 INIT_LIST_HEAD(&op->link);
819 op->tile_mask = tile_mask;
820 op->base.op = DRM_GPUVA_OP_MAP;
821 op->base.map.va.addr = vma->gpuva.va.addr;
822 op->base.map.va.range = vma->gpuva.va.range;
823 op->base.map.gem.obj = vma->gpuva.gem.obj;
824 op->base.map.gem.offset = vma->gpuva.gem.offset;
825 op->map.vma = vma;
826 op->map.immediate = true;
827 op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE;
828 op->map.is_null = xe_vma_is_null(vma);
829}
830
831static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma,
832 u8 tile_mask)
833{
834 struct xe_vma_op *op;
835
836 op = kzalloc(sizeof(*op), GFP_KERNEL);
837 if (!op)
838 return -ENOMEM;
839
840 xe_vm_populate_rebind(op, vma, tile_mask);
841 list_add_tail(&op->link, &vops->list);
842 xe_vma_ops_incr_pt_update_ops(vops, tile_mask);
843
844 return 0;
845}
846
847static struct dma_fence *ops_execute(struct xe_vm *vm,
848 struct xe_vma_ops *vops);
849static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
850 struct xe_exec_queue *q,
851 struct xe_sync_entry *syncs, u32 num_syncs);
852
853int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
854{
855 struct dma_fence *fence;
856 struct xe_vma *vma, *next;
857 struct xe_vma_ops vops;
858 struct xe_vma_op *op, *next_op;
859 int err, i;
860
861 lockdep_assert_held(&vm->lock);
862 if ((xe_vm_in_lr_mode(vm) && !rebind_worker) ||
863 list_empty(&vm->rebind_list))
864 return 0;
865
866 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
867 for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
868 vops.pt_update_ops[i].wait_vm_bookkeep = true;
869
870 xe_vm_assert_held(vm);
871 list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
872 xe_assert(vm->xe, vma->tile_present);
873
874 if (rebind_worker)
875 trace_xe_vma_rebind_worker(vma);
876 else
877 trace_xe_vma_rebind_exec(vma);
878
879 err = xe_vm_ops_add_rebind(&vops, vma,
880 vma->tile_present);
881 if (err)
882 goto free_ops;
883 }
884
885 err = xe_vma_ops_alloc(&vops, false);
886 if (err)
887 goto free_ops;
888
889 fence = ops_execute(vm, &vops);
890 if (IS_ERR(fence)) {
891 err = PTR_ERR(fence);
892 } else {
893 dma_fence_put(fence);
894 list_for_each_entry_safe(vma, next, &vm->rebind_list,
895 combined_links.rebind)
896 list_del_init(&vma->combined_links.rebind);
897 }
898free_ops:
899 list_for_each_entry_safe(op, next_op, &vops.list, link) {
900 list_del(&op->link);
901 kfree(op);
902 }
903 xe_vma_ops_fini(&vops);
904
905 return err;
906}
907
908struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask)
909{
910 struct dma_fence *fence = NULL;
911 struct xe_vma_ops vops;
912 struct xe_vma_op *op, *next_op;
913 struct xe_tile *tile;
914 u8 id;
915 int err;
916
917 lockdep_assert_held(&vm->lock);
918 xe_vm_assert_held(vm);
919 xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
920
921 xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
922 for_each_tile(tile, vm->xe, id) {
923 vops.pt_update_ops[id].wait_vm_bookkeep = true;
924 vops.pt_update_ops[tile->id].q =
925 xe_tile_migrate_exec_queue(tile);
926 }
927
928 err = xe_vm_ops_add_rebind(&vops, vma, tile_mask);
929 if (err)
930 return ERR_PTR(err);
931
932 err = xe_vma_ops_alloc(&vops, false);
933 if (err) {
934 fence = ERR_PTR(err);
935 goto free_ops;
936 }
937
938 fence = ops_execute(vm, &vops);
939
940free_ops:
941 list_for_each_entry_safe(op, next_op, &vops.list, link) {
942 list_del(&op->link);
943 kfree(op);
944 }
945 xe_vma_ops_fini(&vops);
946
947 return fence;
948}
949
950static void xe_vma_free(struct xe_vma *vma)
951{
952 if (xe_vma_is_userptr(vma))
953 kfree(to_userptr_vma(vma));
954 else
955 kfree(vma);
956}
957
958#define VMA_CREATE_FLAG_READ_ONLY BIT(0)
959#define VMA_CREATE_FLAG_IS_NULL BIT(1)
960#define VMA_CREATE_FLAG_DUMPABLE BIT(2)
961
962static struct xe_vma *xe_vma_create(struct xe_vm *vm,
963 struct xe_bo *bo,
964 u64 bo_offset_or_userptr,
965 u64 start, u64 end,
966 u16 pat_index, unsigned int flags)
967{
968 struct xe_vma *vma;
969 struct xe_tile *tile;
970 u8 id;
971 bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY);
972 bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL);
973 bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE);
974
975 xe_assert(vm->xe, start < end);
976 xe_assert(vm->xe, end < vm->size);
977
978 /*
979 * Allocate and ensure that the xe_vma_is_userptr() return
980 * matches what was allocated.
981 */
982 if (!bo && !is_null) {
983 struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL);
984
985 if (!uvma)
986 return ERR_PTR(-ENOMEM);
987
988 vma = &uvma->vma;
989 } else {
990 vma = kzalloc(sizeof(*vma), GFP_KERNEL);
991 if (!vma)
992 return ERR_PTR(-ENOMEM);
993
994 if (is_null)
995 vma->gpuva.flags |= DRM_GPUVA_SPARSE;
996 if (bo)
997 vma->gpuva.gem.obj = &bo->ttm.base;
998 }
999
1000 INIT_LIST_HEAD(&vma->combined_links.rebind);
1001
1002 INIT_LIST_HEAD(&vma->gpuva.gem.entry);
1003 vma->gpuva.vm = &vm->gpuvm;
1004 vma->gpuva.va.addr = start;
1005 vma->gpuva.va.range = end - start + 1;
1006 if (read_only)
1007 vma->gpuva.flags |= XE_VMA_READ_ONLY;
1008 if (dumpable)
1009 vma->gpuva.flags |= XE_VMA_DUMPABLE;
1010
1011 for_each_tile(tile, vm->xe, id)
1012 vma->tile_mask |= 0x1 << id;
1013
1014 if (vm->xe->info.has_atomic_enable_pte_bit)
1015 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
1016
1017 vma->pat_index = pat_index;
1018
1019 if (bo) {
1020 struct drm_gpuvm_bo *vm_bo;
1021
1022 xe_bo_assert_held(bo);
1023
1024 vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base);
1025 if (IS_ERR(vm_bo)) {
1026 xe_vma_free(vma);
1027 return ERR_CAST(vm_bo);
1028 }
1029
1030 drm_gpuvm_bo_extobj_add(vm_bo);
1031 drm_gem_object_get(&bo->ttm.base);
1032 vma->gpuva.gem.offset = bo_offset_or_userptr;
1033 drm_gpuva_link(&vma->gpuva, vm_bo);
1034 drm_gpuvm_bo_put(vm_bo);
1035 } else /* userptr or null */ {
1036 if (!is_null) {
1037 struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr;
1038 u64 size = end - start + 1;
1039 int err;
1040
1041 INIT_LIST_HEAD(&userptr->invalidate_link);
1042 INIT_LIST_HEAD(&userptr->repin_link);
1043 vma->gpuva.gem.offset = bo_offset_or_userptr;
1044 mutex_init(&userptr->unmap_mutex);
1045
1046 err = mmu_interval_notifier_insert(&userptr->notifier,
1047 current->mm,
1048 xe_vma_userptr(vma), size,
1049 &vma_userptr_notifier_ops);
1050 if (err) {
1051 xe_vma_free(vma);
1052 return ERR_PTR(err);
1053 }
1054
1055 userptr->notifier_seq = LONG_MAX;
1056 }
1057
1058 xe_vm_get(vm);
1059 }
1060
1061 return vma;
1062}
1063
1064static void xe_vma_destroy_late(struct xe_vma *vma)
1065{
1066 struct xe_vm *vm = xe_vma_vm(vma);
1067
1068 if (vma->ufence) {
1069 xe_sync_ufence_put(vma->ufence);
1070 vma->ufence = NULL;
1071 }
1072
1073 if (xe_vma_is_userptr(vma)) {
1074 struct xe_userptr_vma *uvma = to_userptr_vma(vma);
1075 struct xe_userptr *userptr = &uvma->userptr;
1076
1077 if (userptr->sg)
1078 xe_hmm_userptr_free_sg(uvma);
1079
1080 /*
1081 * Since userptr pages are not pinned, we can't remove
1082 * the notifier until we're sure the GPU is not accessing
1083 * them anymore
1084 */
1085 mmu_interval_notifier_remove(&userptr->notifier);
1086 mutex_destroy(&userptr->unmap_mutex);
1087 xe_vm_put(vm);
1088 } else if (xe_vma_is_null(vma)) {
1089 xe_vm_put(vm);
1090 } else {
1091 xe_bo_put(xe_vma_bo(vma));
1092 }
1093
1094 xe_vma_free(vma);
1095}
1096
1097static void vma_destroy_work_func(struct work_struct *w)
1098{
1099 struct xe_vma *vma =
1100 container_of(w, struct xe_vma, destroy_work);
1101
1102 xe_vma_destroy_late(vma);
1103}
1104
1105static void vma_destroy_cb(struct dma_fence *fence,
1106 struct dma_fence_cb *cb)
1107{
1108 struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);
1109
1110 INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
1111 queue_work(system_unbound_wq, &vma->destroy_work);
1112}
1113
1114static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
1115{
1116 struct xe_vm *vm = xe_vma_vm(vma);
1117
1118 lockdep_assert_held_write(&vm->lock);
1119 xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));
1120
1121 if (xe_vma_is_userptr(vma)) {
1122 xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
1123
1124 spin_lock(&vm->userptr.invalidated_lock);
1125 xe_assert(vm->xe, list_empty(&to_userptr_vma(vma)->userptr.repin_link));
1126 list_del(&to_userptr_vma(vma)->userptr.invalidate_link);
1127 spin_unlock(&vm->userptr.invalidated_lock);
1128 } else if (!xe_vma_is_null(vma)) {
1129 xe_bo_assert_held(xe_vma_bo(vma));
1130
1131 drm_gpuva_unlink(&vma->gpuva);
1132 }
1133
1134 xe_vm_assert_held(vm);
1135 if (fence) {
1136 int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
1137 vma_destroy_cb);
1138
1139 if (ret) {
1140 XE_WARN_ON(ret != -ENOENT);
1141 xe_vma_destroy_late(vma);
1142 }
1143 } else {
1144 xe_vma_destroy_late(vma);
1145 }
1146}
1147
1148/**
1149 * xe_vm_lock_vma() - drm_exec utility to lock a vma
1150 * @exec: The drm_exec object we're currently locking for.
1151 * @vma: The vma for which we want to lock the vm resv and any attached
1152 * object's resv.
1153 *
1154 * Return: 0 on success, negative error code on error. In particular
1155 * may return -EDEADLK on WW transaction contention and -EINTR if
1156 * an interruptible wait is terminated by a signal.
1157 */
1158int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
1159{
1160 struct xe_vm *vm = xe_vma_vm(vma);
1161 struct xe_bo *bo = xe_vma_bo(vma);
1162 int err;
1163
1164 XE_WARN_ON(!vm);
1165
1166 err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
1167 if (!err && bo && !bo->vm)
1168 err = drm_exec_lock_obj(exec, &bo->ttm.base);
1169
1170 return err;
1171}
1172
1173static void xe_vma_destroy_unlocked(struct xe_vma *vma)
1174{
1175 struct drm_exec exec;
1176 int err;
1177
1178 drm_exec_init(&exec, 0, 0);
1179 drm_exec_until_all_locked(&exec) {
1180 err = xe_vm_lock_vma(&exec, vma);
1181 drm_exec_retry_on_contention(&exec);
1182 if (XE_WARN_ON(err))
1183 break;
1184 }
1185
1186 xe_vma_destroy(vma, NULL);
1187
1188 drm_exec_fini(&exec);
1189}
1190
1191struct xe_vma *
1192xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
1193{
1194 struct drm_gpuva *gpuva;
1195
1196 lockdep_assert_held(&vm->lock);
1197
1198 if (xe_vm_is_closed_or_banned(vm))
1199 return NULL;
1200
1201 xe_assert(vm->xe, start + range <= vm->size);
1202
1203 gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);
1204
1205 return gpuva ? gpuva_to_vma(gpuva) : NULL;
1206}
1207
1208static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
1209{
1210 int err;
1211
1212 xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1213 lockdep_assert_held(&vm->lock);
1214
1215 mutex_lock(&vm->snap_mutex);
1216 err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
1217 mutex_unlock(&vm->snap_mutex);
1218 XE_WARN_ON(err); /* Shouldn't be possible */
1219
1220 return err;
1221}
1222
1223static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
1224{
1225 xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1226 lockdep_assert_held(&vm->lock);
1227
1228 mutex_lock(&vm->snap_mutex);
1229 drm_gpuva_remove(&vma->gpuva);
1230 mutex_unlock(&vm->snap_mutex);
1231 if (vm->usm.last_fault_vma == vma)
1232 vm->usm.last_fault_vma = NULL;
1233}
1234
1235static struct drm_gpuva_op *xe_vm_op_alloc(void)
1236{
1237 struct xe_vma_op *op;
1238
1239 op = kzalloc(sizeof(*op), GFP_KERNEL);
1240
1241 if (unlikely(!op))
1242 return NULL;
1243
1244 return &op->base;
1245}
1246
1247static void xe_vm_free(struct drm_gpuvm *gpuvm);
1248
1249static const struct drm_gpuvm_ops gpuvm_ops = {
1250 .op_alloc = xe_vm_op_alloc,
1251 .vm_bo_validate = xe_gpuvm_validate,
1252 .vm_free = xe_vm_free,
1253};
1254
1255static u64 pde_encode_pat_index(u16 pat_index)
1256{
1257 u64 pte = 0;
1258
1259 if (pat_index & BIT(0))
1260 pte |= XE_PPGTT_PTE_PAT0;
1261
1262 if (pat_index & BIT(1))
1263 pte |= XE_PPGTT_PTE_PAT1;
1264
1265 return pte;
1266}
1267
1268static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level)
1269{
1270 u64 pte = 0;
1271
1272 if (pat_index & BIT(0))
1273 pte |= XE_PPGTT_PTE_PAT0;
1274
1275 if (pat_index & BIT(1))
1276 pte |= XE_PPGTT_PTE_PAT1;
1277
1278 if (pat_index & BIT(2)) {
1279 if (pt_level)
1280 pte |= XE_PPGTT_PDE_PDPE_PAT2;
1281 else
1282 pte |= XE_PPGTT_PTE_PAT2;
1283 }
1284
1285 if (pat_index & BIT(3))
1286 pte |= XELPG_PPGTT_PTE_PAT3;
1287
1288 if (pat_index & (BIT(4)))
1289 pte |= XE2_PPGTT_PTE_PAT4;
1290
1291 return pte;
1292}
1293
1294static u64 pte_encode_ps(u32 pt_level)
1295{
1296 XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL);
1297
1298 if (pt_level == 1)
1299 return XE_PDE_PS_2M;
1300 else if (pt_level == 2)
1301 return XE_PDPE_PS_1G;
1302
1303 return 0;
1304}
1305
1306static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset,
1307 const u16 pat_index)
1308{
1309 u64 pde;
1310
1311 pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1312 pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
1313 pde |= pde_encode_pat_index(pat_index);
1314
1315 return pde;
1316}
1317
1318static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
1319 u16 pat_index, u32 pt_level)
1320{
1321 u64 pte;
1322
1323 pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1324 pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1325 pte |= pte_encode_pat_index(pat_index, pt_level);
1326 pte |= pte_encode_ps(pt_level);
1327
1328 if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
1329 pte |= XE_PPGTT_PTE_DM;
1330
1331 return pte;
1332}
1333
1334static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
1335 u16 pat_index, u32 pt_level)
1336{
1337 pte |= XE_PAGE_PRESENT;
1338
1339 if (likely(!xe_vma_read_only(vma)))
1340 pte |= XE_PAGE_RW;
1341
1342 pte |= pte_encode_pat_index(pat_index, pt_level);
1343 pte |= pte_encode_ps(pt_level);
1344
1345 if (unlikely(xe_vma_is_null(vma)))
1346 pte |= XE_PTE_NULL;
1347
1348 return pte;
1349}
1350
1351static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
1352 u16 pat_index,
1353 u32 pt_level, bool devmem, u64 flags)
1354{
1355 u64 pte;
1356
1357 /* Avoid passing random bits directly as flags */
1358 xe_assert(xe, !(flags & ~XE_PTE_PS64));
1359
1360 pte = addr;
1361 pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1362 pte |= pte_encode_pat_index(pat_index, pt_level);
1363 pte |= pte_encode_ps(pt_level);
1364
1365 if (devmem)
1366 pte |= XE_PPGTT_PTE_DM;
1367
1368 pte |= flags;
1369
1370 return pte;
1371}
1372
1373static const struct xe_pt_ops xelp_pt_ops = {
1374 .pte_encode_bo = xelp_pte_encode_bo,
1375 .pte_encode_vma = xelp_pte_encode_vma,
1376 .pte_encode_addr = xelp_pte_encode_addr,
1377 .pde_encode_bo = xelp_pde_encode_bo,
1378};
1379
1380static void vm_destroy_work_func(struct work_struct *w);
1381
1382/**
1383 * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the
1384 * given tile and vm.
1385 * @xe: xe device.
1386 * @tile: tile to set up for.
1387 * @vm: vm to set up for.
1388 *
1389 * Sets up a pagetable tree with one page-table per level and a single
1390 * leaf PTE. All pagetable entries point to the single page-table or,
1391 * for MAX_HUGEPTE_LEVEL, a NULL huge PTE that returns 0 on reads and
1392 * turns writes into NOPs.
1393 *
1394 * Return: 0 on success, negative error code on error.
1395 */
1396static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
1397 struct xe_vm *vm)
1398{
1399 u8 id = tile->id;
1400 int i;
1401
1402 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) {
1403 vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i);
1404 if (IS_ERR(vm->scratch_pt[id][i]))
1405 return PTR_ERR(vm->scratch_pt[id][i]);
1406
1407 xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]);
1408 }
1409
1410 return 0;
1411}
1412
1413static void xe_vm_free_scratch(struct xe_vm *vm)
1414{
1415 struct xe_tile *tile;
1416 u8 id;
1417
1418 if (!xe_vm_has_scratch(vm))
1419 return;
1420
1421 for_each_tile(tile, vm->xe, id) {
1422 u32 i;
1423
1424 if (!vm->pt_root[id])
1425 continue;
1426
1427 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i)
1428 if (vm->scratch_pt[id][i])
1429 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL);
1430 }
1431}
1432
1433struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
1434{
1435 struct drm_gem_object *vm_resv_obj;
1436 struct xe_vm *vm;
1437 int err, number_tiles = 0;
1438 struct xe_tile *tile;
1439 u8 id;
1440
1441 vm = kzalloc(sizeof(*vm), GFP_KERNEL);
1442 if (!vm)
1443 return ERR_PTR(-ENOMEM);
1444
1445 vm->xe = xe;
1446
1447 vm->size = 1ull << xe->info.va_bits;
1448
1449 vm->flags = flags;
1450
1451 init_rwsem(&vm->lock);
1452 mutex_init(&vm->snap_mutex);
1453
1454 INIT_LIST_HEAD(&vm->rebind_list);
1455
1456 INIT_LIST_HEAD(&vm->userptr.repin_list);
1457 INIT_LIST_HEAD(&vm->userptr.invalidated);
1458 init_rwsem(&vm->userptr.notifier_lock);
1459 spin_lock_init(&vm->userptr.invalidated_lock);
1460
1461 ttm_lru_bulk_move_init(&vm->lru_bulk_move);
1462
1463 INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
1464
1465 INIT_LIST_HEAD(&vm->preempt.exec_queues);
1466 vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */
1467
1468 for_each_tile(tile, xe, id)
1469 xe_range_fence_tree_init(&vm->rftree[id]);
1470
1471 vm->pt_ops = &xelp_pt_ops;
1472
1473 /*
1474 * Long-running workloads are not protected by the scheduler references.
1475 * By design, run_job for long-running workloads returns NULL and the
1476 * scheduler drops all of its references, hence protecting the VM
1477 * for this case is necessary.
1478 */
1479 if (flags & XE_VM_FLAG_LR_MODE)
1480 xe_pm_runtime_get_noresume(xe);
1481
1482 vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
1483 if (!vm_resv_obj) {
1484 err = -ENOMEM;
1485 goto err_no_resv;
1486 }
1487
1488 drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm,
1489 vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops);
1490
1491 drm_gem_object_put(vm_resv_obj);
1492
1493 err = xe_vm_lock(vm, true);
1494 if (err)
1495 goto err_close;
1496
1497 if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
1498 vm->flags |= XE_VM_FLAG_64K;
1499
1500 for_each_tile(tile, xe, id) {
1501 if (flags & XE_VM_FLAG_MIGRATION &&
1502 tile->id != XE_VM_FLAG_TILE_ID(flags))
1503 continue;
1504
1505 vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level);
1506 if (IS_ERR(vm->pt_root[id])) {
1507 err = PTR_ERR(vm->pt_root[id]);
1508 vm->pt_root[id] = NULL;
1509 goto err_unlock_close;
1510 }
1511 }
1512
1513 if (xe_vm_has_scratch(vm)) {
1514 for_each_tile(tile, xe, id) {
1515 if (!vm->pt_root[id])
1516 continue;
1517
1518 err = xe_vm_create_scratch(xe, tile, vm);
1519 if (err)
1520 goto err_unlock_close;
1521 }
1522 vm->batch_invalidate_tlb = true;
1523 }
1524
1525 if (vm->flags & XE_VM_FLAG_LR_MODE) {
1526 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
1527 vm->batch_invalidate_tlb = false;
1528 }
1529
1530 /* Fill pt_root after allocating scratch tables */
1531 for_each_tile(tile, xe, id) {
1532 if (!vm->pt_root[id])
1533 continue;
1534
1535 xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
1536 }
1537 xe_vm_unlock(vm);
1538
1539 /* Kernel migration VM shouldn't have a circular loop... */
1540 if (!(flags & XE_VM_FLAG_MIGRATION)) {
1541 for_each_tile(tile, xe, id) {
1542 struct xe_exec_queue *q;
1543 u32 create_flags = EXEC_QUEUE_FLAG_VM;
1544
1545 if (!vm->pt_root[id])
1546 continue;
1547
1548 q = xe_exec_queue_create_bind(xe, tile, create_flags, 0);
1549 if (IS_ERR(q)) {
1550 err = PTR_ERR(q);
1551 goto err_close;
1552 }
1553 vm->q[id] = q;
1554 number_tiles++;
1555 }
1556 }
1557
1558 if (number_tiles > 1)
1559 vm->composite_fence_ctx = dma_fence_context_alloc(1);
1560
1561 trace_xe_vm_create(vm);
1562
1563 return vm;
1564
1565err_unlock_close:
1566 xe_vm_unlock(vm);
1567err_close:
1568 xe_vm_close_and_put(vm);
1569 return ERR_PTR(err);
1570
1571err_no_resv:
1572 mutex_destroy(&vm->snap_mutex);
1573 for_each_tile(tile, xe, id)
1574 xe_range_fence_tree_fini(&vm->rftree[id]);
1575 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
1576 kfree(vm);
1577 if (flags & XE_VM_FLAG_LR_MODE)
1578 xe_pm_runtime_put(xe);
1579 return ERR_PTR(err);
1580}
1581
1582static void xe_vm_close(struct xe_vm *vm)
1583{
1584 down_write(&vm->lock);
1585 vm->size = 0;
1586 up_write(&vm->lock);
1587}
1588
1589void xe_vm_close_and_put(struct xe_vm *vm)
1590{
1591 LIST_HEAD(contested);
1592 struct xe_device *xe = vm->xe;
1593 struct xe_tile *tile;
1594 struct xe_vma *vma, *next_vma;
1595 struct drm_gpuva *gpuva, *next;
1596 u8 id;
1597
1598 xe_assert(xe, !vm->preempt.num_exec_queues);
1599
1600 xe_vm_close(vm);
1601 if (xe_vm_in_preempt_fence_mode(vm))
1602 flush_work(&vm->preempt.rebind_work);
1603
1604 down_write(&vm->lock);
1605 for_each_tile(tile, xe, id) {
1606 if (vm->q[id])
1607 xe_exec_queue_last_fence_put(vm->q[id], vm);
1608 }
1609 up_write(&vm->lock);
1610
1611 for_each_tile(tile, xe, id) {
1612 if (vm->q[id]) {
1613 xe_exec_queue_kill(vm->q[id]);
1614 xe_exec_queue_put(vm->q[id]);
1615 vm->q[id] = NULL;
1616 }
1617 }
1618
1619 down_write(&vm->lock);
1620 xe_vm_lock(vm, false);
1621 drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
1622 vma = gpuva_to_vma(gpuva);
1623
1624 if (xe_vma_has_no_bo(vma)) {
1625 down_read(&vm->userptr.notifier_lock);
1626 vma->gpuva.flags |= XE_VMA_DESTROYED;
1627 up_read(&vm->userptr.notifier_lock);
1628 }
1629
1630 xe_vm_remove_vma(vm, vma);
1631
1632 /* easy case, remove from VMA? */
1633 if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
1634 list_del_init(&vma->combined_links.rebind);
1635 xe_vma_destroy(vma, NULL);
1636 continue;
1637 }
1638
1639 list_move_tail(&vma->combined_links.destroy, &contested);
1640 vma->gpuva.flags |= XE_VMA_DESTROYED;
1641 }
1642
1643 /*
1644 * All vm operations will add shared fences to resv.
1645 * The only exception is eviction for a shared object,
1646 * but even so, the unbind when evicted would still
1647 * install a fence to resv. Hence it's safe to
1648 * destroy the pagetables immediately.
1649 */
1650 xe_vm_free_scratch(vm);
1651
1652 for_each_tile(tile, xe, id) {
1653 if (vm->pt_root[id]) {
1654 xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
1655 vm->pt_root[id] = NULL;
1656 }
1657 }
1658 xe_vm_unlock(vm);
1659
1660 /*
1661 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL
1662 * Since we hold a refcount to the bo, we can remove and free
1663 * the members safely without locking.
1664 */
1665 list_for_each_entry_safe(vma, next_vma, &contested,
1666 combined_links.destroy) {
1667 list_del_init(&vma->combined_links.destroy);
1668 xe_vma_destroy_unlocked(vma);
1669 }
1670
1671 up_write(&vm->lock);
1672
1673 down_write(&xe->usm.lock);
1674 if (vm->usm.asid) {
1675 void *lookup;
1676
1677 xe_assert(xe, xe->info.has_asid);
1678 xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION));
1679
1680 lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
1681 xe_assert(xe, lookup == vm);
1682 }
1683 up_write(&xe->usm.lock);
1684
1685 for_each_tile(tile, xe, id)
1686 xe_range_fence_tree_fini(&vm->rftree[id]);
1687
1688 xe_vm_put(vm);
1689}
1690
1691static void vm_destroy_work_func(struct work_struct *w)
1692{
1693 struct xe_vm *vm =
1694 container_of(w, struct xe_vm, destroy_work);
1695 struct xe_device *xe = vm->xe;
1696 struct xe_tile *tile;
1697 u8 id;
1698
1699 /* xe_vm_close_and_put was not called? */
1700 xe_assert(xe, !vm->size);
1701
1702 if (xe_vm_in_preempt_fence_mode(vm))
1703 flush_work(&vm->preempt.rebind_work);
1704
1705 mutex_destroy(&vm->snap_mutex);
1706
1707 if (vm->flags & XE_VM_FLAG_LR_MODE)
1708 xe_pm_runtime_put(xe);
1709
1710 for_each_tile(tile, xe, id)
1711 XE_WARN_ON(vm->pt_root[id]);
1712
1713 trace_xe_vm_free(vm);
1714
1715 ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
1716
1717 if (vm->xef)
1718 xe_file_put(vm->xef);
1719
1720 kfree(vm);
1721}
1722
1723static void xe_vm_free(struct drm_gpuvm *gpuvm)
1724{
1725 struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);
1726
1727 /* To destroy the VM we need to be able to sleep */
1728 queue_work(system_unbound_wq, &vm->destroy_work);
1729}
1730
1731struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
1732{
1733 struct xe_vm *vm;
1734
1735 mutex_lock(&xef->vm.lock);
1736 vm = xa_load(&xef->vm.xa, id);
1737 if (vm)
1738 xe_vm_get(vm);
1739 mutex_unlock(&xef->vm.lock);
1740
1741 return vm;
1742}
1743
1744u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
1745{
1746 return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0,
1747 tile_to_xe(tile)->pat.idx[XE_CACHE_WB]);
1748}
1749
1750static struct xe_exec_queue *
1751to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
1752{
1753 return q ? q : vm->q[0];
1754}
1755
1756static struct xe_user_fence *
1757find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
1758{
1759 unsigned int i;
1760
1761 for (i = 0; i < num_syncs; i++) {
1762 struct xe_sync_entry *e = &syncs[i];
1763
1764 if (xe_sync_is_ufence(e))
1765 return xe_sync_ufence_get(e);
1766 }
1767
1768 return NULL;
1769}
1770
1771#define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
1772 DRM_XE_VM_CREATE_FLAG_LR_MODE | \
1773 DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
1774
1775int xe_vm_create_ioctl(struct drm_device *dev, void *data,
1776 struct drm_file *file)
1777{
1778 struct xe_device *xe = to_xe_device(dev);
1779 struct xe_file *xef = to_xe_file(file);
1780 struct drm_xe_vm_create *args = data;
1781 struct xe_tile *tile;
1782 struct xe_vm *vm;
1783 u32 id, asid;
1784 int err;
1785 u32 flags = 0;
1786
1787 if (XE_IOCTL_DBG(xe, args->extensions))
1788 return -EINVAL;
1789
1790 if (XE_WA(xe_root_mmio_gt(xe), 14016763929))
1791 args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;
1792
1793 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
1794 !xe->info.has_usm))
1795 return -EINVAL;
1796
1797 if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1798 return -EINVAL;
1799
1800 if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
1801 return -EINVAL;
1802
1803 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE &&
1804 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
1805 return -EINVAL;
1806
1807 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) &&
1808 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
1809 return -EINVAL;
1810
1811 if (XE_IOCTL_DBG(xe, args->extensions))
1812 return -EINVAL;
1813
1814 if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
1815 flags |= XE_VM_FLAG_SCRATCH_PAGE;
1816 if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
1817 flags |= XE_VM_FLAG_LR_MODE;
1818 if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
1819 flags |= XE_VM_FLAG_FAULT_MODE;
1820
1821 vm = xe_vm_create(xe, flags);
1822 if (IS_ERR(vm))
1823 return PTR_ERR(vm);
1824
1825 if (xe->info.has_asid) {
1826 down_write(&xe->usm.lock);
1827 err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
1828 XA_LIMIT(1, XE_MAX_ASID - 1),
1829 &xe->usm.next_asid, GFP_KERNEL);
1830 up_write(&xe->usm.lock);
1831 if (err < 0)
1832 goto err_close_and_put;
1833
1834 vm->usm.asid = asid;
1835 }
1836
1837 vm->xef = xe_file_get(xef);
1838
1839 /* Record BO memory for VM pagetable created against client */
1840 for_each_tile(tile, xe, id)
1841 if (vm->pt_root[id])
1842 xe_drm_client_add_bo(vm->xef->client, vm->pt_root[id]->bo);
1843
1844#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
1845 /* Warning: Security issue - never enable by default */
1846 args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
1847#endif
1848
1849 /* user id alloc must always be last in ioctl to prevent UAF */
1850 err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
1851 if (err)
1852 goto err_close_and_put;
1853
1854 args->vm_id = id;
1855
1856 return 0;
1857
1858err_close_and_put:
1859 xe_vm_close_and_put(vm);
1860
1861 return err;
1862}
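/*
 * A minimal userspace sketch of driving the create/destroy ioctls above,
 * assuming an already-open xe DRM file descriptor 'fd' and omitting error
 * handling. The flag choice is illustrative only; see the checks in
 * xe_vm_create_ioctl() for which combinations are valid.
 *
 *	struct drm_xe_vm_create create = {
 *		.flags = DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE,
 *	};
 *	struct drm_xe_vm_destroy destroy = {};
 *
 *	ioctl(fd, DRM_IOCTL_XE_VM_CREATE, &create);
 *	// create.vm_id now names the VM for later VM_BIND/EXEC ioctls.
 *
 *	destroy.vm_id = create.vm_id;
 *	ioctl(fd, DRM_IOCTL_XE_VM_DESTROY, &destroy);
 */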
1863
1864int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
1865 struct drm_file *file)
1866{
1867 struct xe_device *xe = to_xe_device(dev);
1868 struct xe_file *xef = to_xe_file(file);
1869 struct drm_xe_vm_destroy *args = data;
1870 struct xe_vm *vm;
1871 int err = 0;
1872
1873 if (XE_IOCTL_DBG(xe, args->pad) ||
1874 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1875 return -EINVAL;
1876
1877 mutex_lock(&xef->vm.lock);
1878 vm = xa_load(&xef->vm.xa, args->vm_id);
1879 if (XE_IOCTL_DBG(xe, !vm))
1880 err = -ENOENT;
1881 else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues))
1882 err = -EBUSY;
1883 else
1884 xa_erase(&xef->vm.xa, args->vm_id);
1885 mutex_unlock(&xef->vm.lock);
1886
1887 if (!err)
1888 xe_vm_close_and_put(vm);
1889
1890 return err;
1891}
1892
1893static const u32 region_to_mem_type[] = {
1894 XE_PL_TT,
1895 XE_PL_VRAM0,
1896 XE_PL_VRAM1,
1897};
1898
1899static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
1900 bool post_commit)
1901{
1902 down_read(&vm->userptr.notifier_lock);
1903 vma->gpuva.flags |= XE_VMA_DESTROYED;
1904 up_read(&vm->userptr.notifier_lock);
1905 if (post_commit)
1906 xe_vm_remove_vma(vm, vma);
1907}
1908
1909#undef ULL
1910#define ULL unsigned long long
1911
1912#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
1913static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
1914{
1915 struct xe_vma *vma;
1916
1917 switch (op->op) {
1918 case DRM_GPUVA_OP_MAP:
1919 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx",
1920 (ULL)op->map.va.addr, (ULL)op->map.va.range);
1921 break;
1922 case DRM_GPUVA_OP_REMAP:
1923 vma = gpuva_to_vma(op->remap.unmap->va);
1924 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
1925 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
1926 op->remap.unmap->keep ? 1 : 0);
1927 if (op->remap.prev)
1928 vm_dbg(&xe->drm,
1929 "REMAP:PREV: addr=0x%016llx, range=0x%016llx",
1930 (ULL)op->remap.prev->va.addr,
1931 (ULL)op->remap.prev->va.range);
1932 if (op->remap.next)
1933 vm_dbg(&xe->drm,
1934 "REMAP:NEXT: addr=0x%016llx, range=0x%016llx",
1935 (ULL)op->remap.next->va.addr,
1936 (ULL)op->remap.next->va.range);
1937 break;
1938 case DRM_GPUVA_OP_UNMAP:
1939 vma = gpuva_to_vma(op->unmap.va);
1940 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
1941 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
1942 op->unmap.keep ? 1 : 0);
1943 break;
1944 case DRM_GPUVA_OP_PREFETCH:
1945 vma = gpuva_to_vma(op->prefetch.va);
1946 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx",
1947 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma));
1948 break;
1949 default:
1950 drm_warn(&xe->drm, "NOT POSSIBLE");
1951 }
1952}
1953#else
1954static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
1955{
1956}
1957#endif
1958
1959/*
1960 * Create operations list from IOCTL arguments, setup operations fields so parse
1961 * and commit steps are decoupled from IOCTL arguments. This step can fail.
1962 */
1963static struct drm_gpuva_ops *
1964vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
1965 u64 bo_offset_or_userptr, u64 addr, u64 range,
1966 u32 operation, u32 flags,
1967 u32 prefetch_region, u16 pat_index)
1968{
1969 struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
1970 struct drm_gpuva_ops *ops;
1971 struct drm_gpuva_op *__op;
1972 struct drm_gpuvm_bo *vm_bo;
1973 int err;
1974
1975 lockdep_assert_held_write(&vm->lock);
1976
1977 vm_dbg(&vm->xe->drm,
1978 "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx",
1979 operation, (ULL)addr, (ULL)range,
1980 (ULL)bo_offset_or_userptr);
1981
1982 switch (operation) {
1983 case DRM_XE_VM_BIND_OP_MAP:
1984 case DRM_XE_VM_BIND_OP_MAP_USERPTR:
1985 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range,
1986 obj, bo_offset_or_userptr);
1987 break;
1988 case DRM_XE_VM_BIND_OP_UNMAP:
1989 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range);
1990 break;
1991 case DRM_XE_VM_BIND_OP_PREFETCH:
1992 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range);
1993 break;
1994 case DRM_XE_VM_BIND_OP_UNMAP_ALL:
1995 xe_assert(vm->xe, bo);
1996
1997 err = xe_bo_lock(bo, true);
1998 if (err)
1999 return ERR_PTR(err);
2000
2001 vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj);
2002 if (IS_ERR(vm_bo)) {
2003 xe_bo_unlock(bo);
2004 return ERR_CAST(vm_bo);
2005 }
2006
2007 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
2008 drm_gpuvm_bo_put(vm_bo);
2009 xe_bo_unlock(bo);
2010 break;
2011 default:
2012 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2013 ops = ERR_PTR(-EINVAL);
2014 }
2015 if (IS_ERR(ops))
2016 return ops;
2017
2018 drm_gpuva_for_each_op(__op, ops) {
2019 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2020
2021 if (__op->op == DRM_GPUVA_OP_MAP) {
2022 op->map.immediate =
2023 flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE;
2024 op->map.read_only =
2025 flags & DRM_XE_VM_BIND_FLAG_READONLY;
2026 op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
2027 op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE;
2028 op->map.pat_index = pat_index;
2029 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
2030 op->prefetch.region = prefetch_region;
2031 }
2032
2033 print_op(vm->xe, __op);
2034 }
2035
2036 return ops;
2037}
2038
2039static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
2040 u16 pat_index, unsigned int flags)
2041{
2042 struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
2043 struct drm_exec exec;
2044 struct xe_vma *vma;
2045 int err = 0;
2046
2047 lockdep_assert_held_write(&vm->lock);
2048
2049 if (bo) {
2050 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
2051 drm_exec_until_all_locked(&exec) {
2052 err = 0;
2053 if (!bo->vm) {
2054 err = drm_exec_lock_obj(&exec, xe_vm_obj(vm));
2055 drm_exec_retry_on_contention(&exec);
2056 }
2057 if (!err) {
2058 err = drm_exec_lock_obj(&exec, &bo->ttm.base);
2059 drm_exec_retry_on_contention(&exec);
2060 }
2061 if (err) {
2062 drm_exec_fini(&exec);
2063 return ERR_PTR(err);
2064 }
2065 }
2066 }
2067 vma = xe_vma_create(vm, bo, op->gem.offset,
2068 op->va.addr, op->va.addr +
2069 op->va.range - 1, pat_index, flags);
2070 if (IS_ERR(vma))
2071 goto err_unlock;
2072
2073 if (xe_vma_is_userptr(vma))
2074 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2075 else if (!xe_vma_has_no_bo(vma) && !bo->vm)
2076 err = add_preempt_fences(vm, bo);
2077
2078err_unlock:
2079 if (bo)
2080 drm_exec_fini(&exec);
2081
2082 if (err) {
2083 prep_vma_destroy(vm, vma, false);
2084 xe_vma_destroy_unlocked(vma);
2085 vma = ERR_PTR(err);
2086 }
2087
2088 return vma;
2089}
2090
2091static u64 xe_vma_max_pte_size(struct xe_vma *vma)
2092{
2093 if (vma->gpuva.flags & XE_VMA_PTE_1G)
2094 return SZ_1G;
2095 else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT))
2096 return SZ_2M;
2097 else if (vma->gpuva.flags & XE_VMA_PTE_64K)
2098 return SZ_64K;
2099 else if (vma->gpuva.flags & XE_VMA_PTE_4K)
2100 return SZ_4K;
2101
2102	return SZ_1G;	/* Uninitialized, use max size */
2103}
2104
2105static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
2106{
2107 switch (size) {
2108 case SZ_1G:
2109 vma->gpuva.flags |= XE_VMA_PTE_1G;
2110 break;
2111 case SZ_2M:
2112 vma->gpuva.flags |= XE_VMA_PTE_2M;
2113 break;
2114 case SZ_64K:
2115 vma->gpuva.flags |= XE_VMA_PTE_64K;
2116 break;
2117 case SZ_4K:
2118 vma->gpuva.flags |= XE_VMA_PTE_4K;
2119 break;
2120 }
2121}
2122
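/*
 * Commit a parsed op into the VM's VMA tree: insert newly created VMAs,
 * mark unmapped VMAs for destruction and, for a REMAP, adjust the
 * remaining unmap range when the prev/next pieces are skipped. The
 * XE_VMA_OP_*COMMITTED flags record what must be undone on error.
 */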
2123static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
2124{
2125 int err = 0;
2126
2127 lockdep_assert_held_write(&vm->lock);
2128
2129 switch (op->base.op) {
2130 case DRM_GPUVA_OP_MAP:
2131 err |= xe_vm_insert_vma(vm, op->map.vma);
2132 if (!err)
2133 op->flags |= XE_VMA_OP_COMMITTED;
2134 break;
2135 case DRM_GPUVA_OP_REMAP:
2136 {
2137 u8 tile_present =
2138 gpuva_to_vma(op->base.remap.unmap->va)->tile_present;
2139
2140 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
2141 true);
2142 op->flags |= XE_VMA_OP_COMMITTED;
2143
2144 if (op->remap.prev) {
2145 err |= xe_vm_insert_vma(vm, op->remap.prev);
2146 if (!err)
2147 op->flags |= XE_VMA_OP_PREV_COMMITTED;
2148 if (!err && op->remap.skip_prev) {
2149 op->remap.prev->tile_present =
2150 tile_present;
2151 op->remap.prev = NULL;
2152 }
2153 }
2154 if (op->remap.next) {
2155 err |= xe_vm_insert_vma(vm, op->remap.next);
2156 if (!err)
2157 op->flags |= XE_VMA_OP_NEXT_COMMITTED;
2158 if (!err && op->remap.skip_next) {
2159 op->remap.next->tile_present =
2160 tile_present;
2161 op->remap.next = NULL;
2162 }
2163 }
2164
2165		/* Adjust for partial unbind after removing the VMA from the VM */
2166 if (!err) {
2167 op->base.remap.unmap->va->va.addr = op->remap.start;
2168 op->base.remap.unmap->va->va.range = op->remap.range;
2169 }
2170 break;
2171 }
2172 case DRM_GPUVA_OP_UNMAP:
2173 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
2174 op->flags |= XE_VMA_OP_COMMITTED;
2175 break;
2176 case DRM_GPUVA_OP_PREFETCH:
2177 op->flags |= XE_VMA_OP_COMMITTED;
2178 break;
2179 default:
2180 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2181 }
2182
2183 return err;
2184}
2185
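/*
 * Turn a list of drm_gpuva ops into xe_vma_ops: allocate VMAs for MAP ops
 * and for the prev/next pieces of a REMAP, pre-pin userptr pages for
 * PREFETCH, account the per-tile page-table update work, and commit each
 * op so that a later failure can be unwound in reverse order.
 */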
2186static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
2187 struct xe_vma_ops *vops)
2188{
2189 struct xe_device *xe = vm->xe;
2190 struct drm_gpuva_op *__op;
2191 struct xe_tile *tile;
2192 u8 id, tile_mask = 0;
2193 int err = 0;
2194
2195 lockdep_assert_held_write(&vm->lock);
2196
2197 for_each_tile(tile, vm->xe, id)
2198 tile_mask |= 0x1 << id;
2199
2200 drm_gpuva_for_each_op(__op, ops) {
2201 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2202 struct xe_vma *vma;
2203 unsigned int flags = 0;
2204
2205 INIT_LIST_HEAD(&op->link);
2206 list_add_tail(&op->link, &vops->list);
2207 op->tile_mask = tile_mask;
2208
2209 switch (op->base.op) {
2210 case DRM_GPUVA_OP_MAP:
2211 {
2212 flags |= op->map.read_only ?
2213 VMA_CREATE_FLAG_READ_ONLY : 0;
2214 flags |= op->map.is_null ?
2215 VMA_CREATE_FLAG_IS_NULL : 0;
2216 flags |= op->map.dumpable ?
2217 VMA_CREATE_FLAG_DUMPABLE : 0;
2218
2219 vma = new_vma(vm, &op->base.map, op->map.pat_index,
2220 flags);
2221 if (IS_ERR(vma))
2222 return PTR_ERR(vma);
2223
2224 op->map.vma = vma;
2225 if (op->map.immediate || !xe_vm_in_fault_mode(vm))
2226 xe_vma_ops_incr_pt_update_ops(vops,
2227 op->tile_mask);
2228 break;
2229 }
2230 case DRM_GPUVA_OP_REMAP:
2231 {
2232 struct xe_vma *old =
2233 gpuva_to_vma(op->base.remap.unmap->va);
2234
2235 op->remap.start = xe_vma_start(old);
2236 op->remap.range = xe_vma_size(old);
2237
2238 if (op->base.remap.prev) {
2239 flags |= op->base.remap.unmap->va->flags &
2240 XE_VMA_READ_ONLY ?
2241 VMA_CREATE_FLAG_READ_ONLY : 0;
2242 flags |= op->base.remap.unmap->va->flags &
2243 DRM_GPUVA_SPARSE ?
2244 VMA_CREATE_FLAG_IS_NULL : 0;
2245 flags |= op->base.remap.unmap->va->flags &
2246 XE_VMA_DUMPABLE ?
2247 VMA_CREATE_FLAG_DUMPABLE : 0;
2248
2249 vma = new_vma(vm, op->base.remap.prev,
2250 old->pat_index, flags);
2251 if (IS_ERR(vma))
2252 return PTR_ERR(vma);
2253
2254 op->remap.prev = vma;
2255
2256 /*
2257 * Userptr creates a new SG mapping so
2258 * we must also rebind.
2259 */
2260 op->remap.skip_prev = !xe_vma_is_userptr(old) &&
2261 IS_ALIGNED(xe_vma_end(vma),
2262 xe_vma_max_pte_size(old));
2263 if (op->remap.skip_prev) {
2264 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2265 op->remap.range -=
2266 xe_vma_end(vma) -
2267 xe_vma_start(old);
2268 op->remap.start = xe_vma_end(vma);
2269 vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx",
2270 (ULL)op->remap.start,
2271 (ULL)op->remap.range);
2272 } else {
2273 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
2274 }
2275 }
2276
2277 if (op->base.remap.next) {
2278 flags |= op->base.remap.unmap->va->flags &
2279 XE_VMA_READ_ONLY ?
2280 VMA_CREATE_FLAG_READ_ONLY : 0;
2281 flags |= op->base.remap.unmap->va->flags &
2282 DRM_GPUVA_SPARSE ?
2283 VMA_CREATE_FLAG_IS_NULL : 0;
2284 flags |= op->base.remap.unmap->va->flags &
2285 XE_VMA_DUMPABLE ?
2286 VMA_CREATE_FLAG_DUMPABLE : 0;
2287
2288 vma = new_vma(vm, op->base.remap.next,
2289 old->pat_index, flags);
2290 if (IS_ERR(vma))
2291 return PTR_ERR(vma);
2292
2293 op->remap.next = vma;
2294
2295 /*
2296 * Userptr creates a new SG mapping so
2297 * we must also rebind.
2298 */
2299 op->remap.skip_next = !xe_vma_is_userptr(old) &&
2300 IS_ALIGNED(xe_vma_start(vma),
2301 xe_vma_max_pte_size(old));
2302 if (op->remap.skip_next) {
2303 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2304 op->remap.range -=
2305 xe_vma_end(old) -
2306 xe_vma_start(vma);
2307 vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx",
2308 (ULL)op->remap.start,
2309 (ULL)op->remap.range);
2310 } else {
2311 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
2312 }
2313 }
2314 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
2315 break;
2316 }
2317 case DRM_GPUVA_OP_UNMAP:
2318 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
2319 break;
2320 case DRM_GPUVA_OP_PREFETCH:
2321 vma = gpuva_to_vma(op->base.prefetch.va);
2322
2323 if (xe_vma_is_userptr(vma)) {
2324 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2325 if (err)
2326 return err;
2327 }
2328
2329 xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
2330 break;
2331 default:
2332 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2333 }
2334
2335 err = xe_vma_op_commit(vm, op);
2336 if (err)
2337 return err;
2338 }
2339
2340 return 0;
2341}
2342
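/*
 * Undo a single committed op: destroy VMAs created for MAP/REMAP, clear
 * the DESTROYED flag on VMAs that were about to be unmapped and re-insert
 * them into the VM if they had already been removed.
 */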
2343static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
2344 bool post_commit, bool prev_post_commit,
2345 bool next_post_commit)
2346{
2347 lockdep_assert_held_write(&vm->lock);
2348
2349 switch (op->base.op) {
2350 case DRM_GPUVA_OP_MAP:
2351 if (op->map.vma) {
2352 prep_vma_destroy(vm, op->map.vma, post_commit);
2353 xe_vma_destroy_unlocked(op->map.vma);
2354 }
2355 break;
2356 case DRM_GPUVA_OP_UNMAP:
2357 {
2358 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
2359
2360 if (vma) {
2361 down_read(&vm->userptr.notifier_lock);
2362 vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2363 up_read(&vm->userptr.notifier_lock);
2364 if (post_commit)
2365 xe_vm_insert_vma(vm, vma);
2366 }
2367 break;
2368 }
2369 case DRM_GPUVA_OP_REMAP:
2370 {
2371 struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va);
2372
2373 if (op->remap.prev) {
2374 prep_vma_destroy(vm, op->remap.prev, prev_post_commit);
2375 xe_vma_destroy_unlocked(op->remap.prev);
2376 }
2377 if (op->remap.next) {
2378 prep_vma_destroy(vm, op->remap.next, next_post_commit);
2379 xe_vma_destroy_unlocked(op->remap.next);
2380 }
2381 if (vma) {
2382 down_read(&vm->userptr.notifier_lock);
2383 vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2384 up_read(&vm->userptr.notifier_lock);
2385 if (post_commit)
2386 xe_vm_insert_vma(vm, vma);
2387 }
2388 break;
2389 }
2390 case DRM_GPUVA_OP_PREFETCH:
2391 /* Nothing to do */
2392 break;
2393 default:
2394 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2395 }
2396}
2397
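/*
 * Unwind all op lists of a failed bind in reverse order, using the per-op
 * COMMITTED flags to know how far each op got.
 */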
2398static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
2399 struct drm_gpuva_ops **ops,
2400 int num_ops_list)
2401{
2402 int i;
2403
2404 for (i = num_ops_list - 1; i >= 0; --i) {
2405 struct drm_gpuva_ops *__ops = ops[i];
2406 struct drm_gpuva_op *__op;
2407
2408 if (!__ops)
2409 continue;
2410
2411 drm_gpuva_for_each_op_reverse(__op, __ops) {
2412 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2413
2414 xe_vma_op_unwind(vm, op,
2415 op->flags & XE_VMA_OP_COMMITTED,
2416 op->flags & XE_VMA_OP_PREV_COMMITTED,
2417 op->flags & XE_VMA_OP_NEXT_COMMITTED);
2418 }
2419 }
2420}
2421
2422static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
2423 bool validate)
2424{
2425 struct xe_bo *bo = xe_vma_bo(vma);
2426 int err = 0;
2427
2428 if (bo) {
2429 if (!bo->vm)
2430 err = drm_exec_lock_obj(exec, &bo->ttm.base);
2431 if (!err && validate)
2432 err = xe_bo_validate(bo, xe_vma_vm(vma), true);
2433 }
2434
2435 return err;
2436}
2437
2438static int check_ufence(struct xe_vma *vma)
2439{
2440 if (vma->ufence) {
2441 struct xe_user_fence * const f = vma->ufence;
2442
2443 if (!xe_sync_ufence_get_status(f))
2444 return -EBUSY;
2445
2446 vma->ufence = NULL;
2447 xe_sync_ufence_put(f);
2448 }
2449
2450 return 0;
2451}
2452
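/*
 * Lock (and, where needed, validate) the objects an op touches. REMAP and
 * UNMAP first check that any user fence attached to the VMA has signalled;
 * PREFETCH additionally migrates the backing BO to the requested memory
 * region.
 */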
2453static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
2454 struct xe_vma_op *op)
2455{
2456 int err = 0;
2457
2458 switch (op->base.op) {
2459 case DRM_GPUVA_OP_MAP:
2460 err = vma_lock_and_validate(exec, op->map.vma,
2461 !xe_vm_in_fault_mode(vm) ||
2462 op->map.immediate);
2463 break;
2464 case DRM_GPUVA_OP_REMAP:
2465 err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va));
2466 if (err)
2467 break;
2468
2469 err = vma_lock_and_validate(exec,
2470 gpuva_to_vma(op->base.remap.unmap->va),
2471 false);
2472 if (!err && op->remap.prev)
2473 err = vma_lock_and_validate(exec, op->remap.prev, true);
2474 if (!err && op->remap.next)
2475 err = vma_lock_and_validate(exec, op->remap.next, true);
2476 break;
2477 case DRM_GPUVA_OP_UNMAP:
2478 err = check_ufence(gpuva_to_vma(op->base.unmap.va));
2479 if (err)
2480 break;
2481
2482 err = vma_lock_and_validate(exec,
2483 gpuva_to_vma(op->base.unmap.va),
2484 false);
2485 break;
2486 case DRM_GPUVA_OP_PREFETCH:
2487 {
2488 struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
2489 u32 region = op->prefetch.region;
2490
2491		xe_assert(vm->xe, region < ARRAY_SIZE(region_to_mem_type));
2492
2493 err = vma_lock_and_validate(exec,
2494 gpuva_to_vma(op->base.prefetch.va),
2495 false);
2496 if (!err && !xe_vma_has_no_bo(vma))
2497 err = xe_bo_migrate(xe_vma_bo(vma),
2498 region_to_mem_type[region]);
2499 break;
2500 }
2501 default:
2502 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2503 }
2504
2505 return err;
2506}
2507
2508static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
2509 struct xe_vm *vm,
2510 struct xe_vma_ops *vops)
2511{
2512 struct xe_vma_op *op;
2513 int err;
2514
2515 err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
2516 if (err)
2517 return err;
2518
2519 list_for_each_entry(op, &vops->list, link) {
2520 err = op_lock_and_prep(exec, vm, op);
2521 if (err)
2522 return err;
2523 }
2524
2525#ifdef TEST_VM_OPS_ERROR
2526 if (vops->inject_error &&
2527 vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK)
2528 return -ENOSPC;
2529#endif
2530
2531 return 0;
2532}
2533
2534static void op_trace(struct xe_vma_op *op)
2535{
2536 switch (op->base.op) {
2537 case DRM_GPUVA_OP_MAP:
2538 trace_xe_vma_bind(op->map.vma);
2539 break;
2540 case DRM_GPUVA_OP_REMAP:
2541 trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va));
2542 if (op->remap.prev)
2543 trace_xe_vma_bind(op->remap.prev);
2544 if (op->remap.next)
2545 trace_xe_vma_bind(op->remap.next);
2546 break;
2547 case DRM_GPUVA_OP_UNMAP:
2548 trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va));
2549 break;
2550 case DRM_GPUVA_OP_PREFETCH:
2551 trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va));
2552 break;
2553 default:
2554 XE_WARN_ON("NOT POSSIBLE");
2555 }
2556}
2557
2558static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops)
2559{
2560 struct xe_vma_op *op;
2561
2562 list_for_each_entry(op, &vops->list, link)
2563 op_trace(op);
2564}
2565
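/*
 * Assign an exec queue to each tile that has page-table update work: use
 * the supplied queue (walking its multi-GT list across tiles) or fall back
 * to the VM's default per-tile queue. Returns the number of tiles with
 * pending updates.
 */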
2566static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops)
2567{
2568 struct xe_exec_queue *q = vops->q;
2569 struct xe_tile *tile;
2570 int number_tiles = 0;
2571 u8 id;
2572
2573 for_each_tile(tile, vm->xe, id) {
2574 if (vops->pt_update_ops[id].num_ops)
2575 ++number_tiles;
2576
2577 if (vops->pt_update_ops[id].q)
2578 continue;
2579
2580 if (q) {
2581 vops->pt_update_ops[id].q = q;
2582 if (vm->pt_root[id] && !list_empty(&q->multi_gt_list))
2583 q = list_next_entry(q, multi_gt_list);
2584 } else {
2585 vops->pt_update_ops[id].q = vm->q[id];
2586 }
2587 }
2588
2589 return number_tiles;
2590}
2591
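/*
 * Prepare and run the page-table updates for every tile with work,
 * collecting the per-tile fences into a dma_fence_array when more than one
 * tile is involved. On error, abort the prepared updates and drop any
 * fences already collected.
 */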
2592static struct dma_fence *ops_execute(struct xe_vm *vm,
2593 struct xe_vma_ops *vops)
2594{
2595 struct xe_tile *tile;
2596 struct dma_fence *fence = NULL;
2597 struct dma_fence **fences = NULL;
2598 struct dma_fence_array *cf = NULL;
2599 int number_tiles = 0, current_fence = 0, err;
2600 u8 id;
2601
2602 number_tiles = vm_ops_setup_tile_args(vm, vops);
2603 if (number_tiles == 0)
2604 return ERR_PTR(-ENODATA);
2605
2606 if (number_tiles > 1) {
2607 fences = kmalloc_array(number_tiles, sizeof(*fences),
2608 GFP_KERNEL);
2609 if (!fences) {
2610 fence = ERR_PTR(-ENOMEM);
2611 goto err_trace;
2612 }
2613 }
2614
2615 for_each_tile(tile, vm->xe, id) {
2616 if (!vops->pt_update_ops[id].num_ops)
2617 continue;
2618
2619 err = xe_pt_update_ops_prepare(tile, vops);
2620 if (err) {
2621 fence = ERR_PTR(err);
2622 goto err_out;
2623 }
2624 }
2625
2626 trace_xe_vm_ops_execute(vops);
2627
2628 for_each_tile(tile, vm->xe, id) {
2629 if (!vops->pt_update_ops[id].num_ops)
2630 continue;
2631
2632 fence = xe_pt_update_ops_run(tile, vops);
2633 if (IS_ERR(fence))
2634 goto err_out;
2635
2636 if (fences)
2637 fences[current_fence++] = fence;
2638 }
2639
2640 if (fences) {
2641 cf = dma_fence_array_create(number_tiles, fences,
2642 vm->composite_fence_ctx,
2643 vm->composite_fence_seqno++,
2644 false);
2645 if (!cf) {
2646 --vm->composite_fence_seqno;
2647 fence = ERR_PTR(-ENOMEM);
2648 goto err_out;
2649 }
2650 fence = &cf->base;
2651 }
2652
2653 for_each_tile(tile, vm->xe, id) {
2654 if (!vops->pt_update_ops[id].num_ops)
2655 continue;
2656
2657 xe_pt_update_ops_fini(tile, vops);
2658 }
2659
2660 return fence;
2661
2662err_out:
2663 for_each_tile(tile, vm->xe, id) {
2664 if (!vops->pt_update_ops[id].num_ops)
2665 continue;
2666
2667 xe_pt_update_ops_abort(tile, vops);
2668 }
2669 while (current_fence)
2670 dma_fence_put(fences[--current_fence]);
2671 kfree(fences);
2672 kfree(cf);
2673
2674err_trace:
2675 trace_xe_vm_ops_fail(vm);
2676 return fence;
2677}
2678
2679static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence)
2680{
2681 if (vma->ufence)
2682 xe_sync_ufence_put(vma->ufence);
2683 vma->ufence = __xe_sync_ufence_get(ufence);
2684}
2685
2686static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op,
2687 struct xe_user_fence *ufence)
2688{
2689 switch (op->base.op) {
2690 case DRM_GPUVA_OP_MAP:
2691 vma_add_ufence(op->map.vma, ufence);
2692 break;
2693 case DRM_GPUVA_OP_REMAP:
2694 if (op->remap.prev)
2695 vma_add_ufence(op->remap.prev, ufence);
2696 if (op->remap.next)
2697 vma_add_ufence(op->remap.next, ufence);
2698 break;
2699 case DRM_GPUVA_OP_UNMAP:
2700 break;
2701 case DRM_GPUVA_OP_PREFETCH:
2702 vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence);
2703 break;
2704 default:
2705 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2706 }
2707}
2708
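/*
 * Finish a successful bind: attach the (optional) user fence to the
 * touched VMAs, destroy VMAs removed by UNMAP/REMAP against the bind
 * fence, signal the sync entries and record the fence as the exec queue's
 * last fence.
 */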
2709static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
2710 struct dma_fence *fence)
2711{
2712 struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q);
2713 struct xe_user_fence *ufence;
2714 struct xe_vma_op *op;
2715 int i;
2716
2717 ufence = find_ufence_get(vops->syncs, vops->num_syncs);
2718 list_for_each_entry(op, &vops->list, link) {
2719 if (ufence)
2720 op_add_ufence(vm, op, ufence);
2721
2722 if (op->base.op == DRM_GPUVA_OP_UNMAP)
2723 xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence);
2724 else if (op->base.op == DRM_GPUVA_OP_REMAP)
2725 xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va),
2726 fence);
2727 }
2728 if (ufence)
2729 xe_sync_ufence_put(ufence);
2730 for (i = 0; i < vops->num_syncs; i++)
2731 xe_sync_entry_signal(vops->syncs + i, fence);
2732 xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
2733 dma_fence_put(fence);
2734}
2735
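/*
 * Execute a fully parsed set of VMA ops: lock and prepare everything under
 * a single drm_exec transaction, run the page-table updates and finish the
 * bind with the resulting fence.
 */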
2736static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
2737 struct xe_vma_ops *vops)
2738{
2739 struct drm_exec exec;
2740 struct dma_fence *fence;
2741 int err;
2742
2743 lockdep_assert_held_write(&vm->lock);
2744
2745 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
2746 DRM_EXEC_IGNORE_DUPLICATES, 0);
2747 drm_exec_until_all_locked(&exec) {
2748 err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops);
2749 drm_exec_retry_on_contention(&exec);
2750 if (err)
2751 goto unlock;
2752
2753 fence = ops_execute(vm, vops);
2754 if (IS_ERR(fence)) {
2755 err = PTR_ERR(fence);
2756 goto unlock;
2757 }
2758
2759 vm_bind_ioctl_ops_fini(vm, vops, fence);
2760 }
2761
2762unlock:
2763 drm_exec_fini(&exec);
2764 return err;
2765}
2766
2767#define SUPPORTED_FLAGS_STUB \
2768 (DRM_XE_VM_BIND_FLAG_READONLY | \
2769 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \
2770 DRM_XE_VM_BIND_FLAG_NULL | \
2771 DRM_XE_VM_BIND_FLAG_DUMPABLE)
2772
2773#ifdef TEST_VM_OPS_ERROR
2774#define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR)
2775#else
2776#define SUPPORTED_FLAGS SUPPORTED_FLAGS_STUB
2777#endif
2778
2779#define XE_64K_PAGE_MASK 0xffffull
2780#define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
2781
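/*
 * Copy and sanity-check the bind ops supplied by userspace: a vector of
 * ops is copied into a temporary allocation, and every op is checked for a
 * valid pat_index, a supported op/flag combination and page-aligned
 * address, range and object offset.
 */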
2782static int vm_bind_ioctl_check_args(struct xe_device *xe,
2783 struct drm_xe_vm_bind *args,
2784 struct drm_xe_vm_bind_op **bind_ops)
2785{
2786 int err;
2787 int i;
2788
2789 if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
2790 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2791 return -EINVAL;
2792
2793 if (XE_IOCTL_DBG(xe, args->extensions))
2794 return -EINVAL;
2795
2796 if (args->num_binds > 1) {
2797 u64 __user *bind_user =
2798 u64_to_user_ptr(args->vector_of_binds);
2799
2800 *bind_ops = kvmalloc_array(args->num_binds,
2801 sizeof(struct drm_xe_vm_bind_op),
2802 GFP_KERNEL | __GFP_ACCOUNT);
2803 if (!*bind_ops)
2804 return args->num_binds > 1 ? -ENOBUFS : -ENOMEM;
2805
2806 err = __copy_from_user(*bind_ops, bind_user,
2807 sizeof(struct drm_xe_vm_bind_op) *
2808 args->num_binds);
2809 if (XE_IOCTL_DBG(xe, err)) {
2810 err = -EFAULT;
2811 goto free_bind_ops;
2812 }
2813 } else {
2814 *bind_ops = &args->bind;
2815 }
2816
2817 for (i = 0; i < args->num_binds; ++i) {
2818 u64 range = (*bind_ops)[i].range;
2819 u64 addr = (*bind_ops)[i].addr;
2820 u32 op = (*bind_ops)[i].op;
2821 u32 flags = (*bind_ops)[i].flags;
2822 u32 obj = (*bind_ops)[i].obj;
2823 u64 obj_offset = (*bind_ops)[i].obj_offset;
2824 u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance;
2825 bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
2826 u16 pat_index = (*bind_ops)[i].pat_index;
2827 u16 coh_mode;
2828
2829 if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) {
2830 err = -EINVAL;
2831 goto free_bind_ops;
2832 }
2833
2834 pat_index = array_index_nospec(pat_index, xe->pat.n_entries);
2835 (*bind_ops)[i].pat_index = pat_index;
2836 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
2837 if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */
2838 err = -EINVAL;
2839 goto free_bind_ops;
2840 }
2841
2842 if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) {
2843 err = -EINVAL;
2844 goto free_bind_ops;
2845 }
2846
2847 if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) ||
2848 XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
2849 XE_IOCTL_DBG(xe, obj && is_null) ||
2850 XE_IOCTL_DBG(xe, obj_offset && is_null) ||
2851 XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP &&
2852 is_null) ||
2853 XE_IOCTL_DBG(xe, !obj &&
2854 op == DRM_XE_VM_BIND_OP_MAP &&
2855 !is_null) ||
2856 XE_IOCTL_DBG(xe, !obj &&
2857 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
2858 XE_IOCTL_DBG(xe, addr &&
2859 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
2860 XE_IOCTL_DBG(xe, range &&
2861 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
2862 XE_IOCTL_DBG(xe, obj &&
2863 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
2864 XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
2865 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
2866 XE_IOCTL_DBG(xe, obj &&
2867 op == DRM_XE_VM_BIND_OP_PREFETCH) ||
2868 XE_IOCTL_DBG(xe, prefetch_region &&
2869 op != DRM_XE_VM_BIND_OP_PREFETCH) ||
2870 XE_IOCTL_DBG(xe, !(BIT(prefetch_region) &
2871 xe->info.mem_region_mask)) ||
2872 XE_IOCTL_DBG(xe, obj &&
2873 op == DRM_XE_VM_BIND_OP_UNMAP)) {
2874 err = -EINVAL;
2875 goto free_bind_ops;
2876 }
2877
2878 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) ||
2879 XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
2880 XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
2881 XE_IOCTL_DBG(xe, !range &&
2882 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) {
2883 err = -EINVAL;
2884 goto free_bind_ops;
2885 }
2886 }
2887
2888 return 0;
2889
2890free_bind_ops:
2891 if (args->num_binds > 1)
2892 kvfree(*bind_ops);
2893 return err;
2894}
2895
2896static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
2897 struct xe_exec_queue *q,
2898 struct xe_sync_entry *syncs,
2899 int num_syncs)
2900{
2901 struct dma_fence *fence;
2902 int i, err = 0;
2903
2904 fence = xe_sync_in_fence_get(syncs, num_syncs,
2905 to_wait_exec_queue(vm, q), vm);
2906 if (IS_ERR(fence))
2907 return PTR_ERR(fence);
2908
2909 for (i = 0; i < num_syncs; i++)
2910 xe_sync_entry_signal(&syncs[i], fence);
2911
2912 xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm,
2913 fence);
2914 dma_fence_put(fence);
2915
2916 return err;
2917}
2918
2919static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
2920 struct xe_exec_queue *q,
2921 struct xe_sync_entry *syncs, u32 num_syncs)
2922{
2923 memset(vops, 0, sizeof(*vops));
2924 INIT_LIST_HEAD(&vops->list);
2925 vops->vm = vm;
2926 vops->q = q;
2927 vops->syncs = syncs;
2928 vops->num_syncs = num_syncs;
2929}
2930
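/*
 * Per-BO checks for a bind: the mapped range must fit within the BO, 64k
 * alignment is enforced on platforms that need it, and the requested PAT
 * coherency mode must be compatible with the BO's CPU caching.
 */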
2931static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
2932 u64 addr, u64 range, u64 obj_offset,
2933 u16 pat_index)
2934{
2935 u16 coh_mode;
2936
2937 if (XE_IOCTL_DBG(xe, range > bo->size) ||
2938 XE_IOCTL_DBG(xe, obj_offset >
2939 bo->size - range)) {
2940 return -EINVAL;
2941 }
2942
2943 /*
2944 * Some platforms require 64k VM_BIND alignment,
2945 * specifically those with XE_VRAM_FLAGS_NEED64K.
2946 *
2947	 * Other platforms may have BOs set to 64k physical placement,
2948 * but can be mapped at 4k offsets anyway. This check is only
2949 * there for the former case.
2950 */
2951 if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) &&
2952 (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) {
2953 if (XE_IOCTL_DBG(xe, obj_offset &
2954 XE_64K_PAGE_MASK) ||
2955 XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
2956 XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
2957 return -EINVAL;
2958 }
2959 }
2960
2961 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
2962 if (bo->cpu_caching) {
2963 if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
2964 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
2965 return -EINVAL;
2966 }
2967 } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
2968 /*
2969		 * An imported dma-buf from a different device should
2970		 * require 1-way or 2-way coherency since we don't know
2971		 * how it was mapped on the CPU. Just assume it is
2972		 * potentially cached on the CPU side.
2973 */
2974 return -EINVAL;
2975 }
2976
2977 return 0;
2978}
2979
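/*
 * DRM_XE_VM_BIND ioctl: check the bind-op array, look up the exec queue,
 * VM and GEM objects, parse syncs, then build, parse and execute GPUVA ops
 * for each bind; on failure the committed ops are unwound.
 *
 * Illustrative userspace sketch only (not authoritative; assumes the
 * drmIoctl() helper and the DRM_IOCTL_XE_VM_BIND macro from
 * uapi/drm/xe_drm.h, field names taken from the checks above):
 *
 *	struct drm_xe_vm_bind bind = {
 *		.vm_id = vm_id,
 *		.num_binds = 1,
 *		.bind = {
 *			.obj = bo_handle,
 *			.obj_offset = 0,
 *			.addr = gpu_addr,	/. page aligned ./
 *			.range = size,		/. page aligned ./
 *			.op = DRM_XE_VM_BIND_OP_MAP,
 *			.pat_index = pat_index,
 *		},
 *	};
 *	err = drmIoctl(fd, DRM_IOCTL_XE_VM_BIND, &bind);
 */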
2980int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2981{
2982 struct xe_device *xe = to_xe_device(dev);
2983 struct xe_file *xef = to_xe_file(file);
2984 struct drm_xe_vm_bind *args = data;
2985 struct drm_xe_sync __user *syncs_user;
2986 struct xe_bo **bos = NULL;
2987 struct drm_gpuva_ops **ops = NULL;
2988 struct xe_vm *vm;
2989 struct xe_exec_queue *q = NULL;
2990 u32 num_syncs, num_ufence = 0;
2991 struct xe_sync_entry *syncs = NULL;
2992 struct drm_xe_vm_bind_op *bind_ops;
2993 struct xe_vma_ops vops;
2994 int err;
2995 int i;
2996
2997 err = vm_bind_ioctl_check_args(xe, args, &bind_ops);
2998 if (err)
2999 return err;
3000
3001 if (args->exec_queue_id) {
3002 q = xe_exec_queue_lookup(xef, args->exec_queue_id);
3003 if (XE_IOCTL_DBG(xe, !q)) {
3004 err = -ENOENT;
3005 goto free_objs;
3006 }
3007
3008 if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
3009 err = -EINVAL;
3010 goto put_exec_queue;
3011 }
3012 }
3013
3014 vm = xe_vm_lookup(xef, args->vm_id);
3015 if (XE_IOCTL_DBG(xe, !vm)) {
3016 err = -EINVAL;
3017 goto put_exec_queue;
3018 }
3019
3020 err = down_write_killable(&vm->lock);
3021 if (err)
3022 goto put_vm;
3023
3024 if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
3025 err = -ENOENT;
3026 goto release_vm_lock;
3027 }
3028
3029 for (i = 0; i < args->num_binds; ++i) {
3030 u64 range = bind_ops[i].range;
3031 u64 addr = bind_ops[i].addr;
3032
3033 if (XE_IOCTL_DBG(xe, range > vm->size) ||
3034 XE_IOCTL_DBG(xe, addr > vm->size - range)) {
3035 err = -EINVAL;
3036 goto release_vm_lock;
3037 }
3038 }
3039
3040 if (args->num_binds) {
3041 bos = kvcalloc(args->num_binds, sizeof(*bos),
3042 GFP_KERNEL | __GFP_ACCOUNT);
3043 if (!bos) {
3044 err = -ENOMEM;
3045 goto release_vm_lock;
3046 }
3047
3048 ops = kvcalloc(args->num_binds, sizeof(*ops),
3049 GFP_KERNEL | __GFP_ACCOUNT);
3050 if (!ops) {
3051 err = -ENOMEM;
3052 goto release_vm_lock;
3053 }
3054 }
3055
3056 for (i = 0; i < args->num_binds; ++i) {
3057 struct drm_gem_object *gem_obj;
3058 u64 range = bind_ops[i].range;
3059 u64 addr = bind_ops[i].addr;
3060 u32 obj = bind_ops[i].obj;
3061 u64 obj_offset = bind_ops[i].obj_offset;
3062 u16 pat_index = bind_ops[i].pat_index;
3063
3064 if (!obj)
3065 continue;
3066
3067 gem_obj = drm_gem_object_lookup(file, obj);
3068 if (XE_IOCTL_DBG(xe, !gem_obj)) {
3069 err = -ENOENT;
3070 goto put_obj;
3071 }
3072 bos[i] = gem_to_xe_bo(gem_obj);
3073
3074 err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range,
3075 obj_offset, pat_index);
3076 if (err)
3077 goto put_obj;
3078 }
3079
3080 if (args->num_syncs) {
3081 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
3082 if (!syncs) {
3083 err = -ENOMEM;
3084 goto put_obj;
3085 }
3086 }
3087
3088 syncs_user = u64_to_user_ptr(args->syncs);
3089 for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
3090 err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
3091 &syncs_user[num_syncs],
3092 (xe_vm_in_lr_mode(vm) ?
3093 SYNC_PARSE_FLAG_LR_MODE : 0) |
3094 (!args->num_binds ?
3095 SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0));
3096 if (err)
3097 goto free_syncs;
3098
3099 if (xe_sync_is_ufence(&syncs[num_syncs]))
3100 num_ufence++;
3101 }
3102
3103 if (XE_IOCTL_DBG(xe, num_ufence > 1)) {
3104 err = -EINVAL;
3105 goto free_syncs;
3106 }
3107
3108 if (!args->num_binds) {
3109 err = -ENODATA;
3110 goto free_syncs;
3111 }
3112
3113 xe_vma_ops_init(&vops, vm, q, syncs, num_syncs);
3114 for (i = 0; i < args->num_binds; ++i) {
3115 u64 range = bind_ops[i].range;
3116 u64 addr = bind_ops[i].addr;
3117 u32 op = bind_ops[i].op;
3118 u32 flags = bind_ops[i].flags;
3119 u64 obj_offset = bind_ops[i].obj_offset;
3120 u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
3121 u16 pat_index = bind_ops[i].pat_index;
3122
3123 ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset,
3124 addr, range, op, flags,
3125 prefetch_region, pat_index);
3126 if (IS_ERR(ops[i])) {
3127 err = PTR_ERR(ops[i]);
3128 ops[i] = NULL;
3129 goto unwind_ops;
3130 }
3131
3132 err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops);
3133 if (err)
3134 goto unwind_ops;
3135
3136#ifdef TEST_VM_OPS_ERROR
3137 if (flags & FORCE_OP_ERROR) {
3138 vops.inject_error = true;
3139 vm->xe->vm_inject_error_position =
3140 (vm->xe->vm_inject_error_position + 1) %
3141 FORCE_OP_ERROR_COUNT;
3142 }
3143#endif
3144 }
3145
3146 /* Nothing to do */
3147 if (list_empty(&vops.list)) {
3148 err = -ENODATA;
3149 goto unwind_ops;
3150 }
3151
3152 err = xe_vma_ops_alloc(&vops, args->num_binds > 1);
3153 if (err)
3154 goto unwind_ops;
3155
3156 err = vm_bind_ioctl_ops_execute(vm, &vops);
3157
3158unwind_ops:
3159 if (err && err != -ENODATA)
3160 vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
3161 xe_vma_ops_fini(&vops);
3162 for (i = args->num_binds - 1; i >= 0; --i)
3163 if (ops[i])
3164 drm_gpuva_ops_free(&vm->gpuvm, ops[i]);
3165free_syncs:
3166 if (err == -ENODATA)
3167 err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
3168 while (num_syncs--)
3169 xe_sync_entry_cleanup(&syncs[num_syncs]);
3170
3171 kfree(syncs);
3172put_obj:
3173 for (i = 0; i < args->num_binds; ++i)
3174 xe_bo_put(bos[i]);
3175release_vm_lock:
3176 up_write(&vm->lock);
3177put_vm:
3178 xe_vm_put(vm);
3179put_exec_queue:
3180 if (q)
3181 xe_exec_queue_put(q);
3182free_objs:
3183 kvfree(bos);
3184 kvfree(ops);
3185 if (args->num_binds > 1)
3186 kvfree(bind_ops);
3187 return err;
3188}
3189
3190/**
3191 * xe_vm_lock() - Lock the vm's dma_resv object
3192 * @vm: The struct xe_vm whose lock is to be locked
3193 * @intr: Whether to perform any wait interruptible
3194 *
3195 * Return: 0 on success, -EINTR if @intr is true and the wait for a
3196 * contended lock was interrupted. If @intr is false, the function
3197 * always returns 0.
3198 */
3199int xe_vm_lock(struct xe_vm *vm, bool intr)
3200{
3201 if (intr)
3202 return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
3203
3204 return dma_resv_lock(xe_vm_resv(vm), NULL);
3205}
3206
3207/**
3208 * xe_vm_unlock() - Unlock the vm's dma_resv object
3209 * @vm: The struct xe_vm whose lock is to be released.
3210 *
3211 * Unlock a buffer object lock that was locked by xe_vm_lock().
3212 */
3213void xe_vm_unlock(struct xe_vm *vm)
3214{
3215 dma_resv_unlock(xe_vm_resv(vm));
3216}
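
/*
 * Minimal usage sketch (illustrative only): take the VM's dma_resv around
 * operations that require it and drop it when done.
 *
 *	int err = xe_vm_lock(vm, true);
 *	if (err)
 *		return err;
 *	... touch state protected by the VM's dma_resv ...
 *	xe_vm_unlock(vm);
 */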
3217
3218/**
3219 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
3220 * @vma: VMA to invalidate
3221 *
3222 * Walks the list of page-table leaves, zeroing the entries owned by this
3223 * VMA, invalidates the TLBs, and blocks until the TLB invalidation is
3224 * complete.
3225 *
3226 * Return: 0 on success, negative error code otherwise.
3227 */
3228int xe_vm_invalidate_vma(struct xe_vma *vma)
3229{
3230 struct xe_device *xe = xe_vma_vm(vma)->xe;
3231 struct xe_tile *tile;
3232 struct xe_gt_tlb_invalidation_fence
3233 fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
3234 u8 id;
3235 u32 fence_id = 0;
3236 int ret = 0;
3237
3238 xe_assert(xe, !xe_vma_is_null(vma));
3239 trace_xe_vma_invalidate(vma);
3240
3241 vm_dbg(&xe_vma_vm(vma)->xe->drm,
3242 "INVALIDATE: addr=0x%016llx, range=0x%016llx",
3243 xe_vma_start(vma), xe_vma_size(vma));
3244
3245 /* Check that we don't race with page-table updates */
3246 if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
3247 if (xe_vma_is_userptr(vma)) {
3248 WARN_ON_ONCE(!mmu_interval_check_retry
3249 (&to_userptr_vma(vma)->userptr.notifier,
3250 to_userptr_vma(vma)->userptr.notifier_seq));
3251 WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)),
3252 DMA_RESV_USAGE_BOOKKEEP));
3253
3254 } else {
3255 xe_bo_assert_held(xe_vma_bo(vma));
3256 }
3257 }
3258
3259 for_each_tile(tile, xe, id) {
3260 if (xe_pt_zap_ptes(tile, vma)) {
3261 xe_device_wmb(xe);
3262 xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
3263 &fence[fence_id],
3264 true);
3265
3266 ret = xe_gt_tlb_invalidation_vma(tile->primary_gt,
3267 &fence[fence_id], vma);
3268 if (ret)
3269 goto wait;
3270 ++fence_id;
3271
3272 if (!tile->media_gt)
3273 continue;
3274
3275 xe_gt_tlb_invalidation_fence_init(tile->media_gt,
3276 &fence[fence_id],
3277 true);
3278
3279 ret = xe_gt_tlb_invalidation_vma(tile->media_gt,
3280 &fence[fence_id], vma);
3281 if (ret)
3282 goto wait;
3283 ++fence_id;
3284 }
3285 }
3286
3287wait:
3288 for (id = 0; id < fence_id; ++id)
3289 xe_gt_tlb_invalidation_fence_wait(&fence[id]);
3290
3291 vma->tile_invalidated = vma->tile_mask;
3292
3293 return ret;
3294}
3295
3296struct xe_vm_snapshot {
3297 unsigned long num_snaps;
3298 struct {
3299 u64 ofs, bo_ofs;
3300 unsigned long len;
3301 struct xe_bo *bo;
3302 void *data;
3303 struct mm_struct *mm;
3304 } snap[];
3305};
3306
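/*
 * Record address, size and a reference to the backing store (BO or mm) of
 * every dumpable VMA under snap_mutex. The actual contents are copied
 * later by xe_vm_snapshot_capture_delayed(), so only references are taken
 * here.
 */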
3307struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
3308{
3309 unsigned long num_snaps = 0, i;
3310 struct xe_vm_snapshot *snap = NULL;
3311 struct drm_gpuva *gpuva;
3312
3313 if (!vm)
3314 return NULL;
3315
3316 mutex_lock(&vm->snap_mutex);
3317 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
3318 if (gpuva->flags & XE_VMA_DUMPABLE)
3319 num_snaps++;
3320 }
3321
3322 if (num_snaps)
3323 snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT);
3324 if (!snap) {
3325 snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV);
3326 goto out_unlock;
3327 }
3328
3329 snap->num_snaps = num_snaps;
3330 i = 0;
3331 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
3332 struct xe_vma *vma = gpuva_to_vma(gpuva);
3333 struct xe_bo *bo = vma->gpuva.gem.obj ?
3334 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;
3335
3336 if (!(gpuva->flags & XE_VMA_DUMPABLE))
3337 continue;
3338
3339 snap->snap[i].ofs = xe_vma_start(vma);
3340 snap->snap[i].len = xe_vma_size(vma);
3341 if (bo) {
3342 snap->snap[i].bo = xe_bo_get(bo);
3343 snap->snap[i].bo_ofs = xe_vma_bo_offset(vma);
3344 } else if (xe_vma_is_userptr(vma)) {
3345 struct mm_struct *mm =
3346 to_userptr_vma(vma)->userptr.notifier.mm;
3347
3348 if (mmget_not_zero(mm))
3349 snap->snap[i].mm = mm;
3350 else
3351 snap->snap[i].data = ERR_PTR(-EFAULT);
3352
3353 snap->snap[i].bo_ofs = xe_vma_userptr(vma);
3354 } else {
3355 snap->snap[i].data = ERR_PTR(-ENOENT);
3356 }
3357 i++;
3358 }
3359
3360out_unlock:
3361 mutex_unlock(&vm->snap_mutex);
3362 return snap;
3363}
3364
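/*
 * Copy the contents of each captured mapping: BOs are vmapped and copied
 * with the BO lock held, userptrs are read with copy_from_user() under
 * kthread_use_mm(). Errors are stored as ERR_PTR() in the per-snap data
 * pointer.
 */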
3365void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
3366{
3367 if (IS_ERR_OR_NULL(snap))
3368 return;
3369
3370 for (int i = 0; i < snap->num_snaps; i++) {
3371 struct xe_bo *bo = snap->snap[i].bo;
3372 struct iosys_map src;
3373 int err;
3374
3375 if (IS_ERR(snap->snap[i].data))
3376 continue;
3377
3378 snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER);
3379 if (!snap->snap[i].data) {
3380 snap->snap[i].data = ERR_PTR(-ENOMEM);
3381 goto cleanup_bo;
3382 }
3383
3384 if (bo) {
3385 xe_bo_lock(bo, false);
3386 err = ttm_bo_vmap(&bo->ttm, &src);
3387 if (!err) {
3388 xe_map_memcpy_from(xe_bo_device(bo),
3389 snap->snap[i].data,
3390 &src, snap->snap[i].bo_ofs,
3391 snap->snap[i].len);
3392 ttm_bo_vunmap(&bo->ttm, &src);
3393 }
3394 xe_bo_unlock(bo);
3395 } else {
3396 void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;
3397
3398 kthread_use_mm(snap->snap[i].mm);
3399 if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len))
3400 err = 0;
3401 else
3402 err = -EFAULT;
3403 kthread_unuse_mm(snap->snap[i].mm);
3404
3405 mmput(snap->snap[i].mm);
3406 snap->snap[i].mm = NULL;
3407 }
3408
3409 if (err) {
3410 kvfree(snap->snap[i].data);
3411 snap->snap[i].data = ERR_PTR(err);
3412 }
3413
3414cleanup_bo:
3415 xe_bo_put(bo);
3416 snap->snap[i].bo = NULL;
3417 }
3418}
3419
3420void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
3421{
3422 unsigned long i, j;
3423
3424 if (IS_ERR_OR_NULL(snap)) {
3425 drm_printf(p, "[0].error: %li\n", PTR_ERR(snap));
3426 return;
3427 }
3428
3429 for (i = 0; i < snap->num_snaps; i++) {
3430 drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len);
3431
3432 if (IS_ERR(snap->snap[i].data)) {
3433 drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs,
3434 PTR_ERR(snap->snap[i].data));
3435 continue;
3436 }
3437
3438 drm_printf(p, "[%llx].data: ", snap->snap[i].ofs);
3439
3440 for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) {
3441 u32 *val = snap->snap[i].data + j;
3442 char dumped[ASCII85_BUFSZ];
3443
3444 drm_puts(p, ascii85_encode(*val, dumped));
3445 }
3446
3447 drm_puts(p, "\n");
3448 }
3449}
3450
3451void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
3452{
3453 unsigned long i;
3454
3455 if (IS_ERR_OR_NULL(snap))
3456 return;
3457
3458 for (i = 0; i < snap->num_snaps; i++) {
3459 if (!IS_ERR(snap->snap[i].data))
3460 kvfree(snap->snap[i].data);
3461 xe_bo_put(snap->snap[i].bo);
3462 if (snap->snap[i].mm)
3463 mmput(snap->snap[i].mm);
3464 }
3465 kvfree(snap);
3466}