drivers/gpu/drm/xe/xe_vm.c
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5
6 #include "xe_vm.h"
7
8 #include <linux/dma-fence-array.h>
9 #include <linux/nospec.h>
10
11 #include <drm/drm_exec.h>
12 #include <drm/drm_print.h>
13 #include <drm/ttm/ttm_execbuf_util.h>
14 #include <drm/ttm/ttm_tt.h>
15 #include <drm/xe_drm.h>
16 #include <linux/ascii85.h>
17 #include <linux/delay.h>
18 #include <linux/kthread.h>
19 #include <linux/mm.h>
20 #include <linux/swap.h>
21
22 #include <generated/xe_wa_oob.h>
23
24 #include "regs/xe_gtt_defs.h"
25 #include "xe_assert.h"
26 #include "xe_bo.h"
27 #include "xe_device.h"
28 #include "xe_drm_client.h"
29 #include "xe_exec_queue.h"
30 #include "xe_gt_pagefault.h"
31 #include "xe_gt_tlb_invalidation.h"
32 #include "xe_migrate.h"
33 #include "xe_pat.h"
34 #include "xe_pm.h"
35 #include "xe_preempt_fence.h"
36 #include "xe_pt.h"
37 #include "xe_res_cursor.h"
38 #include "xe_sync.h"
39 #include "xe_trace.h"
40 #include "xe_wa.h"
41 #include "xe_hmm.h"
42
43 static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
44 {
45         return vm->gpuvm.r_obj;
46 }
47
48 /**
49  * xe_vma_userptr_check_repin() - Advisory check for repin needed
50  * @uvma: The userptr vma
51  *
52  * Check if the userptr vma has been invalidated since last successful
53  * repin. The check is advisory only and the function can be called
54  * without the vm->userptr.notifier_lock held. There is no guarantee that the
55  * vma userptr will remain valid after a lockless check, so typically
56  * the call needs to be followed by a proper check under the notifier_lock.
57  *
58  * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
59  */
60 int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma)
61 {
62         return mmu_interval_check_retry(&uvma->userptr.notifier,
63                                         uvma->userptr.notifier_seq) ?
64                 -EAGAIN : 0;
65 }
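
/*
 * Illustrative usage sketch (not part of the driver): the check above is
 * advisory only, so a caller would typically repin on -EAGAIN and then redo
 * the check under the notifier lock before trusting the pages. This is an
 * assumption modelled on the rebind worker further down; error handling is
 * omitted.
 *
 *	if (xe_vma_userptr_check_repin(uvma) == -EAGAIN)
 *		err = xe_vma_userptr_pin_pages(uvma);
 *
 *	down_read(&vm->userptr.notifier_lock);
 *	err = __xe_vm_userptr_needs_repin(vm);
 *	up_read(&vm->userptr.notifier_lock);
 */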
66
67 int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
68 {
69         struct xe_vma *vma = &uvma->vma;
70         struct xe_vm *vm = xe_vma_vm(vma);
71         struct xe_device *xe = vm->xe;
72
73         lockdep_assert_held(&vm->lock);
74         xe_assert(xe, xe_vma_is_userptr(vma));
75
76         return xe_hmm_userptr_populate_range(uvma, false);
77 }
78
79 static bool preempt_fences_waiting(struct xe_vm *vm)
80 {
81         struct xe_exec_queue *q;
82
83         lockdep_assert_held(&vm->lock);
84         xe_vm_assert_held(vm);
85
86         list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
87                 if (!q->compute.pfence ||
88                     (q->compute.pfence && test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
89                                                    &q->compute.pfence->flags))) {
90                         return true;
91                 }
92         }
93
94         return false;
95 }
96
97 static void free_preempt_fences(struct list_head *list)
98 {
99         struct list_head *link, *next;
100
101         list_for_each_safe(link, next, list)
102                 xe_preempt_fence_free(to_preempt_fence_from_link(link));
103 }
104
105 static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
106                                 unsigned int *count)
107 {
108         lockdep_assert_held(&vm->lock);
109         xe_vm_assert_held(vm);
110
111         if (*count >= vm->preempt.num_exec_queues)
112                 return 0;
113
114         for (; *count < vm->preempt.num_exec_queues; ++(*count)) {
115                 struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();
116
117                 if (IS_ERR(pfence))
118                         return PTR_ERR(pfence);
119
120                 list_move_tail(xe_preempt_fence_link(pfence), list);
121         }
122
123         return 0;
124 }
125
126 static int wait_for_existing_preempt_fences(struct xe_vm *vm)
127 {
128         struct xe_exec_queue *q;
129
130         xe_vm_assert_held(vm);
131
132         list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
133                 if (q->compute.pfence) {
134                         long timeout = dma_fence_wait(q->compute.pfence, false);
135
136                         if (timeout < 0)
137                                 return -ETIME;
138                         dma_fence_put(q->compute.pfence);
139                         q->compute.pfence = NULL;
140                 }
141         }
142
143         return 0;
144 }
145
146 static bool xe_vm_is_idle(struct xe_vm *vm)
147 {
148         struct xe_exec_queue *q;
149
150         xe_vm_assert_held(vm);
151         list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
152                 if (!xe_exec_queue_is_idle(q))
153                         return false;
154         }
155
156         return true;
157 }
158
159 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
160 {
161         struct list_head *link;
162         struct xe_exec_queue *q;
163
164         list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
165                 struct dma_fence *fence;
166
167                 link = list->next;
168                 xe_assert(vm->xe, link != list);
169
170                 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
171                                              q, q->compute.context,
172                                              ++q->compute.seqno);
173                 dma_fence_put(q->compute.pfence);
174                 q->compute.pfence = fence;
175         }
176 }
177
178 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
179 {
180         struct xe_exec_queue *q;
181         int err;
182
183         if (!vm->preempt.num_exec_queues)
184                 return 0;
185
186         err = xe_bo_lock(bo, true);
187         if (err)
188                 return err;
189
190         err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues);
191         if (err)
192                 goto out_unlock;
193
194         list_for_each_entry(q, &vm->preempt.exec_queues, compute.link)
195                 if (q->compute.pfence) {
196                         dma_resv_add_fence(bo->ttm.base.resv,
197                                            q->compute.pfence,
198                                            DMA_RESV_USAGE_BOOKKEEP);
199                 }
200
201 out_unlock:
202         xe_bo_unlock(bo);
203         return err;
204 }
205
206 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm,
207                                                 struct drm_exec *exec)
208 {
209         struct xe_exec_queue *q;
210
211         lockdep_assert_held(&vm->lock);
212         xe_vm_assert_held(vm);
213
214         list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
215                 q->ops->resume(q);
216
217                 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->compute.pfence,
218                                          DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
219         }
220 }
221
222 int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
223 {
224         struct drm_gpuvm_exec vm_exec = {
225                 .vm = &vm->gpuvm,
226                 .flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
227                 .num_fences = 1,
228         };
229         struct drm_exec *exec = &vm_exec.exec;
230         struct dma_fence *pfence;
231         int err;
232         bool wait;
233
234         xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
235
236         down_write(&vm->lock);
237         err = drm_gpuvm_exec_lock(&vm_exec);
238         if (err)
239                 goto out_up_write;
240
241         pfence = xe_preempt_fence_create(q, q->compute.context,
242                                          ++q->compute.seqno);
243         if (!pfence) {
244                 err = -ENOMEM;
245                 goto out_fini;
246         }
247
248         list_add(&q->compute.link, &vm->preempt.exec_queues);
249         ++vm->preempt.num_exec_queues;
250         q->compute.pfence = pfence;
251
252         down_read(&vm->userptr.notifier_lock);
253
254         drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
255                                  DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
256
257         /*
258          * Check to see if a preemption on the VM or a userptr invalidation
259          * is in flight; if so, trigger this preempt fence to sync state with
260          * other preempt fences on the VM.
261          */
262         wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
263         if (wait)
264                 dma_fence_enable_sw_signaling(pfence);
265
266         up_read(&vm->userptr.notifier_lock);
267
268 out_fini:
269         drm_exec_fini(exec);
270 out_up_write:
271         up_write(&vm->lock);
272
273         return err;
274 }
275
276 /**
277  * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
278  * @vm: The VM.
279  * @q: The exec_queue
280  */
281 void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
282 {
283         if (!xe_vm_in_preempt_fence_mode(vm))
284                 return;
285
286         down_write(&vm->lock);
287         list_del(&q->compute.link);
288         --vm->preempt.num_exec_queues;
289         if (q->compute.pfence) {
290                 dma_fence_enable_sw_signaling(q->compute.pfence);
291                 dma_fence_put(q->compute.pfence);
292                 q->compute.pfence = NULL;
293         }
294         up_write(&vm->lock);
295 }
296
297 /**
298  * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs
299  * that need repinning.
300  * @vm: The VM.
301  *
302  * This function checks for whether the VM has userptrs that need repinning,
303  * and provides a release-type barrier on the userptr.notifier_lock after
304  * checking.
305  *
306  * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
307  */
308 int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
309 {
310         lockdep_assert_held_read(&vm->userptr.notifier_lock);
311
312         return (list_empty(&vm->userptr.repin_list) &&
313                 list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
314 }
315
316 #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
317
318 static void xe_vm_kill(struct xe_vm *vm)
319 {
320         struct xe_exec_queue *q;
321
322         lockdep_assert_held(&vm->lock);
323
324         xe_vm_lock(vm, false);
325         vm->flags |= XE_VM_FLAG_BANNED;
326         trace_xe_vm_kill(vm);
327
328         list_for_each_entry(q, &vm->preempt.exec_queues, compute.link)
329                 q->ops->kill(q);
330         xe_vm_unlock(vm);
331
332         /* TODO: Inform user the VM is banned */
333 }
334
335 /**
336  * xe_vm_validate_should_retry() - Whether to retry after a validate error.
337  * @exec: The drm_exec object used for locking before validation.
338  * @err: The error returned from ttm_bo_validate().
339  * @end: A ktime_t cookie that should be set to 0 before first use and
340  * that should be reused on subsequent calls.
341  *
342  * With multiple active VMs, under memory pressure, it is possible that
343  * ttm_bo_validate() runs into -EDEADLK and in such a case returns -ENOMEM.
344  * Until TTM properly handles locking in such scenarios, the best thing the
345  * driver can do is retry with a timeout. Check if that is necessary, and
346  * if so unlock the drm_exec's objects while keeping the ticket to prepare
347  * for a rerun.
348  *
349  * Return: true if a retry after drm_exec_init() is recommended;
350  * false otherwise.
351  */
352 bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end)
353 {
354         ktime_t cur;
355
356         if (err != -ENOMEM)
357                 return false;
358
359         cur = ktime_get();
360         *end = *end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS);
361         if (!ktime_before(cur, *end))
362                 return false;
363
364         msleep(20);
365         return true;
366 }
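
/*
 * Illustrative retry-loop sketch (not part of the driver), showing how the
 * ktime_t cookie is meant to be used: start it at 0, keep it across retries
 * and rerun the whole drm_exec transaction while this helper says so. The
 * shape follows preempt_rebind_work_func() below; locking/validation details
 * are elided.
 *
 *	struct drm_exec exec;
 *	ktime_t end = 0;
 *	int err;
 *
 * retry:
 *	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 *	... lock and validate objects, setting err ...
 *	drm_exec_fini(&exec);
 *	if (err && xe_vm_validate_should_retry(&exec, err, &end))
 *		err = -EAGAIN;
 *	if (err == -EAGAIN)
 *		goto retry;
 */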
367
368 static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
369 {
370         struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
371         struct drm_gpuva *gpuva;
372         int ret;
373
374         lockdep_assert_held(&vm->lock);
375         drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
376                 list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
377                                &vm->rebind_list);
378
379         ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false);
380         if (ret)
381                 return ret;
382
383         vm_bo->evicted = false;
384         return 0;
385 }
386
387 /**
388  * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
389  * @vm: The vm for which we are rebinding.
390  * @exec: The struct drm_exec with the locked GEM objects.
391  * @num_fences: The number of fences to reserve for the operation, not
392  * including rebinds and validations.
393  *
394  * Validates all evicted gem objects and rebinds their vmas. Note that
395  * rebindings may cause evictions and hence the validation-rebind
396  * sequence is rerun until there are no more objects to validate.
397  *
398  * Return: 0 on success, negative error code on error. In particular,
399  * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
400  * the drm_exec transaction needs to be restarted.
401  */
402 int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
403                           unsigned int num_fences)
404 {
405         struct drm_gem_object *obj;
406         unsigned long index;
407         int ret;
408
409         do {
410                 ret = drm_gpuvm_validate(&vm->gpuvm, exec);
411                 if (ret)
412                         return ret;
413
414                 ret = xe_vm_rebind(vm, false);
415                 if (ret)
416                         return ret;
417         } while (!list_empty(&vm->gpuvm.evict.list));
418
419         drm_exec_for_each_locked_object(exec, index, obj) {
420                 ret = dma_resv_reserve_fences(obj->resv, num_fences);
421                 if (ret)
422                         return ret;
423         }
424
425         return 0;
426 }
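
/*
 * Illustrative sketch (not part of the driver) of calling this from a
 * drm_exec locking loop, where -EDEADLK contention restarts the transaction.
 * It mirrors xe_preempt_work_begin() below; the fence count of 1 is an
 * arbitrary example value.
 *
 *	drm_exec_until_all_locked(&exec) {
 *		err = drm_gpuvm_prepare_vm(&vm->gpuvm, &exec, 0);
 *		if (!err)
 *			err = xe_vm_validate_rebind(vm, &exec, 1);
 *		drm_exec_retry_on_contention(&exec);
 *	}
 */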
427
428 static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
429                                  bool *done)
430 {
431         int err;
432
433         err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
434         if (err)
435                 return err;
436
437         if (xe_vm_is_idle(vm)) {
438                 vm->preempt.rebind_deactivated = true;
439                 *done = true;
440                 return 0;
441         }
442
443         if (!preempt_fences_waiting(vm)) {
444                 *done = true;
445                 return 0;
446         }
447
448         err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
449         if (err)
450                 return err;
451
452         err = wait_for_existing_preempt_fences(vm);
453         if (err)
454                 return err;
455
456         /*
457          * Add validation and rebinding to the locking loop since both can
458          * cause evictions which may require blocking dma_resv locks.
459          * The fence reservation here is intended for the new preempt fences
460          * we attach at the end of the rebind work.
461          */
462         return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
463 }
464
465 static void preempt_rebind_work_func(struct work_struct *w)
466 {
467         struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
468         struct drm_exec exec;
469         unsigned int fence_count = 0;
470         LIST_HEAD(preempt_fences);
471         ktime_t end = 0;
472         int err = 0;
473         long wait;
474         int __maybe_unused tries = 0;
475
476         xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
477         trace_xe_vm_rebind_worker_enter(vm);
478
479         down_write(&vm->lock);
480
481         if (xe_vm_is_closed_or_banned(vm)) {
482                 up_write(&vm->lock);
483                 trace_xe_vm_rebind_worker_exit(vm);
484                 return;
485         }
486
487 retry:
488         if (xe_vm_userptr_check_repin(vm)) {
489                 err = xe_vm_userptr_pin(vm);
490                 if (err)
491                         goto out_unlock_outer;
492         }
493
494         drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
495
496         drm_exec_until_all_locked(&exec) {
497                 bool done = false;
498
499                 err = xe_preempt_work_begin(&exec, vm, &done);
500                 drm_exec_retry_on_contention(&exec);
501                 if (err || done) {
502                         drm_exec_fini(&exec);
503                         if (err && xe_vm_validate_should_retry(&exec, err, &end))
504                                 err = -EAGAIN;
505
506                         goto out_unlock_outer;
507                 }
508         }
509
510         err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
511         if (err)
512                 goto out_unlock;
513
514         err = xe_vm_rebind(vm, true);
515         if (err)
516                 goto out_unlock;
517
518         /* Wait on rebinds and munmap style VM unbinds */
519         wait = dma_resv_wait_timeout(xe_vm_resv(vm),
520                                      DMA_RESV_USAGE_KERNEL,
521                                      false, MAX_SCHEDULE_TIMEOUT);
522         if (wait <= 0) {
523                 err = -ETIME;
524                 goto out_unlock;
525         }
526
527 #define retry_required(__tries, __vm) \
528         (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
529         (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
530         __xe_vm_userptr_needs_repin(__vm))
531
532         down_read(&vm->userptr.notifier_lock);
533         if (retry_required(tries, vm)) {
534                 up_read(&vm->userptr.notifier_lock);
535                 err = -EAGAIN;
536                 goto out_unlock;
537         }
538
539 #undef retry_required
540
541         spin_lock(&vm->xe->ttm.lru_lock);
542         ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
543         spin_unlock(&vm->xe->ttm.lru_lock);
544
545         /* Point of no return. */
546         arm_preempt_fences(vm, &preempt_fences);
547         resume_and_reinstall_preempt_fences(vm, &exec);
548         up_read(&vm->userptr.notifier_lock);
549
550 out_unlock:
551         drm_exec_fini(&exec);
552 out_unlock_outer:
553         if (err == -EAGAIN) {
554                 trace_xe_vm_rebind_worker_retry(vm);
555                 goto retry;
556         }
557
558         if (err) {
559                 drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
560                 xe_vm_kill(vm);
561         }
562         up_write(&vm->lock);
563
564         free_preempt_fences(&preempt_fences);
565
566         trace_xe_vm_rebind_worker_exit(vm);
567 }
568
569 static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
570                                    const struct mmu_notifier_range *range,
571                                    unsigned long cur_seq)
572 {
573         struct xe_userptr *userptr = container_of(mni, typeof(*userptr), notifier);
574         struct xe_userptr_vma *uvma = container_of(userptr, typeof(*uvma), userptr);
575         struct xe_vma *vma = &uvma->vma;
576         struct xe_vm *vm = xe_vma_vm(vma);
577         struct dma_resv_iter cursor;
578         struct dma_fence *fence;
579         long err;
580
581         xe_assert(vm->xe, xe_vma_is_userptr(vma));
582         trace_xe_vma_userptr_invalidate(vma);
583
584         if (!mmu_notifier_range_blockable(range))
585                 return false;
586
587         vm_dbg(&xe_vma_vm(vma)->xe->drm,
588                "NOTIFIER: addr=0x%016llx, range=0x%016llx",
589                 xe_vma_start(vma), xe_vma_size(vma));
590
591         down_write(&vm->userptr.notifier_lock);
592         mmu_interval_set_seq(mni, cur_seq);
593
594         /* No need to stop gpu access if the userptr is not yet bound. */
595         if (!userptr->initial_bind) {
596                 up_write(&vm->userptr.notifier_lock);
597                 return true;
598         }
599
600         /*
601          * Tell exec and rebind worker they need to repin and rebind this
602          * userptr.
603          */
604         if (!xe_vm_in_fault_mode(vm) &&
605             !(vma->gpuva.flags & XE_VMA_DESTROYED) && vma->tile_present) {
606                 spin_lock(&vm->userptr.invalidated_lock);
607                 list_move_tail(&userptr->invalidate_link,
608                                &vm->userptr.invalidated);
609                 spin_unlock(&vm->userptr.invalidated_lock);
610         }
611
612         up_write(&vm->userptr.notifier_lock);
613
614         /*
615          * Preempt fences turn into schedule disables, pipeline these.
616          * Note that even in fault mode, we need to wait for binds and
617          * unbinds to complete, and those are attached as BOOKKEEP fences
618          * to the vm.
619          */
620         dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
621                             DMA_RESV_USAGE_BOOKKEEP);
622         dma_resv_for_each_fence_unlocked(&cursor, fence)
623                 dma_fence_enable_sw_signaling(fence);
624         dma_resv_iter_end(&cursor);
625
626         err = dma_resv_wait_timeout(xe_vm_resv(vm),
627                                     DMA_RESV_USAGE_BOOKKEEP,
628                                     false, MAX_SCHEDULE_TIMEOUT);
629         XE_WARN_ON(err <= 0);
630
631         if (xe_vm_in_fault_mode(vm)) {
632                 err = xe_vm_invalidate_vma(vma);
633                 XE_WARN_ON(err);
634         }
635
636         trace_xe_vma_userptr_invalidate_complete(vma);
637
638         return true;
639 }
640
641 static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
642         .invalidate = vma_userptr_invalidate,
643 };
644
645 int xe_vm_userptr_pin(struct xe_vm *vm)
646 {
647         struct xe_userptr_vma *uvma, *next;
648         int err = 0;
649         LIST_HEAD(tmp_evict);
650
651         xe_assert(vm->xe, !xe_vm_in_fault_mode(vm));
652         lockdep_assert_held_write(&vm->lock);
653
654         /* Collect invalidated userptrs */
655         spin_lock(&vm->userptr.invalidated_lock);
656         list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated,
657                                  userptr.invalidate_link) {
658                 list_del_init(&uvma->userptr.invalidate_link);
659                 list_move_tail(&uvma->userptr.repin_link,
660                                &vm->userptr.repin_list);
661         }
662         spin_unlock(&vm->userptr.invalidated_lock);
663
664         /* Pin and move to temporary list */
665         list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
666                                  userptr.repin_link) {
667                 err = xe_vma_userptr_pin_pages(uvma);
668                 if (err == -EFAULT) {
669                         list_del_init(&uvma->userptr.repin_link);
670
671                         /* Wait for pending binds */
672                         xe_vm_lock(vm, false);
673                         dma_resv_wait_timeout(xe_vm_resv(vm),
674                                               DMA_RESV_USAGE_BOOKKEEP,
675                                               false, MAX_SCHEDULE_TIMEOUT);
676
677                         err = xe_vm_invalidate_vma(&uvma->vma);
678                         xe_vm_unlock(vm);
679                         if (err)
680                                 return err;
681                 } else {
682                         if (err < 0)
683                                 return err;
684
685                         list_del_init(&uvma->userptr.repin_link);
686                         list_move_tail(&uvma->vma.combined_links.rebind,
687                                        &vm->rebind_list);
688                 }
689         }
690
691         return 0;
692 }
693
694 /**
695  * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs
696  * that need repinning.
697  * @vm: The VM.
698  *
699  * This function does an advisory check for whether the VM has userptrs that
700  * need repinning.
701  *
702  * Return: 0 if there are no indications of userptrs needing repinning,
703  * -EAGAIN if there are.
704  */
705 int xe_vm_userptr_check_repin(struct xe_vm *vm)
706 {
707         return (list_empty_careful(&vm->userptr.repin_list) &&
708                 list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
709 }
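
/*
 * Illustrative sketch (not part of the driver): with vm->lock already held in
 * write mode (xe_vm_userptr_pin() asserts it), this advisory check gates
 * whether a repin pass is worth doing at all, as the rebind worker does. The
 * "out" label is a placeholder.
 *
 *	lockdep_assert_held_write(&vm->lock);
 *	if (xe_vm_userptr_check_repin(vm)) {
 *		err = xe_vm_userptr_pin(vm);
 *		if (err)
 *			goto out;
 *	}
 */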
710
711 static struct dma_fence *
712 xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
713                struct xe_sync_entry *syncs, u32 num_syncs,
714                bool first_op, bool last_op);
715
716 int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
717 {
718         struct dma_fence *fence;
719         struct xe_vma *vma, *next;
720
721         lockdep_assert_held(&vm->lock);
722         if (xe_vm_in_lr_mode(vm) && !rebind_worker)
723                 return 0;
724
725         xe_vm_assert_held(vm);
726         list_for_each_entry_safe(vma, next, &vm->rebind_list,
727                                  combined_links.rebind) {
728                 xe_assert(vm->xe, vma->tile_present);
729
730                 list_del_init(&vma->combined_links.rebind);
731                 if (rebind_worker)
732                         trace_xe_vma_rebind_worker(vma);
733                 else
734                         trace_xe_vma_rebind_exec(vma);
735                 fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false);
736                 if (IS_ERR(fence))
737                         return PTR_ERR(fence);
738                 dma_fence_put(fence);
739         }
740
741         return 0;
742 }
743
744 static void xe_vma_free(struct xe_vma *vma)
745 {
746         if (xe_vma_is_userptr(vma))
747                 kfree(to_userptr_vma(vma));
748         else
749                 kfree(vma);
750 }
751
752 #define VMA_CREATE_FLAG_READ_ONLY       BIT(0)
753 #define VMA_CREATE_FLAG_IS_NULL         BIT(1)
754 #define VMA_CREATE_FLAG_DUMPABLE        BIT(2)
755
756 static struct xe_vma *xe_vma_create(struct xe_vm *vm,
757                                     struct xe_bo *bo,
758                                     u64 bo_offset_or_userptr,
759                                     u64 start, u64 end,
760                                     u16 pat_index, unsigned int flags)
761 {
762         struct xe_vma *vma;
763         struct xe_tile *tile;
764         u8 id;
765         bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY);
766         bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL);
767         bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE);
768
769         xe_assert(vm->xe, start < end);
770         xe_assert(vm->xe, end < vm->size);
771
772         /*
773          * Allocate and ensure that the xe_vma_is_userptr() return
774          * matches what was allocated.
775          */
776         if (!bo && !is_null) {
777                 struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL);
778
779                 if (!uvma)
780                         return ERR_PTR(-ENOMEM);
781
782                 vma = &uvma->vma;
783         } else {
784                 vma = kzalloc(sizeof(*vma), GFP_KERNEL);
785                 if (!vma)
786                         return ERR_PTR(-ENOMEM);
787
788                 if (is_null)
789                         vma->gpuva.flags |= DRM_GPUVA_SPARSE;
790                 if (bo)
791                         vma->gpuva.gem.obj = &bo->ttm.base;
792         }
793
794         INIT_LIST_HEAD(&vma->combined_links.rebind);
795
796         INIT_LIST_HEAD(&vma->gpuva.gem.entry);
797         vma->gpuva.vm = &vm->gpuvm;
798         vma->gpuva.va.addr = start;
799         vma->gpuva.va.range = end - start + 1;
800         if (read_only)
801                 vma->gpuva.flags |= XE_VMA_READ_ONLY;
802         if (dumpable)
803                 vma->gpuva.flags |= XE_VMA_DUMPABLE;
804
805         for_each_tile(tile, vm->xe, id)
806                 vma->tile_mask |= 0x1 << id;
807
808         if (GRAPHICS_VER(vm->xe) >= 20 || vm->xe->info.platform == XE_PVC)
809                 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
810
811         vma->pat_index = pat_index;
812
813         if (bo) {
814                 struct drm_gpuvm_bo *vm_bo;
815
816                 xe_bo_assert_held(bo);
817
818                 vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base);
819                 if (IS_ERR(vm_bo)) {
820                         xe_vma_free(vma);
821                         return ERR_CAST(vm_bo);
822                 }
823
824                 drm_gpuvm_bo_extobj_add(vm_bo);
825                 drm_gem_object_get(&bo->ttm.base);
826                 vma->gpuva.gem.offset = bo_offset_or_userptr;
827                 drm_gpuva_link(&vma->gpuva, vm_bo);
828                 drm_gpuvm_bo_put(vm_bo);
829         } else /* userptr or null */ {
830                 if (!is_null) {
831                         struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr;
832                         u64 size = end - start + 1;
833                         int err;
834
835                         INIT_LIST_HEAD(&userptr->invalidate_link);
836                         INIT_LIST_HEAD(&userptr->repin_link);
837                         vma->gpuva.gem.offset = bo_offset_or_userptr;
838
839                         err = mmu_interval_notifier_insert(&userptr->notifier,
840                                                            current->mm,
841                                                            xe_vma_userptr(vma), size,
842                                                            &vma_userptr_notifier_ops);
843                         if (err) {
844                                 xe_vma_free(vma);
845                                 return ERR_PTR(err);
846                         }
847
848                         userptr->notifier_seq = LONG_MAX;
849                 }
850
851                 xe_vm_get(vm);
852         }
853
854         return vma;
855 }
856
857 static void xe_vma_destroy_late(struct xe_vma *vma)
858 {
859         struct xe_vm *vm = xe_vma_vm(vma);
860
861         if (vma->ufence) {
862                 xe_sync_ufence_put(vma->ufence);
863                 vma->ufence = NULL;
864         }
865
866         if (xe_vma_is_userptr(vma)) {
867                 struct xe_userptr_vma *uvma = to_userptr_vma(vma);
868                 struct xe_userptr *userptr = &uvma->userptr;
869
870                 if (userptr->sg)
871                         xe_hmm_userptr_free_sg(uvma);
872
873                 /*
874                  * Since userptr pages are not pinned, we can't remove
875                  * the notifier until we're sure the GPU is not accessing
876                  * them anymore.
877                  */
878                 mmu_interval_notifier_remove(&userptr->notifier);
879                 xe_vm_put(vm);
880         } else if (xe_vma_is_null(vma)) {
881                 xe_vm_put(vm);
882         } else {
883                 xe_bo_put(xe_vma_bo(vma));
884         }
885
886         xe_vma_free(vma);
887 }
888
889 static void vma_destroy_work_func(struct work_struct *w)
890 {
891         struct xe_vma *vma =
892                 container_of(w, struct xe_vma, destroy_work);
893
894         xe_vma_destroy_late(vma);
895 }
896
897 static void vma_destroy_cb(struct dma_fence *fence,
898                            struct dma_fence_cb *cb)
899 {
900         struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);
901
902         INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
903         queue_work(system_unbound_wq, &vma->destroy_work);
904 }
905
906 static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
907 {
908         struct xe_vm *vm = xe_vma_vm(vma);
909
910         lockdep_assert_held_write(&vm->lock);
911         xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));
912
913         if (xe_vma_is_userptr(vma)) {
914                 xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
915
916                 spin_lock(&vm->userptr.invalidated_lock);
917                 list_del(&to_userptr_vma(vma)->userptr.invalidate_link);
918                 spin_unlock(&vm->userptr.invalidated_lock);
919         } else if (!xe_vma_is_null(vma)) {
920                 xe_bo_assert_held(xe_vma_bo(vma));
921
922                 drm_gpuva_unlink(&vma->gpuva);
923         }
924
925         xe_vm_assert_held(vm);
926         if (fence) {
927                 int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
928                                                  vma_destroy_cb);
929
930                 if (ret) {
931                         XE_WARN_ON(ret != -ENOENT);
932                         xe_vma_destroy_late(vma);
933                 }
934         } else {
935                 xe_vma_destroy_late(vma);
936         }
937 }
938
939 /**
940  * xe_vm_lock_vma() - drm_exec utility to lock a vma
941  * @exec: The drm_exec object we're currently locking for.
942  * @vma: The vma for which we want to lock the vm resv and any attached
943  * object's resv.
944  *
945  * Return: 0 on success, negative error code on error. In particular
946  * may return -EDEADLK on WW transaction contention and -EINTR if
947  * an interruptible wait is terminated by a signal.
948  */
949 int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
950 {
951         struct xe_vm *vm = xe_vma_vm(vma);
952         struct xe_bo *bo = xe_vma_bo(vma);
953         int err;
954
955         XE_WARN_ON(!vm);
956
957         err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
958         if (!err && bo && !bo->vm)
959                 err = drm_exec_lock_obj(exec, &bo->ttm.base);
960
961         return err;
962 }
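
/*
 * Illustrative sketch (not part of the driver): the usual pattern is to call
 * this inside a drm_exec loop so that -EDEADLK contention restarts the
 * transaction, exactly as xe_vma_destroy_unlocked() below does.
 *
 *	drm_exec_until_all_locked(&exec) {
 *		err = xe_vm_lock_vma(&exec, vma);
 *		drm_exec_retry_on_contention(&exec);
 *	}
 */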
963
964 static void xe_vma_destroy_unlocked(struct xe_vma *vma)
965 {
966         struct drm_exec exec;
967         int err;
968
969         drm_exec_init(&exec, 0, 0);
970         drm_exec_until_all_locked(&exec) {
971                 err = xe_vm_lock_vma(&exec, vma);
972                 drm_exec_retry_on_contention(&exec);
973                 if (XE_WARN_ON(err))
974                         break;
975         }
976
977         xe_vma_destroy(vma, NULL);
978
979         drm_exec_fini(&exec);
980 }
981
982 struct xe_vma *
983 xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
984 {
985         struct drm_gpuva *gpuva;
986
987         lockdep_assert_held(&vm->lock);
988
989         if (xe_vm_is_closed_or_banned(vm))
990                 return NULL;
991
992         xe_assert(vm->xe, start + range <= vm->size);
993
994         gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);
995
996         return gpuva ? gpuva_to_vma(gpuva) : NULL;
997 }
998
999 static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
1000 {
1001         int err;
1002
1003         xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1004         lockdep_assert_held(&vm->lock);
1005
1006         mutex_lock(&vm->snap_mutex);
1007         err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
1008         mutex_unlock(&vm->snap_mutex);
1009         XE_WARN_ON(err);        /* Shouldn't be possible */
1010
1011         return err;
1012 }
1013
1014 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
1015 {
1016         xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1017         lockdep_assert_held(&vm->lock);
1018
1019         mutex_lock(&vm->snap_mutex);
1020         drm_gpuva_remove(&vma->gpuva);
1021         mutex_unlock(&vm->snap_mutex);
1022         if (vm->usm.last_fault_vma == vma)
1023                 vm->usm.last_fault_vma = NULL;
1024 }
1025
1026 static struct drm_gpuva_op *xe_vm_op_alloc(void)
1027 {
1028         struct xe_vma_op *op;
1029
1030         op = kzalloc(sizeof(*op), GFP_KERNEL);
1031
1032         if (unlikely(!op))
1033                 return NULL;
1034
1035         return &op->base;
1036 }
1037
1038 static void xe_vm_free(struct drm_gpuvm *gpuvm);
1039
1040 static const struct drm_gpuvm_ops gpuvm_ops = {
1041         .op_alloc = xe_vm_op_alloc,
1042         .vm_bo_validate = xe_gpuvm_validate,
1043         .vm_free = xe_vm_free,
1044 };
1045
1046 static u64 pde_encode_pat_index(struct xe_device *xe, u16 pat_index)
1047 {
1048         u64 pte = 0;
1049
1050         if (pat_index & BIT(0))
1051                 pte |= XE_PPGTT_PTE_PAT0;
1052
1053         if (pat_index & BIT(1))
1054                 pte |= XE_PPGTT_PTE_PAT1;
1055
1056         return pte;
1057 }
1058
1059 static u64 pte_encode_pat_index(struct xe_device *xe, u16 pat_index,
1060                                 u32 pt_level)
1061 {
1062         u64 pte = 0;
1063
1064         if (pat_index & BIT(0))
1065                 pte |= XE_PPGTT_PTE_PAT0;
1066
1067         if (pat_index & BIT(1))
1068                 pte |= XE_PPGTT_PTE_PAT1;
1069
1070         if (pat_index & BIT(2)) {
1071                 if (pt_level)
1072                         pte |= XE_PPGTT_PDE_PDPE_PAT2;
1073                 else
1074                         pte |= XE_PPGTT_PTE_PAT2;
1075         }
1076
1077         if (pat_index & BIT(3))
1078                 pte |= XELPG_PPGTT_PTE_PAT3;
1079
1080         if (pat_index & (BIT(4)))
1081                 pte |= XE2_PPGTT_PTE_PAT4;
1082
1083         return pte;
1084 }
1085
1086 static u64 pte_encode_ps(u32 pt_level)
1087 {
1088         XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL);
1089
1090         if (pt_level == 1)
1091                 return XE_PDE_PS_2M;
1092         else if (pt_level == 2)
1093                 return XE_PDPE_PS_1G;
1094
1095         return 0;
1096 }
1097
1098 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset,
1099                               const u16 pat_index)
1100 {
1101         struct xe_device *xe = xe_bo_device(bo);
1102         u64 pde;
1103
1104         pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1105         pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
1106         pde |= pde_encode_pat_index(xe, pat_index);
1107
1108         return pde;
1109 }
1110
1111 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
1112                               u16 pat_index, u32 pt_level)
1113 {
1114         struct xe_device *xe = xe_bo_device(bo);
1115         u64 pte;
1116
1117         pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1118         pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1119         pte |= pte_encode_pat_index(xe, pat_index, pt_level);
1120         pte |= pte_encode_ps(pt_level);
1121
1122         if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
1123                 pte |= XE_PPGTT_PTE_DM;
1124
1125         return pte;
1126 }
1127
1128 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
1129                                u16 pat_index, u32 pt_level)
1130 {
1131         struct xe_device *xe = xe_vma_vm(vma)->xe;
1132
1133         pte |= XE_PAGE_PRESENT;
1134
1135         if (likely(!xe_vma_read_only(vma)))
1136                 pte |= XE_PAGE_RW;
1137
1138         pte |= pte_encode_pat_index(xe, pat_index, pt_level);
1139         pte |= pte_encode_ps(pt_level);
1140
1141         if (unlikely(xe_vma_is_null(vma)))
1142                 pte |= XE_PTE_NULL;
1143
1144         return pte;
1145 }
1146
1147 static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
1148                                 u16 pat_index,
1149                                 u32 pt_level, bool devmem, u64 flags)
1150 {
1151         u64 pte;
1152
1153         /* Avoid passing random bits directly as flags */
1154         xe_assert(xe, !(flags & ~XE_PTE_PS64));
1155
1156         pte = addr;
1157         pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1158         pte |= pte_encode_pat_index(xe, pat_index, pt_level);
1159         pte |= pte_encode_ps(pt_level);
1160
1161         if (devmem)
1162                 pte |= XE_PPGTT_PTE_DM;
1163
1164         pte |= flags;
1165
1166         return pte;
1167 }
1168
1169 static const struct xe_pt_ops xelp_pt_ops = {
1170         .pte_encode_bo = xelp_pte_encode_bo,
1171         .pte_encode_vma = xelp_pte_encode_vma,
1172         .pte_encode_addr = xelp_pte_encode_addr,
1173         .pde_encode_bo = xelp_pde_encode_bo,
1174 };
1175
1176 /**
1177  * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the
1178  * given tile and vm.
1179  * @xe: xe device.
1180  * @tile: tile to set up for.
1181  * @vm: vm to set up for.
1182  *
1183  * Sets up a pagetable tree with one page-table per level and a single
1184  * leaf PTE. All pagetable entries point to the single page-table or,
1185  * for MAX_HUGEPTE_LEVEL, a NULL huge PTE that returns 0 on reads and
1186  * turns writes into NOPs.
1187  *
1188  * Return: 0 on success, negative error code on error.
1189  */
1190 static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
1191                                 struct xe_vm *vm)
1192 {
1193         u8 id = tile->id;
1194         int i;
1195
1196         for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) {
1197                 vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i);
1198                 if (IS_ERR(vm->scratch_pt[id][i]))
1199                         return PTR_ERR(vm->scratch_pt[id][i]);
1200
1201                 xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]);
1202         }
1203
1204         return 0;
1205 }
1206
1207 static void xe_vm_free_scratch(struct xe_vm *vm)
1208 {
1209         struct xe_tile *tile;
1210         u8 id;
1211
1212         if (!xe_vm_has_scratch(vm))
1213                 return;
1214
1215         for_each_tile(tile, vm->xe, id) {
1216                 u32 i;
1217
1218                 if (!vm->pt_root[id])
1219                         continue;
1220
1221                 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i)
1222                         if (vm->scratch_pt[id][i])
1223                                 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL);
1224         }
1225 }
1226
1227 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
1228 {
1229         struct drm_gem_object *vm_resv_obj;
1230         struct xe_vm *vm;
1231         int err, number_tiles = 0;
1232         struct xe_tile *tile;
1233         u8 id;
1234
1235         vm = kzalloc(sizeof(*vm), GFP_KERNEL);
1236         if (!vm)
1237                 return ERR_PTR(-ENOMEM);
1238
1239         vm->xe = xe;
1240
1241         vm->size = 1ull << xe->info.va_bits;
1242
1243         vm->flags = flags;
1244
1245         init_rwsem(&vm->lock);
1246         mutex_init(&vm->snap_mutex);
1247
1248         INIT_LIST_HEAD(&vm->rebind_list);
1249
1250         INIT_LIST_HEAD(&vm->userptr.repin_list);
1251         INIT_LIST_HEAD(&vm->userptr.invalidated);
1252         init_rwsem(&vm->userptr.notifier_lock);
1253         spin_lock_init(&vm->userptr.invalidated_lock);
1254
1255         INIT_LIST_HEAD(&vm->preempt.exec_queues);
1256         vm->preempt.min_run_period_ms = 10;     /* FIXME: Wire up to uAPI */
1257
1258         for_each_tile(tile, xe, id)
1259                 xe_range_fence_tree_init(&vm->rftree[id]);
1260
1261         vm->pt_ops = &xelp_pt_ops;
1262
1263         if (!(flags & XE_VM_FLAG_MIGRATION))
1264                 xe_pm_runtime_get_noresume(xe);
1265
1266         vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
1267         if (!vm_resv_obj) {
1268                 err = -ENOMEM;
1269                 goto err_no_resv;
1270         }
1271
1272         drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm,
1273                        vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops);
1274
1275         drm_gem_object_put(vm_resv_obj);
1276
1277         err = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
1278         if (err)
1279                 goto err_close;
1280
1281         if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
1282                 vm->flags |= XE_VM_FLAG_64K;
1283
1284         for_each_tile(tile, xe, id) {
1285                 if (flags & XE_VM_FLAG_MIGRATION &&
1286                     tile->id != XE_VM_FLAG_TILE_ID(flags))
1287                         continue;
1288
1289                 vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level);
1290                 if (IS_ERR(vm->pt_root[id])) {
1291                         err = PTR_ERR(vm->pt_root[id]);
1292                         vm->pt_root[id] = NULL;
1293                         goto err_unlock_close;
1294                 }
1295         }
1296
1297         if (xe_vm_has_scratch(vm)) {
1298                 for_each_tile(tile, xe, id) {
1299                         if (!vm->pt_root[id])
1300                                 continue;
1301
1302                         err = xe_vm_create_scratch(xe, tile, vm);
1303                         if (err)
1304                                 goto err_unlock_close;
1305                 }
1306                 vm->batch_invalidate_tlb = true;
1307         }
1308
1309         if (vm->flags & XE_VM_FLAG_LR_MODE) {
1310                 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
1311                 vm->batch_invalidate_tlb = false;
1312         }
1313
1314         /* Fill pt_root after allocating scratch tables */
1315         for_each_tile(tile, xe, id) {
1316                 if (!vm->pt_root[id])
1317                         continue;
1318
1319                 xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
1320         }
1321         dma_resv_unlock(xe_vm_resv(vm));
1322
1323         /* Kernel migration VM shouldn't have a circular loop. */
1324         if (!(flags & XE_VM_FLAG_MIGRATION)) {
1325                 for_each_tile(tile, xe, id) {
1326                         struct xe_gt *gt = tile->primary_gt;
1327                         struct xe_vm *migrate_vm;
1328                         struct xe_exec_queue *q;
1329                         u32 create_flags = EXEC_QUEUE_FLAG_VM;
1330
1331                         if (!vm->pt_root[id])
1332                                 continue;
1333
1334                         migrate_vm = xe_migrate_get_vm(tile->migrate);
1335                         q = xe_exec_queue_create_class(xe, gt, migrate_vm,
1336                                                        XE_ENGINE_CLASS_COPY,
1337                                                        create_flags);
1338                         xe_vm_put(migrate_vm);
1339                         if (IS_ERR(q)) {
1340                                 err = PTR_ERR(q);
1341                                 goto err_close;
1342                         }
1343                         vm->q[id] = q;
1344                         number_tiles++;
1345                 }
1346         }
1347
1348         if (number_tiles > 1)
1349                 vm->composite_fence_ctx = dma_fence_context_alloc(1);
1350
1351         mutex_lock(&xe->usm.lock);
1352         if (flags & XE_VM_FLAG_FAULT_MODE)
1353                 xe->usm.num_vm_in_fault_mode++;
1354         else if (!(flags & XE_VM_FLAG_MIGRATION))
1355                 xe->usm.num_vm_in_non_fault_mode++;
1356         mutex_unlock(&xe->usm.lock);
1357
1358         trace_xe_vm_create(vm);
1359
1360         return vm;
1361
1362 err_unlock_close:
1363         dma_resv_unlock(xe_vm_resv(vm));
1364 err_close:
1365         xe_vm_close_and_put(vm);
1366         return ERR_PTR(err);
1367
1368 err_no_resv:
1369         mutex_destroy(&vm->snap_mutex);
1370         for_each_tile(tile, xe, id)
1371                 xe_range_fence_tree_fini(&vm->rftree[id]);
1372         kfree(vm);
1373         if (!(flags & XE_VM_FLAG_MIGRATION))
1374                 xe_pm_runtime_put(xe);
1375         return ERR_PTR(err);
1376 }
1377
1378 static void xe_vm_close(struct xe_vm *vm)
1379 {
1380         down_write(&vm->lock);
1381         vm->size = 0;
1382         up_write(&vm->lock);
1383 }
1384
1385 void xe_vm_close_and_put(struct xe_vm *vm)
1386 {
1387         LIST_HEAD(contested);
1388         struct xe_device *xe = vm->xe;
1389         struct xe_tile *tile;
1390         struct xe_vma *vma, *next_vma;
1391         struct drm_gpuva *gpuva, *next;
1392         u8 id;
1393
1394         xe_assert(xe, !vm->preempt.num_exec_queues);
1395
1396         xe_vm_close(vm);
1397         if (xe_vm_in_preempt_fence_mode(vm))
1398                 flush_work(&vm->preempt.rebind_work);
1399
1400         down_write(&vm->lock);
1401         for_each_tile(tile, xe, id) {
1402                 if (vm->q[id])
1403                         xe_exec_queue_last_fence_put(vm->q[id], vm);
1404         }
1405         up_write(&vm->lock);
1406
1407         for_each_tile(tile, xe, id) {
1408                 if (vm->q[id]) {
1409                         xe_exec_queue_kill(vm->q[id]);
1410                         xe_exec_queue_put(vm->q[id]);
1411                         vm->q[id] = NULL;
1412                 }
1413         }
1414
1415         down_write(&vm->lock);
1416         xe_vm_lock(vm, false);
1417         drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
1418                 vma = gpuva_to_vma(gpuva);
1419
1420                 if (xe_vma_has_no_bo(vma)) {
1421                         down_read(&vm->userptr.notifier_lock);
1422                         vma->gpuva.flags |= XE_VMA_DESTROYED;
1423                         up_read(&vm->userptr.notifier_lock);
1424                 }
1425
1426                 xe_vm_remove_vma(vm, vma);
1427
1428                 /* easy case, remove from VMA? */
1429                 if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
1430                         list_del_init(&vma->combined_links.rebind);
1431                         xe_vma_destroy(vma, NULL);
1432                         continue;
1433                 }
1434
1435                 list_move_tail(&vma->combined_links.destroy, &contested);
1436                 vma->gpuva.flags |= XE_VMA_DESTROYED;
1437         }
1438
1439         /*
1440          * All vm operations will add shared fences to resv.
1441          * The only exception is eviction for a shared object,
1442          * but even so, the unbind when evicted would still
1443          * install a fence to resv. Hence it's safe to
1444          * destroy the pagetables immediately.
1445          */
1446         xe_vm_free_scratch(vm);
1447
1448         for_each_tile(tile, xe, id) {
1449                 if (vm->pt_root[id]) {
1450                         xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
1451                         vm->pt_root[id] = NULL;
1452                 }
1453         }
1454         xe_vm_unlock(vm);
1455
1456         /*
1457          * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL
1458          * Since we hold a refcount to the bo, we can remove and free
1459          * the members safely without locking.
1460          */
1461         list_for_each_entry_safe(vma, next_vma, &contested,
1462                                  combined_links.destroy) {
1463                 list_del_init(&vma->combined_links.destroy);
1464                 xe_vma_destroy_unlocked(vma);
1465         }
1466
1467         up_write(&vm->lock);
1468
1469         mutex_lock(&xe->usm.lock);
1470         if (vm->flags & XE_VM_FLAG_FAULT_MODE)
1471                 xe->usm.num_vm_in_fault_mode--;
1472         else if (!(vm->flags & XE_VM_FLAG_MIGRATION))
1473                 xe->usm.num_vm_in_non_fault_mode--;
1474
1475         if (vm->usm.asid) {
1476                 void *lookup;
1477
1478                 xe_assert(xe, xe->info.has_asid);
1479                 xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION));
1480
1481                 lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
1482                 xe_assert(xe, lookup == vm);
1483         }
1484         mutex_unlock(&xe->usm.lock);
1485
1486         for_each_tile(tile, xe, id)
1487                 xe_range_fence_tree_fini(&vm->rftree[id]);
1488
1489         xe_vm_put(vm);
1490 }
1491
1492 static void xe_vm_free(struct drm_gpuvm *gpuvm)
1493 {
1494         struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);
1495         struct xe_device *xe = vm->xe;
1496         struct xe_tile *tile;
1497         u8 id;
1498
1499         /* xe_vm_close_and_put was not called? */
1500         xe_assert(xe, !vm->size);
1501
1502         if (xe_vm_in_preempt_fence_mode(vm))
1503                 flush_work(&vm->preempt.rebind_work);
1504
1505         mutex_destroy(&vm->snap_mutex);
1506
1507         if (!(vm->flags & XE_VM_FLAG_MIGRATION))
1508                 xe_pm_runtime_put(xe);
1509
1510         for_each_tile(tile, xe, id)
1511                 XE_WARN_ON(vm->pt_root[id]);
1512
1513         trace_xe_vm_free(vm);
1514         kfree(vm);
1515 }
1516
1517 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
1518 {
1519         struct xe_vm *vm;
1520
1521         mutex_lock(&xef->vm.lock);
1522         vm = xa_load(&xef->vm.xa, id);
1523         if (vm)
1524                 xe_vm_get(vm);
1525         mutex_unlock(&xef->vm.lock);
1526
1527         return vm;
1528 }
1529
1530 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
1531 {
1532         return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0,
1533                                          tile_to_xe(tile)->pat.idx[XE_CACHE_WB]);
1534 }
1535
1536 static struct xe_exec_queue *
1537 to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
1538 {
1539         return q ? q : vm->q[0];
1540 }
1541
1542 static struct dma_fence *
1543 xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
1544                  struct xe_sync_entry *syncs, u32 num_syncs,
1545                  bool first_op, bool last_op)
1546 {
1547         struct xe_vm *vm = xe_vma_vm(vma);
1548         struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
1549         struct xe_tile *tile;
1550         struct dma_fence *fence = NULL;
1551         struct dma_fence **fences = NULL;
1552         struct dma_fence_array *cf = NULL;
1553         int cur_fence = 0, i;
1554         int number_tiles = hweight8(vma->tile_present);
1555         int err;
1556         u8 id;
1557
1558         trace_xe_vma_unbind(vma);
1559
1560         if (vma->ufence) {
1561                 struct xe_user_fence * const f = vma->ufence;
1562
1563                 if (!xe_sync_ufence_get_status(f))
1564                         return ERR_PTR(-EBUSY);
1565
1566                 vma->ufence = NULL;
1567                 xe_sync_ufence_put(f);
1568         }
1569
1570         if (number_tiles > 1) {
1571                 fences = kmalloc_array(number_tiles, sizeof(*fences),
1572                                        GFP_KERNEL);
1573                 if (!fences)
1574                         return ERR_PTR(-ENOMEM);
1575         }
1576
1577         for_each_tile(tile, vm->xe, id) {
1578                 if (!(vma->tile_present & BIT(id)))
1579                         goto next;
1580
1581                 fence = __xe_pt_unbind_vma(tile, vma, q ? q : vm->q[id],
1582                                            first_op ? syncs : NULL,
1583                                            first_op ? num_syncs : 0);
1584                 if (IS_ERR(fence)) {
1585                         err = PTR_ERR(fence);
1586                         goto err_fences;
1587                 }
1588
1589                 if (fences)
1590                         fences[cur_fence++] = fence;
1591
1592 next:
1593                 if (q && vm->pt_root[id] && !list_empty(&q->multi_gt_list))
1594                         q = list_next_entry(q, multi_gt_list);
1595         }
1596
1597         if (fences) {
1598                 cf = dma_fence_array_create(number_tiles, fences,
1599                                             vm->composite_fence_ctx,
1600                                             vm->composite_fence_seqno++,
1601                                             false);
1602                 if (!cf) {
1603                         --vm->composite_fence_seqno;
1604                         err = -ENOMEM;
1605                         goto err_fences;
1606                 }
1607         }
1608
1609         fence = cf ? &cf->base : !fence ?
1610                 xe_exec_queue_last_fence_get(wait_exec_queue, vm) : fence;
1611         if (last_op) {
1612                 for (i = 0; i < num_syncs; i++)
1613                         xe_sync_entry_signal(&syncs[i], fence);
1614         }
1615
1616         return fence;
1617
1618 err_fences:
1619         if (fences) {
1620                 while (cur_fence)
1621                         dma_fence_put(fences[--cur_fence]);
1622                 kfree(fences);
1623         }
1624
1625         return ERR_PTR(err);
1626 }
1627
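/*
 * Bind a VMA on every tile in its tile_mask. Per-tile bind fences are combined
 * into a dma_fence_array when more than one tile is involved. Syncs are only
 * attached to the first operation and, on the last operation, signalled with
 * the resulting (possibly composite) fence.
 */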
1628 static struct dma_fence *
1629 xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
1630                struct xe_sync_entry *syncs, u32 num_syncs,
1631                bool first_op, bool last_op)
1632 {
1633         struct xe_tile *tile;
1634         struct dma_fence *fence;
1635         struct dma_fence **fences = NULL;
1636         struct dma_fence_array *cf = NULL;
1637         struct xe_vm *vm = xe_vma_vm(vma);
1638         int cur_fence = 0, i;
1639         int number_tiles = hweight8(vma->tile_mask);
1640         int err;
1641         u8 id;
1642
1643         trace_xe_vma_bind(vma);
1644
1645         if (number_tiles > 1) {
1646                 fences = kmalloc_array(number_tiles, sizeof(*fences),
1647                                        GFP_KERNEL);
1648                 if (!fences)
1649                         return ERR_PTR(-ENOMEM);
1650         }
1651
1652         for_each_tile(tile, vm->xe, id) {
1653                 if (!(vma->tile_mask & BIT(id)))
1654                         goto next;
1655
1656                 fence = __xe_pt_bind_vma(tile, vma, q ? q : vm->q[id],
1657                                          first_op ? syncs : NULL,
1658                                          first_op ? num_syncs : 0,
1659                                          vma->tile_present & BIT(id));
1660                 if (IS_ERR(fence)) {
1661                         err = PTR_ERR(fence);
1662                         goto err_fences;
1663                 }
1664
1665                 if (fences)
1666                         fences[cur_fence++] = fence;
1667
1668 next:
1669                 if (q && vm->pt_root[id] && !list_empty(&q->multi_gt_list))
1670                         q = list_next_entry(q, multi_gt_list);
1671         }
1672
1673         if (fences) {
1674                 cf = dma_fence_array_create(number_tiles, fences,
1675                                             vm->composite_fence_ctx,
1676                                             vm->composite_fence_seqno++,
1677                                             false);
1678                 if (!cf) {
1679                         --vm->composite_fence_seqno;
1680                         err = -ENOMEM;
1681                         goto err_fences;
1682                 }
1683         }
1684
1685         if (last_op) {
1686                 for (i = 0; i < num_syncs; i++)
1687                         xe_sync_entry_signal(&syncs[i],
1688                                              cf ? &cf->base : fence);
1689         }
1690
1691         return cf ? &cf->base : fence;
1692
1693 err_fences:
1694         if (fences) {
1695                 while (cur_fence)
1696                         dma_fence_put(fences[--cur_fence]);
1697                 kfree(fences);
1698         }
1699
1700         return ERR_PTR(err);
1701 }
1702
1703 static struct xe_user_fence *
1704 find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
1705 {
1706         unsigned int i;
1707
1708         for (i = 0; i < num_syncs; i++) {
1709                 struct xe_sync_entry *e = &syncs[i];
1710
1711                 if (xe_sync_is_ufence(e))
1712                         return xe_sync_ufence_get(e);
1713         }
1714
1715         return NULL;
1716 }
1717
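/*
 * Common bind helper: take a reference on any user fence in the syncs and
 * attach it to the VMA, then either issue the bind immediately or, in fault
 * mode, defer it and just signal the syncs with the exec queue's last fence.
 * The resulting fence is installed as the wait exec queue's last fence when
 * this is the last operation.
 */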
1718 static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
1719                         struct xe_exec_queue *q, struct xe_sync_entry *syncs,
1720                         u32 num_syncs, bool immediate, bool first_op,
1721                         bool last_op)
1722 {
1723         struct dma_fence *fence;
1724         struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
1725         struct xe_user_fence *ufence;
1726
1727         xe_vm_assert_held(vm);
1728
1729         ufence = find_ufence_get(syncs, num_syncs);
1730         if (vma->ufence && ufence)
1731                 xe_sync_ufence_put(vma->ufence);
1732
1733         vma->ufence = ufence ?: vma->ufence;
1734
1735         if (immediate) {
1736                 fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, first_op,
1737                                        last_op);
1738                 if (IS_ERR(fence))
1739                         return PTR_ERR(fence);
1740         } else {
1741                 int i;
1742
1743                 xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
1744
1745                 fence = xe_exec_queue_last_fence_get(wait_exec_queue, vm);
1746                 if (last_op) {
1747                         for (i = 0; i < num_syncs; i++)
1748                                 xe_sync_entry_signal(&syncs[i], fence);
1749                 }
1750         }
1751
1752         if (last_op)
1753                 xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
1754         dma_fence_put(fence);
1755
1756         return 0;
1757 }
1758
1759 static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q,
1760                       struct xe_bo *bo, struct xe_sync_entry *syncs,
1761                       u32 num_syncs, bool immediate, bool first_op,
1762                       bool last_op)
1763 {
1764         int err;
1765
1766         xe_vm_assert_held(vm);
1767         xe_bo_assert_held(bo);
1768
1769         if (bo && immediate) {
1770                 err = xe_bo_validate(bo, vm, true);
1771                 if (err)
1772                         return err;
1773         }
1774
1775         return __xe_vm_bind(vm, vma, q, syncs, num_syncs, immediate, first_op,
1776                             last_op);
1777 }
1778
1779 static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
1780                         struct xe_exec_queue *q, struct xe_sync_entry *syncs,
1781                         u32 num_syncs, bool first_op, bool last_op)
1782 {
1783         struct dma_fence *fence;
1784         struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
1785
1786         xe_vm_assert_held(vm);
1787         xe_bo_assert_held(xe_vma_bo(vma));
1788
1789         fence = xe_vm_unbind_vma(vma, q, syncs, num_syncs, first_op, last_op);
1790         if (IS_ERR(fence))
1791                 return PTR_ERR(fence);
1792
1793         xe_vma_destroy(vma, fence);
1794         if (last_op)
1795                 xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
1796         dma_fence_put(fence);
1797
1798         return 0;
1799 }
1800
1801 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
1802                                     DRM_XE_VM_CREATE_FLAG_LR_MODE | \
1803                                     DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
1804
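/**
 * xe_vm_create_ioctl() - Create a new VM for a client
 * @dev: DRM device
 * @data: Pointer to &struct drm_xe_vm_create coming from user space
 * @file: DRM file of the caller
 *
 * Validate the requested flag combination (scratch page, LR mode, fault mode),
 * create the VM, register it in the file's VM xarray and, when the device
 * supports it, allocate an ASID for it. On success the new VM id is copied
 * back to user space in @data.
 *
 * A minimal user-space sketch (illustrative only, error handling omitted):
 *
 *	struct drm_xe_vm_create create = {
 *		.flags = DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE,
 *	};
 *	ioctl(fd, DRM_IOCTL_XE_VM_CREATE, &create);
 *	/\* create.vm_id now refers to the new VM *\/
 *
 * Return: 0 on success, negative error code on failure.
 */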
1805 int xe_vm_create_ioctl(struct drm_device *dev, void *data,
1806                        struct drm_file *file)
1807 {
1808         struct xe_device *xe = to_xe_device(dev);
1809         struct xe_file *xef = to_xe_file(file);
1810         struct drm_xe_vm_create *args = data;
1811         struct xe_tile *tile;
1812         struct xe_vm *vm;
1813         u32 id, asid;
1814         int err;
1815         u32 flags = 0;
1816
1817         if (XE_IOCTL_DBG(xe, args->extensions))
1818                 return -EINVAL;
1819
1820         if (XE_WA(xe_root_mmio_gt(xe), 14016763929))
1821                 args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;
1822
1823         if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
1824                          !xe->info.has_usm))
1825                 return -EINVAL;
1826
1827         if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1828                 return -EINVAL;
1829
1830         if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
1831                 return -EINVAL;
1832
1833         if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE &&
1834                          args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
1835                 return -EINVAL;
1836
1837         if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) &&
1838                          args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
1839                 return -EINVAL;
1840
1841         if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
1842                          xe_device_in_non_fault_mode(xe)))
1843                 return -EINVAL;
1844
1845         if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) &&
1846                          xe_device_in_fault_mode(xe)))
1847                 return -EINVAL;
1848

1852         if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
1853                 flags |= XE_VM_FLAG_SCRATCH_PAGE;
1854         if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
1855                 flags |= XE_VM_FLAG_LR_MODE;
1856         if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
1857                 flags |= XE_VM_FLAG_FAULT_MODE;
1858
1859         vm = xe_vm_create(xe, flags);
1860         if (IS_ERR(vm))
1861                 return PTR_ERR(vm);
1862
1863         mutex_lock(&xef->vm.lock);
1864         err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
1865         mutex_unlock(&xef->vm.lock);
1866         if (err)
1867                 goto err_close_and_put;
1868
1869         if (xe->info.has_asid) {
1870                 mutex_lock(&xe->usm.lock);
1871                 err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
1872                                       XA_LIMIT(1, XE_MAX_ASID - 1),
1873                                       &xe->usm.next_asid, GFP_KERNEL);
1874                 mutex_unlock(&xe->usm.lock);
1875                 if (err < 0)
1876                         goto err_free_id;
1877
1878                 vm->usm.asid = asid;
1879         }
1880
1881         args->vm_id = id;
1882         vm->xef = xef;
1883
1884         /* Record BO memory for VM pagetables created against this client */
1885         for_each_tile(tile, xe, id)
1886                 if (vm->pt_root[id])
1887                         xe_drm_client_add_bo(vm->xef->client, vm->pt_root[id]->bo);
1888
1889 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
1890         /* Warning: Security issue - never enable by default */
1891         args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
1892 #endif
1893
1894         return 0;
1895
1896 err_free_id:
1897         mutex_lock(&xef->vm.lock);
1898         xa_erase(&xef->vm.xa, id);
1899         mutex_unlock(&xef->vm.lock);
1900 err_close_and_put:
1901         xe_vm_close_and_put(vm);
1902
1903         return err;
1904 }
1905
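/**
 * xe_vm_destroy_ioctl() - Destroy a VM
 * @dev: DRM device
 * @data: Pointer to &struct drm_xe_vm_destroy coming from user space
 * @file: DRM file of the caller
 *
 * Look up the VM by id, refuse to destroy it while exec queues still use it
 * for preemption fences (-EBUSY), otherwise remove it from the file's VM
 * xarray and close and put it.
 *
 * Return: 0 on success, negative error code on failure.
 */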
1906 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
1907                         struct drm_file *file)
1908 {
1909         struct xe_device *xe = to_xe_device(dev);
1910         struct xe_file *xef = to_xe_file(file);
1911         struct drm_xe_vm_destroy *args = data;
1912         struct xe_vm *vm;
1913         int err = 0;
1914
1915         if (XE_IOCTL_DBG(xe, args->pad) ||
1916             XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1917                 return -EINVAL;
1918
1919         mutex_lock(&xef->vm.lock);
1920         vm = xa_load(&xef->vm.xa, args->vm_id);
1921         if (XE_IOCTL_DBG(xe, !vm))
1922                 err = -ENOENT;
1923         else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues))
1924                 err = -EBUSY;
1925         else
1926                 xa_erase(&xef->vm.xa, args->vm_id);
1927         mutex_unlock(&xef->vm.lock);
1928
1929         if (!err)
1930                 xe_vm_close_and_put(vm);
1931
1932         return err;
1933 }
1934
1935 static const u32 region_to_mem_type[] = {
1936         XE_PL_TT,
1937         XE_PL_VRAM0,
1938         XE_PL_VRAM1,
1939 };
1940
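/*
 * Prefetch: migrate the backing BO (if any) to the requested memory region
 * and, if the VMA is not already bound and valid on every tile in its
 * tile_mask, rebind it immediately. If there is nothing to (re)bind, only the
 * syncs are signalled.
 */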
1941 static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
1942                           struct xe_exec_queue *q, u32 region,
1943                           struct xe_sync_entry *syncs, u32 num_syncs,
1944                           bool first_op, bool last_op)
1945 {
1946         struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
1947         int err;
1948
1949         xe_assert(vm->xe, region < ARRAY_SIZE(region_to_mem_type));
1950
1951         if (!xe_vma_has_no_bo(vma)) {
1952                 err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region]);
1953                 if (err)
1954                         return err;
1955         }
1956
1957         if (vma->tile_mask != (vma->tile_present & ~vma->tile_invalidated)) {
1958                 return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs, num_syncs,
1959                                   true, first_op, last_op);
1960         } else {
1961                 int i;
1962
1963                 /* Nothing to do, signal fences now */
1964                 if (last_op) {
1965                         for (i = 0; i < num_syncs; i++) {
1966                                 struct dma_fence *fence =
1967                                         xe_exec_queue_last_fence_get(wait_exec_queue, vm);
1968
1969                                 xe_sync_entry_signal(&syncs[i], fence);
1970                                 dma_fence_put(fence);
1971                         }
1972                 }
1973
1974                 return 0;
1975         }
1976 }
1977
1978 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
1979                              bool post_commit)
1980 {
1981         down_read(&vm->userptr.notifier_lock);
1982         vma->gpuva.flags |= XE_VMA_DESTROYED;
1983         up_read(&vm->userptr.notifier_lock);
1984         if (post_commit)
1985                 xe_vm_remove_vma(vm, vma);
1986 }
1987
1988 #undef ULL
1989 #define ULL     unsigned long long
1990
1991 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
1992 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
1993 {
1994         struct xe_vma *vma;
1995
1996         switch (op->op) {
1997         case DRM_GPUVA_OP_MAP:
1998                 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx",
1999                        (ULL)op->map.va.addr, (ULL)op->map.va.range);
2000                 break;
2001         case DRM_GPUVA_OP_REMAP:
2002                 vma = gpuva_to_vma(op->remap.unmap->va);
2003                 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2004                        (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2005                        op->remap.unmap->keep ? 1 : 0);
2006                 if (op->remap.prev)
2007                         vm_dbg(&xe->drm,
2008                                "REMAP:PREV: addr=0x%016llx, range=0x%016llx",
2009                                (ULL)op->remap.prev->va.addr,
2010                                (ULL)op->remap.prev->va.range);
2011                 if (op->remap.next)
2012                         vm_dbg(&xe->drm,
2013                                "REMAP:NEXT: addr=0x%016llx, range=0x%016llx",
2014                                (ULL)op->remap.next->va.addr,
2015                                (ULL)op->remap.next->va.range);
2016                 break;
2017         case DRM_GPUVA_OP_UNMAP:
2018                 vma = gpuva_to_vma(op->unmap.va);
2019                 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2020                        (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2021                        op->unmap.keep ? 1 : 0);
2022                 break;
2023         case DRM_GPUVA_OP_PREFETCH:
2024                 vma = gpuva_to_vma(op->prefetch.va);
2025                 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx",
2026                        (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma));
2027                 break;
2028         default:
2029                 drm_warn(&xe->drm, "NOT POSSIBLE");
2030         }
2031 }
2032 #else
2033 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
2034 {
2035 }
2036 #endif
2037
2038 /*
2039  * Create the operations list from the IOCTL arguments and set up the operation
2040  * fields so that the parse and commit steps are decoupled from them. This step can fail.
2041  */
2042 static struct drm_gpuva_ops *
2043 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
2044                          u64 bo_offset_or_userptr, u64 addr, u64 range,
2045                          u32 operation, u32 flags,
2046                          u32 prefetch_region, u16 pat_index)
2047 {
2048         struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
2049         struct drm_gpuva_ops *ops;
2050         struct drm_gpuva_op *__op;
2051         struct drm_gpuvm_bo *vm_bo;
2052         int err;
2053
2054         lockdep_assert_held_write(&vm->lock);
2055
2056         vm_dbg(&vm->xe->drm,
2057                "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx",
2058                operation, (ULL)addr, (ULL)range,
2059                (ULL)bo_offset_or_userptr);
2060
2061         switch (operation) {
2062         case DRM_XE_VM_BIND_OP_MAP:
2063         case DRM_XE_VM_BIND_OP_MAP_USERPTR:
2064                 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range,
2065                                                   obj, bo_offset_or_userptr);
2066                 break;
2067         case DRM_XE_VM_BIND_OP_UNMAP:
2068                 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range);
2069                 break;
2070         case DRM_XE_VM_BIND_OP_PREFETCH:
2071                 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range);
2072                 break;
2073         case DRM_XE_VM_BIND_OP_UNMAP_ALL:
2074                 xe_assert(vm->xe, bo);
2075
2076                 err = xe_bo_lock(bo, true);
2077                 if (err)
2078                         return ERR_PTR(err);
2079
2080                 vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj);
2081                 if (IS_ERR(vm_bo)) {
2082                         xe_bo_unlock(bo);
2083                         return ERR_CAST(vm_bo);
2084                 }
2085
2086                 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
2087                 drm_gpuvm_bo_put(vm_bo);
2088                 xe_bo_unlock(bo);
2089                 break;
2090         default:
2091                 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2092                 ops = ERR_PTR(-EINVAL);
2093         }
2094         if (IS_ERR(ops))
2095                 return ops;
2096
2097         drm_gpuva_for_each_op(__op, ops) {
2098                 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2099
2100                 if (__op->op == DRM_GPUVA_OP_MAP) {
2101                         op->map.immediate =
2102                                 flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE;
2103                         op->map.read_only =
2104                                 flags & DRM_XE_VM_BIND_FLAG_READONLY;
2105                         op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
2106                         op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE;
2107                         op->map.pat_index = pat_index;
2108                 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
2109                         op->prefetch.region = prefetch_region;
2110                 }
2111
2112                 print_op(vm->xe, __op);
2113         }
2114
2115         return ops;
2116 }
2117
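/*
 * Allocate and initialise a new VMA for a MAP operation. For BO-backed VMAs
 * the VM object and/or the BO are locked with drm_exec while the VMA is
 * created; userptr VMAs get their pages pinned up front, and external
 * (non-VM-private) BOs get the VM's preempt fences added.
 */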
2118 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
2119                               u16 pat_index, unsigned int flags)
2120 {
2121         struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
2122         struct drm_exec exec;
2123         struct xe_vma *vma;
2124         int err;
2125
2126         lockdep_assert_held_write(&vm->lock);
2127
2128         if (bo) {
2129                 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
2130                 drm_exec_until_all_locked(&exec) {
2131                         err = 0;
2132                         if (!bo->vm) {
2133                                 err = drm_exec_lock_obj(&exec, xe_vm_obj(vm));
2134                                 drm_exec_retry_on_contention(&exec);
2135                         }
2136                         if (!err) {
2137                                 err = drm_exec_lock_obj(&exec, &bo->ttm.base);
2138                                 drm_exec_retry_on_contention(&exec);
2139                         }
2140                         if (err) {
2141                                 drm_exec_fini(&exec);
2142                                 return ERR_PTR(err);
2143                         }
2144                 }
2145         }
2146         vma = xe_vma_create(vm, bo, op->gem.offset,
2147                             op->va.addr, op->va.addr +
2148                             op->va.range - 1, pat_index, flags);
2149         if (bo)
2150                 drm_exec_fini(&exec);
2151
2152         if (xe_vma_is_userptr(vma)) {
2153                 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2154                 if (err) {
2155                         prep_vma_destroy(vm, vma, false);
2156                         xe_vma_destroy_unlocked(vma);
2157                         return ERR_PTR(err);
2158                 }
2159         } else if (!xe_vma_has_no_bo(vma) && !bo->vm) {
2160                 err = add_preempt_fences(vm, bo);
2161                 if (err) {
2162                         prep_vma_destroy(vm, vma, false);
2163                         xe_vma_destroy_unlocked(vma);
2164                         return ERR_PTR(err);
2165                 }
2166         }
2167
2168         return vma;
2169 }
2170
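/*
 * Largest page-table entry size previously used for this VMA, derived from
 * the XE_VMA_PTE_* flags. Defaults to 1G when no flag has been set yet.
 */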
2171 static u64 xe_vma_max_pte_size(struct xe_vma *vma)
2172 {
2173         if (vma->gpuva.flags & XE_VMA_PTE_1G)
2174                 return SZ_1G;
2175         else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT))
2176                 return SZ_2M;
2177         else if (vma->gpuva.flags & XE_VMA_PTE_64K)
2178                 return SZ_64K;
2179         else if (vma->gpuva.flags & XE_VMA_PTE_4K)
2180                 return SZ_4K;
2181
2182         return SZ_1G;   /* Uninitialized, used max size */
2183 }
2184
2185 static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
2186 {
2187         switch (size) {
2188         case SZ_1G:
2189                 vma->gpuva.flags |= XE_VMA_PTE_1G;
2190                 break;
2191         case SZ_2M:
2192                 vma->gpuva.flags |= XE_VMA_PTE_2M;
2193                 break;
2194         case SZ_64K:
2195                 vma->gpuva.flags |= XE_VMA_PTE_64K;
2196                 break;
2197         case SZ_4K:
2198                 vma->gpuva.flags |= XE_VMA_PTE_4K;
2199                 break;
2200         }
2201 }
2202
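/*
 * Commit a parsed GPUVA operation into the VM's VMA tree: insert newly created
 * VMAs, mark unmapped VMAs as destroyed, and for REMAPs insert the prev/next
 * VMAs and adjust the unmapped VMA's range to the portion that actually needs
 * unbinding.
 */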
2203 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
2204 {
2205         int err = 0;
2206
2207         lockdep_assert_held_write(&vm->lock);
2208
2209         switch (op->base.op) {
2210         case DRM_GPUVA_OP_MAP:
2211                 err |= xe_vm_insert_vma(vm, op->map.vma);
2212                 if (!err)
2213                         op->flags |= XE_VMA_OP_COMMITTED;
2214                 break;
2215         case DRM_GPUVA_OP_REMAP:
2216         {
2217                 u8 tile_present =
2218                         gpuva_to_vma(op->base.remap.unmap->va)->tile_present;
2219
2220                 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
2221                                  true);
2222                 op->flags |= XE_VMA_OP_COMMITTED;
2223
2224                 if (op->remap.prev) {
2225                         err |= xe_vm_insert_vma(vm, op->remap.prev);
2226                         if (!err)
2227                                 op->flags |= XE_VMA_OP_PREV_COMMITTED;
2228                         if (!err && op->remap.skip_prev) {
2229                                 op->remap.prev->tile_present =
2230                                         tile_present;
2231                                 op->remap.prev = NULL;
2232                         }
2233                 }
2234                 if (op->remap.next) {
2235                         err |= xe_vm_insert_vma(vm, op->remap.next);
2236                         if (!err)
2237                                 op->flags |= XE_VMA_OP_NEXT_COMMITTED;
2238                         if (!err && op->remap.skip_next) {
2239                                 op->remap.next->tile_present =
2240                                         tile_present;
2241                                 op->remap.next = NULL;
2242                         }
2243                 }
2244
2245                 /* Adjust for partial unbind after removing VMA from VM */
2246                 if (!err) {
2247                         op->base.remap.unmap->va->va.addr = op->remap.start;
2248                         op->base.remap.unmap->va->va.range = op->remap.range;
2249                 }
2250                 break;
2251         }
2252         case DRM_GPUVA_OP_UNMAP:
2253                 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
2254                 op->flags |= XE_VMA_OP_COMMITTED;
2255                 break;
2256         case DRM_GPUVA_OP_PREFETCH:
2257                 op->flags |= XE_VMA_OP_COMMITTED;
2258                 break;
2259         default:
2260                 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2261         }
2262
2263         return err;
2264 }
2265
2266
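/*
 * Parse a list of GPUVA operations into xe_vma_ops: create the VMAs that MAP
 * and REMAP operations need, decide whether the prev/next pieces of a REMAP
 * can skip rebinding (only possible for non-userptr VMAs split on a boundary
 * aligned to the old VMA's largest PTE size), attach the syncs to the first
 * and last operations, and commit each operation to the VM.
 */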
2267 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
2268                                    struct drm_gpuva_ops *ops,
2269                                    struct xe_sync_entry *syncs, u32 num_syncs,
2270                                    struct list_head *ops_list, bool last)
2271 {
2272         struct xe_device *xe = vm->xe;
2273         struct xe_vma_op *last_op = NULL;
2274         struct drm_gpuva_op *__op;
2275         int err = 0;
2276
2277         lockdep_assert_held_write(&vm->lock);
2278
2279         drm_gpuva_for_each_op(__op, ops) {
2280                 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2281                 struct xe_vma *vma;
2282                 bool first = list_empty(ops_list);
2283                 unsigned int flags = 0;
2284
2285                 INIT_LIST_HEAD(&op->link);
2286                 list_add_tail(&op->link, ops_list);
2287
2288                 if (first) {
2289                         op->flags |= XE_VMA_OP_FIRST;
2290                         op->num_syncs = num_syncs;
2291                         op->syncs = syncs;
2292                 }
2293
2294                 op->q = q;
2295
2296                 switch (op->base.op) {
2297                 case DRM_GPUVA_OP_MAP:
2298                 {
2299                         flags |= op->map.read_only ?
2300                                 VMA_CREATE_FLAG_READ_ONLY : 0;
2301                         flags |= op->map.is_null ?
2302                                 VMA_CREATE_FLAG_IS_NULL : 0;
2303                         flags |= op->map.dumpable ?
2304                                 VMA_CREATE_FLAG_DUMPABLE : 0;
2305
2306                         vma = new_vma(vm, &op->base.map, op->map.pat_index,
2307                                       flags);
2308                         if (IS_ERR(vma))
2309                                 return PTR_ERR(vma);
2310
2311                         op->map.vma = vma;
2312                         break;
2313                 }
2314                 case DRM_GPUVA_OP_REMAP:
2315                 {
2316                         struct xe_vma *old =
2317                                 gpuva_to_vma(op->base.remap.unmap->va);
2318
2319                         op->remap.start = xe_vma_start(old);
2320                         op->remap.range = xe_vma_size(old);
2321
2322                         if (op->base.remap.prev) {
2323                                 flags |= op->base.remap.unmap->va->flags &
2324                                         XE_VMA_READ_ONLY ?
2325                                         VMA_CREATE_FLAG_READ_ONLY : 0;
2326                                 flags |= op->base.remap.unmap->va->flags &
2327                                         DRM_GPUVA_SPARSE ?
2328                                         VMA_CREATE_FLAG_IS_NULL : 0;
2329                                 flags |= op->base.remap.unmap->va->flags &
2330                                         XE_VMA_DUMPABLE ?
2331                                         VMA_CREATE_FLAG_DUMPABLE : 0;
2332
2333                                 vma = new_vma(vm, op->base.remap.prev,
2334                                               old->pat_index, flags);
2335                                 if (IS_ERR(vma))
2336                                         return PTR_ERR(vma);
2337
2338                                 op->remap.prev = vma;
2339
2340                                 /*
2341                                  * Userptr creates a new SG mapping so
2342                                  * we must also rebind.
2343                                  */
2344                                 op->remap.skip_prev = !xe_vma_is_userptr(old) &&
2345                                         IS_ALIGNED(xe_vma_end(vma),
2346                                                    xe_vma_max_pte_size(old));
2347                                 if (op->remap.skip_prev) {
2348                                         xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2349                                         op->remap.range -=
2350                                                 xe_vma_end(vma) -
2351                                                 xe_vma_start(old);
2352                                         op->remap.start = xe_vma_end(vma);
2353                                         vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx",
2354                                                (ULL)op->remap.start,
2355                                                (ULL)op->remap.range);
2356                                 }
2357                         }
2358
2359                         if (op->base.remap.next) {
2360                                 flags |= op->base.remap.unmap->va->flags &
2361                                         XE_VMA_READ_ONLY ?
2362                                         VMA_CREATE_FLAG_READ_ONLY : 0;
2363                                 flags |= op->base.remap.unmap->va->flags &
2364                                         DRM_GPUVA_SPARSE ?
2365                                         VMA_CREATE_FLAG_IS_NULL : 0;
2366                                 flags |= op->base.remap.unmap->va->flags &
2367                                         XE_VMA_DUMPABLE ?
2368                                         VMA_CREATE_FLAG_DUMPABLE : 0;
2369
2370                                 vma = new_vma(vm, op->base.remap.next,
2371                                               old->pat_index, flags);
2372                                 if (IS_ERR(vma))
2373                                         return PTR_ERR(vma);
2374
2375                                 op->remap.next = vma;
2376
2377                                 /*
2378                                  * Userptr creates a new SG mapping so
2379                                  * we must also rebind.
2380                                  */
2381                                 op->remap.skip_next = !xe_vma_is_userptr(old) &&
2382                                         IS_ALIGNED(xe_vma_start(vma),
2383                                                    xe_vma_max_pte_size(old));
2384                                 if (op->remap.skip_next) {
2385                                         xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2386                                         op->remap.range -=
2387                                                 xe_vma_end(old) -
2388                                                 xe_vma_start(vma);
2389                                         vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx",
2390                                                (ULL)op->remap.start,
2391                                                (ULL)op->remap.range);
2392                                 }
2393                         }
2394                         break;
2395                 }
2396                 case DRM_GPUVA_OP_UNMAP:
2397                 case DRM_GPUVA_OP_PREFETCH:
2398                         /* Nothing to do */
2399                         break;
2400                 default:
2401                         drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2402                 }
2403
2404                 last_op = op;
2405
2406                 err = xe_vma_op_commit(vm, op);
2407                 if (err)
2408                         return err;
2409         }
2410
2411         /* FIXME: Unhandled corner case */
2412         XE_WARN_ON(!last_op && last && !list_empty(ops_list));
2413
2414         if (!last_op)
2415                 return 0;
2416
2417         last_op->ops = ops;
2418         if (last) {
2419                 last_op->flags |= XE_VMA_OP_LAST;
2420                 last_op->num_syncs = num_syncs;
2421                 last_op->syncs = syncs;
2422         }
2423
2424         return 0;
2425 }
2426
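/*
 * Execute a single operation with the VMA and its BO locked via @exec:
 * MAP binds, UNMAP unbinds, PREFETCH migrates and possibly rebinds, and REMAP
 * is carried out as an unbind followed by binds of the remaining prev/next
 * pieces.
 */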
2427 static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
2428                       struct xe_vma *vma, struct xe_vma_op *op)
2429 {
2430         int err;
2431
2432         lockdep_assert_held_write(&vm->lock);
2433
2434         err = xe_vm_lock_vma(exec, vma);
2435         if (err)
2436                 return err;
2437
2438         xe_vm_assert_held(vm);
2439         xe_bo_assert_held(xe_vma_bo(vma));
2440
2441         switch (op->base.op) {
2442         case DRM_GPUVA_OP_MAP:
2443                 err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma),
2444                                  op->syncs, op->num_syncs,
2445                                  op->map.immediate || !xe_vm_in_fault_mode(vm),
2446                                  op->flags & XE_VMA_OP_FIRST,
2447                                  op->flags & XE_VMA_OP_LAST);
2448                 break;
2449         case DRM_GPUVA_OP_REMAP:
2450         {
2451                 bool prev = !!op->remap.prev;
2452                 bool next = !!op->remap.next;
2453
2454                 if (!op->remap.unmap_done) {
2455                         if (prev || next)
2456                                 vma->gpuva.flags |= XE_VMA_FIRST_REBIND;
2457                         err = xe_vm_unbind(vm, vma, op->q, op->syncs,
2458                                            op->num_syncs,
2459                                            op->flags & XE_VMA_OP_FIRST,
2460                                            op->flags & XE_VMA_OP_LAST &&
2461                                            !prev && !next);
2462                         if (err)
2463                                 break;
2464                         op->remap.unmap_done = true;
2465                 }
2466
2467                 if (prev) {
2468                         op->remap.prev->gpuva.flags |= XE_VMA_LAST_REBIND;
2469                         err = xe_vm_bind(vm, op->remap.prev, op->q,
2470                                          xe_vma_bo(op->remap.prev), op->syncs,
2471                                          op->num_syncs, true, false,
2472                                          op->flags & XE_VMA_OP_LAST && !next);
2473                         op->remap.prev->gpuva.flags &= ~XE_VMA_LAST_REBIND;
2474                         if (err)
2475                                 break;
2476                         op->remap.prev = NULL;
2477                 }
2478
2479                 if (next) {
2480                         op->remap.next->gpuva.flags |= XE_VMA_LAST_REBIND;
2481                         err = xe_vm_bind(vm, op->remap.next, op->q,
2482                                          xe_vma_bo(op->remap.next),
2483                                          op->syncs, op->num_syncs,
2484                                          true, false,
2485                                          op->flags & XE_VMA_OP_LAST);
2486                         op->remap.next->gpuva.flags &= ~XE_VMA_LAST_REBIND;
2487                         if (err)
2488                                 break;
2489                         op->remap.next = NULL;
2490                 }
2491
2492                 break;
2493         }
2494         case DRM_GPUVA_OP_UNMAP:
2495                 err = xe_vm_unbind(vm, vma, op->q, op->syncs,
2496                                    op->num_syncs, op->flags & XE_VMA_OP_FIRST,
2497                                    op->flags & XE_VMA_OP_LAST);
2498                 break;
2499         case DRM_GPUVA_OP_PREFETCH:
2500                 err = xe_vm_prefetch(vm, vma, op->q, op->prefetch.region,
2501                                      op->syncs, op->num_syncs,
2502                                      op->flags & XE_VMA_OP_FIRST,
2503                                      op->flags & XE_VMA_OP_LAST);
2504                 break;
2505         default:
2506                 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2507         }
2508
2509         if (err)
2510                 trace_xe_vma_fail(vma);
2511
2512         return err;
2513 }
2514
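/*
 * Lock the required objects with drm_exec and execute the operation. On
 * -EAGAIN for a userptr VMA (pages were invalidated), repin the pages and
 * retry the whole operation.
 */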
2515 static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
2516                                struct xe_vma_op *op)
2517 {
2518         struct drm_exec exec;
2519         int err;
2520
2521 retry_userptr:
2522         drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
2523         drm_exec_until_all_locked(&exec) {
2524                 err = op_execute(&exec, vm, vma, op);
2525                 drm_exec_retry_on_contention(&exec);
2526                 if (err)
2527                         break;
2528         }
2529         drm_exec_fini(&exec);
2530
2531         if (err == -EAGAIN) {
2532                 lockdep_assert_held_write(&vm->lock);
2533
2534                 if (op->base.op == DRM_GPUVA_OP_REMAP) {
2535                         if (!op->remap.unmap_done)
2536                                 vma = gpuva_to_vma(op->base.remap.unmap->va);
2537                         else if (op->remap.prev)
2538                                 vma = op->remap.prev;
2539                         else
2540                                 vma = op->remap.next;
2541                 }
2542
2543                 if (xe_vma_is_userptr(vma)) {
2544                         err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2545                         if (!err)
2546                                 goto retry_userptr;
2547
2548                         trace_xe_vma_fail(vma);
2549                 }
2550         }
2551
2552         return err;
2553 }
2554
2555 static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op)
2556 {
2557         int ret = 0;
2558
2559         lockdep_assert_held_write(&vm->lock);
2560
2561         switch (op->base.op) {
2562         case DRM_GPUVA_OP_MAP:
2563                 ret = __xe_vma_op_execute(vm, op->map.vma, op);
2564                 break;
2565         case DRM_GPUVA_OP_REMAP:
2566         {
2567                 struct xe_vma *vma;
2568
2569                 if (!op->remap.unmap_done)
2570                         vma = gpuva_to_vma(op->base.remap.unmap->va);
2571                 else if (op->remap.prev)
2572                         vma = op->remap.prev;
2573                 else
2574                         vma = op->remap.next;
2575
2576                 ret = __xe_vma_op_execute(vm, vma, op);
2577                 break;
2578         }
2579         case DRM_GPUVA_OP_UNMAP:
2580                 ret = __xe_vma_op_execute(vm, gpuva_to_vma(op->base.unmap.va),
2581                                           op);
2582                 break;
2583         case DRM_GPUVA_OP_PREFETCH:
2584                 ret = __xe_vma_op_execute(vm,
2585                                           gpuva_to_vma(op->base.prefetch.va),
2586                                           op);
2587                 break;
2588         default:
2589                 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2590         }
2591
2592         return ret;
2593 }
2594
2595 static void xe_vma_op_cleanup(struct xe_vm *vm, struct xe_vma_op *op)
2596 {
2597         bool last = op->flags & XE_VMA_OP_LAST;
2598
2599         if (last) {
2600                 while (op->num_syncs--)
2601                         xe_sync_entry_cleanup(&op->syncs[op->num_syncs]);
2602                 kfree(op->syncs);
2603                 if (op->q)
2604                         xe_exec_queue_put(op->q);
2605         }
2606         if (!list_empty(&op->link))
2607                 list_del(&op->link);
2608         if (op->ops)
2609                 drm_gpuva_ops_free(&vm->gpuvm, op->ops);
2610         if (last)
2611                 xe_vm_put(vm);
2612 }
2613
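/*
 * Undo a single committed (or partially committed) operation: destroy VMAs
 * created for MAP/REMAP, and re-insert VMAs that an UNMAP or REMAP had removed
 * from the VM, clearing their destroyed state.
 */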
2614 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
2615                              bool post_commit, bool prev_post_commit,
2616                              bool next_post_commit)
2617 {
2618         lockdep_assert_held_write(&vm->lock);
2619
2620         switch (op->base.op) {
2621         case DRM_GPUVA_OP_MAP:
2622                 if (op->map.vma) {
2623                         prep_vma_destroy(vm, op->map.vma, post_commit);
2624                         xe_vma_destroy_unlocked(op->map.vma);
2625                 }
2626                 break;
2627         case DRM_GPUVA_OP_UNMAP:
2628         {
2629                 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
2630
2631                 if (vma) {
2632                         down_read(&vm->userptr.notifier_lock);
2633                         vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2634                         up_read(&vm->userptr.notifier_lock);
2635                         if (post_commit)
2636                                 xe_vm_insert_vma(vm, vma);
2637                 }
2638                 break;
2639         }
2640         case DRM_GPUVA_OP_REMAP:
2641         {
2642                 struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va);
2643
2644                 if (op->remap.prev) {
2645                         prep_vma_destroy(vm, op->remap.prev, prev_post_commit);
2646                         xe_vma_destroy_unlocked(op->remap.prev);
2647                 }
2648                 if (op->remap.next) {
2649                         prep_vma_destroy(vm, op->remap.next, next_post_commit);
2650                         xe_vma_destroy_unlocked(op->remap.next);
2651                 }
2652                 if (vma) {
2653                         down_read(&vm->userptr.notifier_lock);
2654                         vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2655                         up_read(&vm->userptr.notifier_lock);
2656                         if (post_commit)
2657                                 xe_vm_insert_vma(vm, vma);
2658                 }
2659                 break;
2660         }
2661         case DRM_GPUVA_OP_PREFETCH:
2662                 /* Nothing to do */
2663                 break;
2664         default:
2665                 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2666         }
2667 }
2668
2669 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
2670                                      struct drm_gpuva_ops **ops,
2671                                      int num_ops_list)
2672 {
2673         int i;
2674
2675         for (i = num_ops_list - 1; i >= 0; --i) {
2676                 struct drm_gpuva_ops *__ops = ops[i];
2677                 struct drm_gpuva_op *__op;
2678
2679                 if (!__ops)
2680                         continue;
2681
2682                 drm_gpuva_for_each_op_reverse(__op, __ops) {
2683                         struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2684
2685                         xe_vma_op_unwind(vm, op,
2686                                          op->flags & XE_VMA_OP_COMMITTED,
2687                                          op->flags & XE_VMA_OP_PREV_COMMITTED,
2688                                          op->flags & XE_VMA_OP_NEXT_COMMITTED);
2689                 }
2690
2691                 drm_gpuva_ops_free(&vm->gpuvm, __ops);
2692         }
2693 }
2694
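/*
 * Execute the parsed operations in order. There is currently no proper error
 * recovery at this stage: if an operation fails, the VM is killed and -ENOSPC
 * is returned (see the FIXME below).
 */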
2695 static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
2696                                      struct list_head *ops_list)
2697 {
2698         struct xe_vma_op *op, *next;
2699         int err;
2700
2701         lockdep_assert_held_write(&vm->lock);
2702
2703         list_for_each_entry_safe(op, next, ops_list, link) {
2704                 err = xe_vma_op_execute(vm, op);
2705                 if (err) {
2706                         drm_warn(&vm->xe->drm, "VM op(%d) failed with %d",
2707                                  op->base.op, err);
2708                         /*
2709                          * FIXME: Killing VM rather than proper error handling
2710                          */
2711                         xe_vm_kill(vm);
2712                         return -ENOSPC;
2713                 }
2714                 xe_vma_op_cleanup(vm, op);
2715         }
2716
2717         return 0;
2718 }
2719
2720 #define SUPPORTED_FLAGS \
2721         (DRM_XE_VM_BIND_FLAG_READONLY | \
2722          DRM_XE_VM_BIND_FLAG_IMMEDIATE | \
2723          DRM_XE_VM_BIND_FLAG_NULL | \
2724          DRM_XE_VM_BIND_FLAG_DUMPABLE)
2725 #define XE_64K_PAGE_MASK 0xffffull
2726 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
2727
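/*
 * Copy the bind ops from user space (a single embedded op or a vector) and
 * validate each one: PAT index and coherency mode, operation/flag
 * combinations, prefetch region, and page alignment of address, range and
 * object offset.
 */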
2728 static int vm_bind_ioctl_check_args(struct xe_device *xe,
2729                                     struct drm_xe_vm_bind *args,
2730                                     struct drm_xe_vm_bind_op **bind_ops)
2731 {
2732         int err;
2733         int i;
2734
2735         if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
2736             XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2737                 return -EINVAL;
2738
2739         if (XE_IOCTL_DBG(xe, args->extensions))
2740                 return -EINVAL;
2741
2742         if (args->num_binds > 1) {
2743                 u64 __user *bind_user =
2744                         u64_to_user_ptr(args->vector_of_binds);
2745
2746                 *bind_ops = kvmalloc_array(args->num_binds,
2747                                            sizeof(struct drm_xe_vm_bind_op),
2748                                            GFP_KERNEL | __GFP_ACCOUNT);
2749                 if (!*bind_ops)
2750                         return -ENOMEM;
2751
2752                 err = __copy_from_user(*bind_ops, bind_user,
2753                                        sizeof(struct drm_xe_vm_bind_op) *
2754                                        args->num_binds);
2755                 if (XE_IOCTL_DBG(xe, err)) {
2756                         err = -EFAULT;
2757                         goto free_bind_ops;
2758                 }
2759         } else {
2760                 *bind_ops = &args->bind;
2761         }
2762
2763         for (i = 0; i < args->num_binds; ++i) {
2764                 u64 range = (*bind_ops)[i].range;
2765                 u64 addr = (*bind_ops)[i].addr;
2766                 u32 op = (*bind_ops)[i].op;
2767                 u32 flags = (*bind_ops)[i].flags;
2768                 u32 obj = (*bind_ops)[i].obj;
2769                 u64 obj_offset = (*bind_ops)[i].obj_offset;
2770                 u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance;
2771                 bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
2772                 u16 pat_index = (*bind_ops)[i].pat_index;
2773                 u16 coh_mode;
2774
2775                 if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) {
2776                         err = -EINVAL;
2777                         goto free_bind_ops;
2778                 }
2779
2780                 pat_index = array_index_nospec(pat_index, xe->pat.n_entries);
2781                 (*bind_ops)[i].pat_index = pat_index;
2782                 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
2783                 if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */
2784                         err = -EINVAL;
2785                         goto free_bind_ops;
2786                 }
2787
2788                 if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) {
2789                         err = -EINVAL;
2790                         goto free_bind_ops;
2791                 }
2792
2793                 if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) ||
2794                     XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
2795                     XE_IOCTL_DBG(xe, obj && is_null) ||
2796                     XE_IOCTL_DBG(xe, obj_offset && is_null) ||
2797                     XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP &&
2798                                  is_null) ||
2799                     XE_IOCTL_DBG(xe, !obj &&
2800                                  op == DRM_XE_VM_BIND_OP_MAP &&
2801                                  !is_null) ||
2802                     XE_IOCTL_DBG(xe, !obj &&
2803                                  op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
2804                     XE_IOCTL_DBG(xe, addr &&
2805                                  op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
2806                     XE_IOCTL_DBG(xe, range &&
2807                                  op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
2808                     XE_IOCTL_DBG(xe, obj &&
2809                                  op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
2810                     XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
2811                                  op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
2812                     XE_IOCTL_DBG(xe, obj &&
2813                                  op == DRM_XE_VM_BIND_OP_PREFETCH) ||
2814                     XE_IOCTL_DBG(xe, prefetch_region &&
2815                                  op != DRM_XE_VM_BIND_OP_PREFETCH) ||
2816                     XE_IOCTL_DBG(xe, !(BIT(prefetch_region) &
2817                                        xe->info.mem_region_mask)) ||
2818                     XE_IOCTL_DBG(xe, obj &&
2819                                  op == DRM_XE_VM_BIND_OP_UNMAP)) {
2820                         err = -EINVAL;
2821                         goto free_bind_ops;
2822                 }
2823
2824                 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) ||
2825                     XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
2826                     XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
2827                     XE_IOCTL_DBG(xe, !range &&
2828                                  op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) {
2829                         err = -EINVAL;
2830                         goto free_bind_ops;
2831                 }
2832         }
2833
2834         return 0;
2835
2836 free_bind_ops:
2837         if (args->num_binds > 1)
2838                 kvfree(*bind_ops);
2839         return err;
2840 }
2841
2842 static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
2843                                        struct xe_exec_queue *q,
2844                                        struct xe_sync_entry *syncs,
2845                                        int num_syncs)
2846 {
2847         struct dma_fence *fence;
2848         int i, err = 0;
2849
2850         fence = xe_sync_in_fence_get(syncs, num_syncs,
2851                                      to_wait_exec_queue(vm, q), vm);
2852         if (IS_ERR(fence))
2853                 return PTR_ERR(fence);
2854
2855         for (i = 0; i < num_syncs; i++)
2856                 xe_sync_entry_signal(&syncs[i], fence);
2857
2858         xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm,
2859                                      fence);
2860         dma_fence_put(fence);
2861
2862         return err;
2863 }
2864
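/**
 * xe_vm_bind_ioctl() - Bind/unbind addresses in a VM
 * @dev: DRM device
 * @data: Pointer to &struct drm_xe_vm_bind coming from user space
 * @file: DRM file of the caller
 *
 * Validate the bind arguments, look up the VM, exec queue and GEM objects,
 * parse the syncs, then build, parse and execute the GPUVA operation lists for
 * every bind op. On failure before execution, already committed operations are
 * unwound.
 *
 * A minimal user-space sketch for a single map (illustrative only, error
 * handling and syncs omitted):
 *
 *	struct drm_xe_vm_bind bind = {
 *		.vm_id = vm_id,
 *		.num_binds = 1,
 *		.bind.obj = bo_handle,
 *		.bind.obj_offset = 0,
 *		.bind.addr = gpu_addr,
 *		.bind.range = size,
 *		.bind.op = DRM_XE_VM_BIND_OP_MAP,
 *		.bind.pat_index = pat_index,
 *	};
 *	ioctl(fd, DRM_IOCTL_XE_VM_BIND, &bind);
 *
 * Return: 0 on success, negative error code on failure.
 */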
2865 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2866 {
2867         struct xe_device *xe = to_xe_device(dev);
2868         struct xe_file *xef = to_xe_file(file);
2869         struct drm_xe_vm_bind *args = data;
2870         struct drm_xe_sync __user *syncs_user;
2871         struct xe_bo **bos = NULL;
2872         struct drm_gpuva_ops **ops = NULL;
2873         struct xe_vm *vm;
2874         struct xe_exec_queue *q = NULL;
2875         u32 num_syncs, num_ufence = 0;
2876         struct xe_sync_entry *syncs = NULL;
2877         struct drm_xe_vm_bind_op *bind_ops;
2878         LIST_HEAD(ops_list);
2879         int err;
2880         int i;
2881
2882         err = vm_bind_ioctl_check_args(xe, args, &bind_ops);
2883         if (err)
2884                 return err;
2885
2886         if (args->exec_queue_id) {
2887                 q = xe_exec_queue_lookup(xef, args->exec_queue_id);
2888                 if (XE_IOCTL_DBG(xe, !q)) {
2889                         err = -ENOENT;
2890                         goto free_objs;
2891                 }
2892
2893                 if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
2894                         err = -EINVAL;
2895                         goto put_exec_queue;
2896                 }
2897         }
2898
2899         vm = xe_vm_lookup(xef, args->vm_id);
2900         if (XE_IOCTL_DBG(xe, !vm)) {
2901                 err = -EINVAL;
2902                 goto put_exec_queue;
2903         }
2904
2905         err = down_write_killable(&vm->lock);
2906         if (err)
2907                 goto put_vm;
2908
2909         if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
2910                 err = -ENOENT;
2911                 goto release_vm_lock;
2912         }
2913
2914         for (i = 0; i < args->num_binds; ++i) {
2915                 u64 range = bind_ops[i].range;
2916                 u64 addr = bind_ops[i].addr;
2917
2918                 if (XE_IOCTL_DBG(xe, range > vm->size) ||
2919                     XE_IOCTL_DBG(xe, addr > vm->size - range)) {
2920                         err = -EINVAL;
2921                         goto release_vm_lock;
2922                 }
2923         }
2924
2925         if (args->num_binds) {
2926                 bos = kvcalloc(args->num_binds, sizeof(*bos),
2927                                GFP_KERNEL | __GFP_ACCOUNT);
2928                 if (!bos) {
2929                         err = -ENOMEM;
2930                         goto release_vm_lock;
2931                 }
2932
2933                 ops = kvcalloc(args->num_binds, sizeof(*ops),
2934                                GFP_KERNEL | __GFP_ACCOUNT);
2935                 if (!ops) {
2936                         err = -ENOMEM;
2937                         goto release_vm_lock;
2938                 }
2939         }
2940
2941         for (i = 0; i < args->num_binds; ++i) {
2942                 struct drm_gem_object *gem_obj;
2943                 u64 range = bind_ops[i].range;
2944                 u64 addr = bind_ops[i].addr;
2945                 u32 obj = bind_ops[i].obj;
2946                 u64 obj_offset = bind_ops[i].obj_offset;
2947                 u16 pat_index = bind_ops[i].pat_index;
2948                 u16 coh_mode;
2949
2950                 if (!obj)
2951                         continue;
2952
2953                 gem_obj = drm_gem_object_lookup(file, obj);
2954                 if (XE_IOCTL_DBG(xe, !gem_obj)) {
2955                         err = -ENOENT;
2956                         goto put_obj;
2957                 }
2958                 bos[i] = gem_to_xe_bo(gem_obj);
2959
2960                 if (XE_IOCTL_DBG(xe, range > bos[i]->size) ||
2961                     XE_IOCTL_DBG(xe, obj_offset >
2962                                  bos[i]->size - range)) {
2963                         err = -EINVAL;
2964                         goto put_obj;
2965                 }
2966
2967                 if (bos[i]->flags & XE_BO_FLAG_INTERNAL_64K) {
2968                         if (XE_IOCTL_DBG(xe, obj_offset &
2969                                          XE_64K_PAGE_MASK) ||
2970                             XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
2971                             XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
2972                                 err = -EINVAL;
2973                                 goto put_obj;
2974                         }
2975                 }
2976
2977                 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
2978                 if (bos[i]->cpu_caching) {
2979                         if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
2980                                          bos[i]->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
2981                                 err = -EINVAL;
2982                                 goto put_obj;
2983                         }
2984                 } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
2985                         /*
2986                          * An imported dma-buf from a different device should
2987                          * require 1-way or 2-way coherency since we don't know
2988                          * how it was mapped on the CPU. Just assume it is
2989                          * potentially cached on the CPU side.
2990                          */
2991                         err = -EINVAL;
2992                         goto put_obj;
2993                 }
2994         }
2995
2996         if (args->num_syncs) {
2997                 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
2998                 if (!syncs) {
2999                         err = -ENOMEM;
3000                         goto put_obj;
3001                 }
3002         }
3003
3004         syncs_user = u64_to_user_ptr(args->syncs);
3005         for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
3006                 err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
3007                                           &syncs_user[num_syncs],
3008                                           (xe_vm_in_lr_mode(vm) ?
3009                                            SYNC_PARSE_FLAG_LR_MODE : 0) |
3010                                           (!args->num_binds ?
3011                                            SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0));
3012                 if (err)
3013                         goto free_syncs;
3014
3015                 if (xe_sync_is_ufence(&syncs[num_syncs]))
3016                         num_ufence++;
3017         }
3018
3019         if (XE_IOCTL_DBG(xe, num_ufence > 1)) {
3020                 err = -EINVAL;
3021                 goto free_syncs;
3022         }
3023
3024         if (!args->num_binds) {
3025                 err = -ENODATA;
3026                 goto free_syncs;
3027         }
3028
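        /*
         * Each bind below is first converted into a set of GPUVA operations
         * and then parsed onto the shared ops_list; the final iteration is
         * flagged so the accumulated list can be executed as a single unit
         * further down.
         */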
3029         for (i = 0; i < args->num_binds; ++i) {
3030                 u64 range = bind_ops[i].range;
3031                 u64 addr = bind_ops[i].addr;
3032                 u32 op = bind_ops[i].op;
3033                 u32 flags = bind_ops[i].flags;
3034                 u64 obj_offset = bind_ops[i].obj_offset;
3035                 u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
3036                 u16 pat_index = bind_ops[i].pat_index;
3037
3038                 ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset,
3039                                                   addr, range, op, flags,
3040                                                   prefetch_region, pat_index);
3041                 if (IS_ERR(ops[i])) {
3042                         err = PTR_ERR(ops[i]);
3043                         ops[i] = NULL;
3044                         goto unwind_ops;
3045                 }
3046
3047                 err = vm_bind_ioctl_ops_parse(vm, q, ops[i], syncs, num_syncs,
3048                                               &ops_list,
3049                                               i == args->num_binds - 1);
3050                 if (err)
3051                         goto unwind_ops;
3052         }
3053
3054         /* Nothing to do */
3055         if (list_empty(&ops_list)) {
3056                 err = -ENODATA;
3057                 goto unwind_ops;
3058         }
3059
3060         xe_vm_get(vm);
3061         if (q)
3062                 xe_exec_queue_get(q);
3063
3064         err = vm_bind_ioctl_ops_execute(vm, &ops_list);
3065
3066         up_write(&vm->lock);
3067
3068         if (q)
3069                 xe_exec_queue_put(q);
3070         xe_vm_put(vm);
3071
3072         for (i = 0; bos && i < args->num_binds; ++i)
3073                 xe_bo_put(bos[i]);
3074
3075         kvfree(bos);
3076         kvfree(ops);
3077         if (args->num_binds > 1)
3078                 kvfree(bind_ops);
3079
3080         return err;
3081
3082 unwind_ops:
3083         vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
3084 free_syncs:
3085         if (err == -ENODATA)
3086                 err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
3087         while (num_syncs--)
3088                 xe_sync_entry_cleanup(&syncs[num_syncs]);
3089
3090         kfree(syncs);
3091 put_obj:
3092         for (i = 0; i < args->num_binds; ++i)
3093                 xe_bo_put(bos[i]);
3094 release_vm_lock:
3095         up_write(&vm->lock);
3096 put_vm:
3097         xe_vm_put(vm);
3098 put_exec_queue:
3099         if (q)
3100                 xe_exec_queue_put(q);
3101 free_objs:
3102         kvfree(bos);
3103         kvfree(ops);
3104         if (args->num_binds > 1)
3105                 kvfree(bind_ops);
3106         return err;
3107 }
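
/*
 * Illustrative only: a minimal userspace sketch of driving the bind ioctl
 * handled above with a single MAP and no sync objects. The struct layout,
 * ioctl number and op name below are assumptions based on the xe uAPI
 * (xe_drm.h) and should be checked against the installed headers rather
 * than taken from this comment:
 *
 *	struct drm_xe_vm_bind bind = {
 *		.vm_id = vm_id,
 *		.num_binds = 1,
 *		.bind = {
 *			.obj = bo_handle,
 *			.obj_offset = 0,
 *			.addr = gpu_va,
 *			.range = bo_size,
 *			.op = DRM_XE_VM_BIND_OP_MAP,
 *			.pat_index = pat_index,
 *		},
 *	};
 *
 *	if (ioctl(drm_fd, DRM_IOCTL_XE_VM_BIND, &bind))
 *		err(1, "DRM_IOCTL_XE_VM_BIND");
 */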
3108
3109 /**
3110  * xe_vm_lock() - Lock the vm's dma_resv object
3111  * @vm: The struct xe_vm whose lock is to be locked
3112  * @intr: Whether any waits should be interruptible
3113  *
3114  * Return: 0 on success, -EINTR if @intr is true and the wait for a
3115  * contended lock was interrupted. If @intr is false, the function
3116  * always returns 0.
3117  */
3118 int xe_vm_lock(struct xe_vm *vm, bool intr)
3119 {
3120         if (intr)
3121                 return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
3122
3123         return dma_resv_lock(xe_vm_resv(vm), NULL);
3124 }
3125
3126 /**
3127  * xe_vm_unlock() - Unlock the vm's dma_resv object
3128  * @vm: The struct xe_vm whose lock is to be released.
3129  *
3130  * Unlock the vm's dma_resv object that was previously locked by xe_vm_lock().
3131  */
3132 void xe_vm_unlock(struct xe_vm *vm)
3133 {
3134         dma_resv_unlock(xe_vm_resv(vm));
3135 }
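
/*
 * Typical (illustrative) usage of the pair above from a sleepable context,
 * assuming @vm is a valid struct xe_vm pointer:
 *
 *	err = xe_vm_lock(vm, true);
 *	if (err)
 *		return err;
 *	... operate on state protected by the vm's dma_resv ...
 *	xe_vm_unlock(vm);
 */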
3136
3137 /**
3138  * xe_vm_invalidate_vma() - Invalidate GPU mappings for a VMA without a lock
3139  * @vma: VMA to invalidate
3140  *
3141  * Walks the page-table leaves, zeroing the entries owned by this VMA,
3142  * invalidates the TLBs and blocks until the TLB invalidation is
3143  * complete.
3144  *
3145  * Return: 0 on success, negative error code otherwise.
3146  */
3147 int xe_vm_invalidate_vma(struct xe_vma *vma)
3148 {
3149         struct xe_device *xe = xe_vma_vm(vma)->xe;
3150         struct xe_tile *tile;
3151         u32 tile_needs_invalidate = 0;
3152         int seqno[XE_MAX_TILES_PER_DEVICE];
3153         u8 id;
3154         int ret;
3155
3156         xe_assert(xe, !xe_vma_is_null(vma));
3157         trace_xe_vma_invalidate(vma);
3158
3159         vm_dbg(&xe_vma_vm(vma)->xe->drm,
3160                "INVALIDATE: addr=0x%016llx, range=0x%016llx",
3161                 xe_vma_start(vma), xe_vma_size(vma));
3162
3163         /* Check that we don't race with page-table updates */
3164         if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
3165                 if (xe_vma_is_userptr(vma)) {
3166                         WARN_ON_ONCE(!mmu_interval_check_retry
3167                                      (&to_userptr_vma(vma)->userptr.notifier,
3168                                       to_userptr_vma(vma)->userptr.notifier_seq));
3169                         WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)),
3170                                                              DMA_RESV_USAGE_BOOKKEEP));
3171
3172                 } else {
3173                         xe_bo_assert_held(xe_vma_bo(vma));
3174                 }
3175         }
3176
3177         for_each_tile(tile, xe, id) {
3178                 if (xe_pt_zap_ptes(tile, vma)) {
3179                         tile_needs_invalidate |= BIT(id);
3180                         xe_device_wmb(xe);
3181                         /*
3182                          * FIXME: We potentially need to invalidate multiple
3183                          * GTs within the tile
3184                          */
3185                         seqno[id] = xe_gt_tlb_invalidation_vma(tile->primary_gt, NULL, vma);
3186                         if (seqno[id] < 0)
3187                                 return seqno[id];
3188                 }
3189         }
3190
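        /*
         * Invalidations were issued above for every tile whose PTEs were
         * zapped; waiting is done in a second pass so invalidations on
         * different tiles can make progress in parallel.
         */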
3191         for_each_tile(tile, xe, id) {
3192                 if (tile_needs_invalidate & BIT(id)) {
3193                         ret = xe_gt_tlb_invalidation_wait(tile->primary_gt, seqno[id]);
3194                         if (ret < 0)
3195                                 return ret;
3196                 }
3197         }
3198
3199         vma->tile_invalidated = vma->tile_mask;
3200
3201         return 0;
3202 }
3203
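/**
 * xe_analyze_vm() - Dump the VM's page-table root and mappings for debugging
 * @p: The drm_printer to print to
 * @vm: The VM to dump
 * @gt_id: Index of the page-table root to report
 *
 * Best effort: if the vm->lock cannot be taken without blocking, nothing is
 * dumped.
 *
 * Return: 0.
 */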
3204 int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
3205 {
3206         struct drm_gpuva *gpuva;
3207         bool is_vram;
3208         u64 addr;
3209
3210         if (!down_read_trylock(&vm->lock)) {
3211                 drm_printf(p, " Failed to acquire VM lock to dump capture\n");
3212                 return 0;
3213         }
3214         if (vm->pt_root[gt_id]) {
3215                 addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0, XE_PAGE_SIZE);
3216                 is_vram = xe_bo_is_vram(vm->pt_root[gt_id]->bo);
3217                 drm_printf(p, " VM root: A:0x%llx %s\n", addr,
3218                            is_vram ? "VRAM" : "SYS");
3219         }
3220
3221         drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
3222                 struct xe_vma *vma = gpuva_to_vma(gpuva);
3223                 bool is_userptr = xe_vma_is_userptr(vma);
3224                 bool is_null = xe_vma_is_null(vma);
3225
3226                 if (is_null) {
3227                         addr = 0;
3228                 } else if (is_userptr) {
3229                         struct sg_table *sg = to_userptr_vma(vma)->userptr.sg;
3230                         struct xe_res_cursor cur;
3231
3232                         if (sg) {
3233                                 xe_res_first_sg(sg, 0, XE_PAGE_SIZE, &cur);
3234                                 addr = xe_res_dma(&cur);
3235                         } else {
3236                                 addr = 0;
3237                         }
3238                 } else {
3239                         addr = __xe_bo_addr(xe_vma_bo(vma), 0, XE_PAGE_SIZE);
3240                         is_vram = xe_bo_is_vram(xe_vma_bo(vma));
3241                 }
3242                 drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n",
3243                            xe_vma_start(vma), xe_vma_end(vma) - 1,
3244                            xe_vma_size(vma),
3245                            addr, is_null ? "NULL" : is_userptr ? "USR" :
3246                            is_vram ? "VRAM" : "SYS");
3247         }
3248         up_read(&vm->lock);
3249
3250         return 0;
3251 }
3252
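/*
 * Capture state for a VM snapshot: one entry per dumpable VMA. @ofs is the
 * GPU virtual address, @bo_ofs is either the offset into the backing BO or,
 * for userptrs, the CPU virtual address, and @data holds the copied contents
 * (or an ERR_PTR() on failure) once xe_vm_snapshot_capture_delayed() has run.
 */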
3253 struct xe_vm_snapshot {
3254         unsigned long num_snaps;
3255         struct {
3256                 u64 ofs, bo_ofs;
3257                 unsigned long len;
3258                 struct xe_bo *bo;
3259                 void *data;
3260                 struct mm_struct *mm;
3261         } snap[];
3262 };
3263
3264 struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
3265 {
3266         unsigned long num_snaps = 0, i;
3267         struct xe_vm_snapshot *snap = NULL;
3268         struct drm_gpuva *gpuva;
3269
3270         if (!vm)
3271                 return NULL;
3272
3273         mutex_lock(&vm->snap_mutex);
3274         drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
3275                 if (gpuva->flags & XE_VMA_DUMPABLE)
3276                         num_snaps++;
3277         }
3278
3279         if (num_snaps)
3280                 snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT);
3281         if (!snap) {
3282                 snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV);
3283                 goto out_unlock;
3284         }
3285
3286         snap->num_snaps = num_snaps;
3287         i = 0;
3288         drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
3289                 struct xe_vma *vma = gpuva_to_vma(gpuva);
3290                 struct xe_bo *bo = vma->gpuva.gem.obj ?
3291                         gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;
3292
3293                 if (!(gpuva->flags & XE_VMA_DUMPABLE))
3294                         continue;
3295
3296                 snap->snap[i].ofs = xe_vma_start(vma);
3297                 snap->snap[i].len = xe_vma_size(vma);
3298                 if (bo) {
3299                         snap->snap[i].bo = xe_bo_get(bo);
3300                         snap->snap[i].bo_ofs = xe_vma_bo_offset(vma);
3301                 } else if (xe_vma_is_userptr(vma)) {
3302                         struct mm_struct *mm =
3303                                 to_userptr_vma(vma)->userptr.notifier.mm;
3304
3305                         if (mmget_not_zero(mm))
3306                                 snap->snap[i].mm = mm;
3307                         else
3308                                 snap->snap[i].data = ERR_PTR(-EFAULT);
3309
3310                         snap->snap[i].bo_ofs = xe_vma_userptr(vma);
3311                 } else {
3312                         snap->snap[i].data = ERR_PTR(-ENOENT);
3313                 }
3314                 i++;
3315         }
3316
3317 out_unlock:
3318         mutex_unlock(&vm->snap_mutex);
3319         return snap;
3320 }
3321
3322 void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
3323 {
3324         if (IS_ERR_OR_NULL(snap))
3325                 return;
3326
3327         for (int i = 0; i < snap->num_snaps; i++) {
3328                 struct xe_bo *bo = snap->snap[i].bo;
3329                 struct iosys_map src;
3330                 int err;
3331
3332                 if (IS_ERR(snap->snap[i].data))
3333                         continue;
3334
3335                 snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER);
3336                 if (!snap->snap[i].data) {
3337                         snap->snap[i].data = ERR_PTR(-ENOMEM);
3338                         goto cleanup_bo;
3339                 }
3340
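                /*
                 * Two sources: BO-backed VMAs are copied through a TTM vmap of
                 * the BO, userptr VMAs with copy_from_user() against the mm
                 * grabbed at capture time.
                 */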
3341                 if (bo) {
3342                         dma_resv_lock(bo->ttm.base.resv, NULL);
3343                         err = ttm_bo_vmap(&bo->ttm, &src);
3344                         if (!err) {
3345                                 xe_map_memcpy_from(xe_bo_device(bo),
3346                                                    snap->snap[i].data,
3347                                                    &src, snap->snap[i].bo_ofs,
3348                                                    snap->snap[i].len);
3349                                 ttm_bo_vunmap(&bo->ttm, &src);
3350                         }
3351                         dma_resv_unlock(bo->ttm.base.resv);
3352                 } else {
3353                         void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;
3354
3355                         kthread_use_mm(snap->snap[i].mm);
3356                         if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len))
3357                                 err = 0;
3358                         else
3359                                 err = -EFAULT;
3360                         kthread_unuse_mm(snap->snap[i].mm);
3361
3362                         mmput(snap->snap[i].mm);
3363                         snap->snap[i].mm = NULL;
3364                 }
3365
3366                 if (err) {
3367                         kvfree(snap->snap[i].data);
3368                         snap->snap[i].data = ERR_PTR(err);
3369                 }
3370
3371 cleanup_bo:
3372                 xe_bo_put(bo);
3373                 snap->snap[i].bo = NULL;
3374         }
3375 }
3376
3377 void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
3378 {
3379         unsigned long i, j;
3380
3381         if (IS_ERR_OR_NULL(snap)) {
3382                 drm_printf(p, "[0].error: %li\n", PTR_ERR(snap));
3383                 return;
3384         }
3385
3386         for (i = 0; i < snap->num_snaps; i++) {
3387                 drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len);
3388
3389                 if (IS_ERR(snap->snap[i].data)) {
3390                         drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs,
3391                                    PTR_ERR(snap->snap[i].data));
3392                         continue;
3393                 }
3394
3395                 drm_printf(p, "[%llx].data: ", snap->snap[i].ofs);
3396
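                /* The contents are emitted ascii85-encoded, one token per 32-bit word. */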
3397                 for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) {
3398                         u32 *val = snap->snap[i].data + j;
3399                         char dumped[ASCII85_BUFSZ];
3400
3401                         drm_puts(p, ascii85_encode(*val, dumped));
3402                 }
3403
3404                 drm_puts(p, "\n");
3405         }
3406 }
3407
3408 void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
3409 {
3410         unsigned long i;
3411
3412         if (IS_ERR_OR_NULL(snap))
3413                 return;
3414
3415         for (i = 0; i < snap->num_snaps; i++) {
3416                 if (!IS_ERR(snap->snap[i].data))
3417                         kvfree(snap->snap[i].data);
3418                 xe_bo_put(snap->snap[i].bo);
3419                 if (snap->snap[i].mm)
3420                         mmput(snap->snap[i].mm);
3421         }
3422         kvfree(snap);
3423 }