1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 #include <linux/dma-fence-array.h>
29 #include <linux/interval_tree_generic.h>
30 #include <linux/idr.h>
31 #include <drm/drmP.h>
32 #include <drm/amdgpu_drm.h>
33 #include "amdgpu.h"
34 #include "amdgpu_trace.h"
35 #include "amdgpu_amdkfd.h"
36
37 /*
38  * GPUVM
39  * GPUVM is similar to the legacy gart on older asics; however,
40  * rather than there being a single global gart table
41  * for the entire GPU, there are multiple VM page tables active
42  * at any given time.  The VM page tables can contain a mix of
43  * vram pages and system memory pages, and system memory pages
44  * can be mapped as snooped (cached system pages) or unsnooped
45  * (uncached system pages).
46  * Each VM has an ID associated with it and there is a page table
47  * associated with each VMID.  When executing a command buffer,
48  * the kernel tells the ring what VMID to use for that command
49  * buffer.  VMIDs are allocated dynamically as commands are submitted.
50  * The userspace drivers maintain their own address space and the kernel
51  * sets up their page tables accordingly when they submit their
52  * command buffers and a VMID is assigned.
53  * Cayman/Trinity support up to 8 active VMs at any given time;
54  * SI supports 16.
55  */
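/*
 * Illustration of the multi level walk, assuming a GFX9 style
 * configuration with a 48 bit VA space, 4 KiB GPU pages and the default
 * block_size of 9: the 36 pfn bits of an address are consumed in four
 * 9 bit chunks, one per level:
 *
 *   va[47:39] -> index into the root PD (PDB2)
 *   va[38:30] -> index into a PDB1 directory
 *   va[29:21] -> index into a PDB0 directory
 *   va[20:12] -> index into a page table (PTB)
 *   va[11:0]  -> byte offset inside the 4 KiB page
 *
 * Parts configured with fewer levels simply use fewer of these steps.
 */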
56
57 #define START(node) ((node)->start)
58 #define LAST(node) ((node)->last)
59
60 INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, uint64_t, __subtree_last,
61                      START, LAST, static, amdgpu_vm_it)
62
63 #undef START
64 #undef LAST
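/*
 * INTERVAL_TREE_DEFINE() above generates the static helpers
 * amdgpu_vm_it_insert(), amdgpu_vm_it_remove(), amdgpu_vm_it_iter_first()
 * and amdgpu_vm_it_iter_next().  A minimal sketch of a range lookup over
 * the per VM mapping tree with them:
 *
 *	struct amdgpu_bo_va_mapping *m;
 *
 *	for (m = amdgpu_vm_it_iter_first(&vm->va, start, last); m;
 *	     m = amdgpu_vm_it_iter_next(m, start, last)) {
 *		// m->start..m->last overlaps [start, last]
 *	}
 */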
65
66 /* Local structure. Encapsulates some VM table update parameters to reduce
67  * the number of function parameters.
68  */
69 struct amdgpu_pte_update_params {
70         /* amdgpu device we do this update for */
71         struct amdgpu_device *adev;
72         /* optional amdgpu_vm we do this update for */
73         struct amdgpu_vm *vm;
74         /* address where to copy page table entries from */
75         uint64_t src;
76         /* indirect buffer to fill with commands */
77         struct amdgpu_ib *ib;
78         /* Function which actually does the update */
79         void (*func)(struct amdgpu_pte_update_params *params,
80                      struct amdgpu_bo *bo, uint64_t pe,
81                      uint64_t addr, unsigned count, uint32_t incr,
82                      uint64_t flags);
83         /* The next two are used during VM update by CPU
84          *  DMA addresses to use for mapping
85          *  Kernel pointer of PD/PT BO that needs to be updated
86          */
87         dma_addr_t *pages_addr;
88         void *kptr;
89 };
90
91 /* Helper to disable partial resident texture feature from a fence callback */
92 struct amdgpu_prt_cb {
93         struct amdgpu_device *adev;
94         struct dma_fence_cb cb;
95 };
96
97 static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
98                                    struct amdgpu_vm *vm,
99                                    struct amdgpu_bo *bo)
100 {
101         base->vm = vm;
102         base->bo = bo;
103         INIT_LIST_HEAD(&base->bo_list);
104         INIT_LIST_HEAD(&base->vm_status);
105
106         if (!bo)
107                 return;
108         list_add_tail(&base->bo_list, &bo->va);
109
110         if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
111                 return;
112
113         if (bo->preferred_domains &
114             amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
115                 return;
116
117         /*
118          * We checked all the prerequisites, but it looks like this per-VM BO
119          * is currently evicted. Add the BO to the evicted list to make sure it
120          * is validated on next VM use to avoid faults.
121          */
122         list_move_tail(&base->vm_status, &vm->evicted);
123 }
124
125 /**
126  * amdgpu_vm_level_shift - return the addr shift for each level
127  *
128  * @adev: amdgpu_device pointer
129  *
130  * Returns the number of bits the pfn needs to be right shifted for a level.
131  */
132 static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev,
133                                       unsigned level)
134 {
135         unsigned shift = 0xff;
136
137         switch (level) {
138         case AMDGPU_VM_PDB2:
139         case AMDGPU_VM_PDB1:
140         case AMDGPU_VM_PDB0:
141                 shift = 9 * (AMDGPU_VM_PDB0 - level) +
142                         adev->vm_manager.block_size;
143                 break;
144         case AMDGPU_VM_PTB:
145                 shift = 0;
146                 break;
147         default:
148                 dev_err(adev->dev, "the level%d isn't supported.\n", level);
149         }
150
151         return shift;
152 }
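/*
 * Worked example, assuming the default block_size of 9: the PTB covers
 * pfn bits [8:0] (shift 0), PDB0 gets 9 * 0 + 9 = 9, PDB1 gets
 * 9 * 1 + 9 = 18 and the PDB2 root gets 9 * 2 + 9 = 27.  A pfn is thus
 * right shifted by 27, 18, 9 and 0 bits to get the index at each
 * successive level.
 */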
153
154 /**
155  * amdgpu_vm_num_entries - return the number of entries in a PD/PT
156  *
157  * @adev: amdgpu_device pointer
158  *
159  * Calculate the number of entries in a page directory or page table.
160  */
161 static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
162                                       unsigned level)
163 {
164         unsigned shift = amdgpu_vm_level_shift(adev,
165                                                adev->vm_manager.root_level);
166
167         if (level == adev->vm_manager.root_level)
168                 /* For the root directory */
169                 return round_up(adev->vm_manager.max_pfn, 1 << shift) >> shift;
170         else if (level != AMDGPU_VM_PTB)
171                 /* Everything in between */
172                 return 512;
173         else
174                 /* For the page tables on the leaves */
175                 return AMDGPU_VM_PTE_COUNT(adev);
176 }
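/*
 * Worked example, assuming a 48 bit VA space with block_size 9 and
 * root_level == AMDGPU_VM_PDB2: max_pfn is 1ULL << 36 and the root shift
 * is 27, so the root PD holds (1ULL << 36) >> 27 = 512 entries; the
 * intermediate directories hold 512 entries each and every PTB holds
 * AMDGPU_VM_PTE_COUNT(adev) = 1 << 9 = 512 entries as well.
 */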
177
178 /**
179  * amdgpu_vm_bo_size - returns the size of the BOs in bytes
180  *
181  * @adev: amdgpu_device pointer
182  *
183  * Calculate the size of the BO for a page directory or page table in bytes.
184  */
185 static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level)
186 {
187         return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level) * 8);
188 }
189
190 /**
191  * amdgpu_vm_get_pd_bo - add the VM PD to a validation list
192  *
193  * @vm: vm providing the BOs
194  * @validated: head of validation list
195  * @entry: entry to add
196  *
197  * Add the page directory to the list of BOs to
198  * validate for command submission.
199  */
200 void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
201                          struct list_head *validated,
202                          struct amdgpu_bo_list_entry *entry)
203 {
204         entry->robj = vm->root.base.bo;
205         entry->priority = 0;
206         entry->tv.bo = &entry->robj->tbo;
207         entry->tv.shared = true;
208         entry->user_pages = NULL;
209         list_add(&entry->tv.head, validated);
210 }
211
212 /**
213  * amdgpu_vm_validate_pt_bos - validate the page table BOs
214  *
215  * @adev: amdgpu device pointer
216  * @vm: vm providing the BOs
217  * @validate: callback to do the validation
218  * @param: parameter for the validation callback
219  *
220  * Validate the page table BOs on command submission if necessary.
221  */
222 int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
223                               int (*validate)(void *p, struct amdgpu_bo *bo),
224                               void *param)
225 {
226         struct ttm_bo_global *glob = adev->mman.bdev.glob;
227         int r;
228
229         while (!list_empty(&vm->evicted)) {
230                 struct amdgpu_vm_bo_base *bo_base;
231                 struct amdgpu_bo *bo;
232
233                 bo_base = list_first_entry(&vm->evicted,
234                                            struct amdgpu_vm_bo_base,
235                                            vm_status);
236
237                 bo = bo_base->bo;
238                 if (bo->parent) {
239                         r = validate(param, bo);
240                         if (r)
241                                 return r;
242
243                         spin_lock(&glob->lru_lock);
244                         ttm_bo_move_to_lru_tail(&bo->tbo);
245                         if (bo->shadow)
246                                 ttm_bo_move_to_lru_tail(&bo->shadow->tbo);
247                         spin_unlock(&glob->lru_lock);
248                 }
249
250                 if (bo->tbo.type == ttm_bo_type_kernel &&
251                     vm->use_cpu_for_update) {
252                         r = amdgpu_bo_kmap(bo, NULL);
253                         if (r)
254                                 return r;
255                 }
256
257                 if (bo->tbo.type != ttm_bo_type_kernel) {
258                         spin_lock(&vm->moved_lock);
259                         list_move(&bo_base->vm_status, &vm->moved);
260                         spin_unlock(&vm->moved_lock);
261                 } else {
262                         list_move(&bo_base->vm_status, &vm->relocated);
263                 }
264         }
265
266         return 0;
267 }
268
269 /**
270  * amdgpu_vm_ready - check VM is ready for updates
271  *
272  * @vm: VM to check
273  *
274  * Check if all VM PDs/PTs are ready for updates
275  */
276 bool amdgpu_vm_ready(struct amdgpu_vm *vm)
277 {
278         return list_empty(&vm->evicted);
279 }
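/*
 * Minimal usage sketch (simplified, not the actual CS path) of how the
 * helpers above fit together during command submission; the reservation
 * step, my_validate_cb and my_data are placeholders:
 *
 *	struct amdgpu_bo_list_entry pd_entry;
 *	LIST_HEAD(validated);
 *	int r;
 *
 *	amdgpu_vm_get_pd_bo(vm, &validated, &pd_entry);
 *	// ... reserve all BOs on the "validated" list ...
 *	r = amdgpu_vm_validate_pt_bos(adev, vm, my_validate_cb, my_data);
 *	if (!r && amdgpu_vm_ready(vm))
 *		// PDs/PTs are resident, safe to update mappings and submit
 */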
280
281 /**
282  * amdgpu_vm_clear_bo - initially clear the PDs/PTs
283  *
284  * @adev: amdgpu_device pointer
285  * @bo: BO to clear
286  * @level: level this BO is at
287  *
288  * Root PD needs to be reserved when calling this.
289  */
290 static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
291                               struct amdgpu_vm *vm, struct amdgpu_bo *bo,
292                               unsigned level, bool pte_support_ats)
293 {
294         struct ttm_operation_ctx ctx = { true, false };
295         struct dma_fence *fence = NULL;
296         unsigned entries, ats_entries;
297         struct amdgpu_ring *ring;
298         struct amdgpu_job *job;
299         uint64_t addr;
300         int r;
301
302         addr = amdgpu_bo_gpu_offset(bo);
303         entries = amdgpu_bo_size(bo) / 8;
304
305         if (pte_support_ats) {
306                 if (level == adev->vm_manager.root_level) {
307                         ats_entries = amdgpu_vm_level_shift(adev, level);
308                         ats_entries += AMDGPU_GPU_PAGE_SHIFT;
309                         ats_entries = AMDGPU_VA_HOLE_START >> ats_entries;
310                         ats_entries = min(ats_entries, entries);
311                         entries -= ats_entries;
312                 } else {
313                         ats_entries = entries;
314                         entries = 0;
315                 }
316         } else {
317                 ats_entries = 0;
318         }
319
320         ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
321
322         r = reservation_object_reserve_shared(bo->tbo.resv);
323         if (r)
324                 return r;
325
326         r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
327         if (r)
328                 goto error;
329
330         r = amdgpu_job_alloc_with_ib(adev, 64, &job);
331         if (r)
332                 goto error;
333
334         if (ats_entries) {
335                 uint64_t ats_value;
336
337                 ats_value = AMDGPU_PTE_DEFAULT_ATC;
338                 if (level != AMDGPU_VM_PTB)
339                         ats_value |= AMDGPU_PDE_PTE;
340
341                 amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
342                                       ats_entries, 0, ats_value);
343                 addr += ats_entries * 8;
344         }
345
346         if (entries)
347                 amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
348                                       entries, 0, 0);
349
350         amdgpu_ring_pad_ib(ring, &job->ibs[0]);
351
352         WARN_ON(job->ibs[0].length_dw > 64);
353         r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv,
354                              AMDGPU_FENCE_OWNER_UNDEFINED, false);
355         if (r)
356                 goto error_free;
357
358         r = amdgpu_job_submit(job, ring, &vm->entity,
359                               AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
360         if (r)
361                 goto error_free;
362
363         amdgpu_bo_fence(bo, fence, true);
364         dma_fence_put(fence);
365
366         if (bo->shadow)
367                 return amdgpu_vm_clear_bo(adev, vm, bo->shadow,
368                                           level, pte_support_ats);
369
370         return 0;
371
372 error_free:
373         amdgpu_job_free(job);
374
375 error:
376         return r;
377 }
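/*
 * Worked example for the ATS split above, assuming a 48 bit VA space with
 * AMDGPU_VA_HOLE_START == 1ULL << 47, block_size 9 and 4 KiB GPU pages:
 * for the root PD the shift is 27 + 12 = 39, so ats_entries becomes
 * (1ULL << 47) >> 39 = 256.  The lower half of the 512 root entries is
 * initialized with the default ATC mapping and only the upper half is
 * cleared to zero.
 */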
378
379 /**
380  * amdgpu_vm_alloc_levels - allocate the PD/PT levels
381  *
382  * @adev: amdgpu_device pointer
383  * @vm: requested vm
384  * @saddr: start of the address range
385  * @eaddr: end of the address range
386  *
387  * Make sure the page directories and page tables are allocated
388  */
389 static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
390                                   struct amdgpu_vm *vm,
391                                   struct amdgpu_vm_pt *parent,
392                                   uint64_t saddr, uint64_t eaddr,
393                                   unsigned level, bool ats)
394 {
395         unsigned shift = amdgpu_vm_level_shift(adev, level);
396         unsigned pt_idx, from, to;
397         u64 flags;
398         int r;
399
400         if (!parent->entries) {
401                 unsigned num_entries = amdgpu_vm_num_entries(adev, level);
402
403                 parent->entries = kvmalloc_array(num_entries,
404                                                    sizeof(struct amdgpu_vm_pt),
405                                                    GFP_KERNEL | __GFP_ZERO);
406                 if (!parent->entries)
407                         return -ENOMEM;
408                 memset(parent->entries, 0, sizeof(struct amdgpu_vm_pt));
409         }
410
411         from = saddr >> shift;
412         to = eaddr >> shift;
413         if (from >= amdgpu_vm_num_entries(adev, level) ||
414             to >= amdgpu_vm_num_entries(adev, level))
415                 return -EINVAL;
416
417         ++level;
418         saddr = saddr & ((1 << shift) - 1);
419         eaddr = eaddr & ((1 << shift) - 1);
420
421         flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
422         if (vm->use_cpu_for_update)
423                 flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
424         else
425                 flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
426                                 AMDGPU_GEM_CREATE_SHADOW);
427
428         /* walk over the address space and allocate the page tables */
429         for (pt_idx = from; pt_idx <= to; ++pt_idx) {
430                 struct reservation_object *resv = vm->root.base.bo->tbo.resv;
431                 struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
432                 struct amdgpu_bo *pt;
433
434                 if (!entry->base.bo) {
435                         struct amdgpu_bo_param bp;
436
437                         memset(&bp, 0, sizeof(bp));
438                         bp.size = amdgpu_vm_bo_size(adev, level);
439                         bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
440                         bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
441                         bp.flags = flags;
442                         bp.type = ttm_bo_type_kernel;
443                         bp.resv = resv;
444                         r = amdgpu_bo_create(adev, &bp, &pt);
445                         if (r)
446                                 return r;
447
448                         r = amdgpu_vm_clear_bo(adev, vm, pt, level, ats);
449                         if (r) {
450                                 amdgpu_bo_unref(&pt->shadow);
451                                 amdgpu_bo_unref(&pt);
452                                 return r;
453                         }
454
455                         if (vm->use_cpu_for_update) {
456                                 r = amdgpu_bo_kmap(pt, NULL);
457                                 if (r) {
458                                         amdgpu_bo_unref(&pt->shadow);
459                                         amdgpu_bo_unref(&pt);
460                                         return r;
461                                 }
462                         }
463
464                         /* Keep a reference to the parent directory to
465                          * avoid freeing it up in the wrong order.
466                          */
467                         pt->parent = amdgpu_bo_ref(parent->base.bo);
468
469                         amdgpu_vm_bo_base_init(&entry->base, vm, pt);
470                         list_move(&entry->base.vm_status, &vm->relocated);
471                 }
472
473                 if (level < AMDGPU_VM_PTB) {
474                         uint64_t sub_saddr = (pt_idx == from) ? saddr : 0;
475                         uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
476                                 ((1 << shift) - 1);
477                         r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr,
478                                                    sub_eaddr, level, ats);
479                         if (r)
480                                 return r;
481                 }
482         }
483
484         return 0;
485 }
486
487 /**
488  * amdgpu_vm_alloc_pts - Allocate page tables.
489  *
490  * @adev: amdgpu_device pointer
491  * @vm: VM to allocate page tables for
492  * @saddr: Start address which needs to be allocated
493  * @size: Size from start address we need.
494  *
495  * Make sure the page tables are allocated.
496  */
497 int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
498                         struct amdgpu_vm *vm,
499                         uint64_t saddr, uint64_t size)
500 {
501         uint64_t eaddr;
502         bool ats = false;
503
504         /* validate the parameters */
505         if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)
506                 return -EINVAL;
507
508         eaddr = saddr + size - 1;
509
510         if (vm->pte_support_ats)
511                 ats = saddr < AMDGPU_VA_HOLE_START;
512
513         saddr /= AMDGPU_GPU_PAGE_SIZE;
514         eaddr /= AMDGPU_GPU_PAGE_SIZE;
515
516         if (eaddr >= adev->vm_manager.max_pfn) {
517                 dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
518                         eaddr, adev->vm_manager.max_pfn);
519                 return -EINVAL;
520         }
521
522         return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr,
523                                       adev->vm_manager.root_level, ats);
524 }
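/*
 * Usage sketch: @saddr and @size are in bytes and must be GPU page
 * aligned.  E.g. backing a 2 MiB mapping at VA 0x100000000 would be
 * requested as
 *
 *	r = amdgpu_vm_alloc_pts(adev, vm, 0x100000000ULL, 2ULL << 20);
 *
 * which allocates every PD/PT needed to cover GPU pages
 * 0x100000..0x1001ff of the address space.
 */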
525
526 /**
527  * amdgpu_vm_check_compute_bug - check whether asic has compute vm bug
528  *
529  * @adev: amdgpu_device pointer
530  */
531 void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev)
532 {
533         const struct amdgpu_ip_block *ip_block;
534         bool has_compute_vm_bug;
535         struct amdgpu_ring *ring;
536         int i;
537
538         has_compute_vm_bug = false;
539
540         ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
541         if (ip_block) {
542                 /* Compute has a VM bug for GFX version < 7.
543                  * Compute has a VM bug for GFX 8 MEC firmware version < 673. */
544                 if (ip_block->version->major <= 7)
545                         has_compute_vm_bug = true;
546                 else if (ip_block->version->major == 8)
547                         if (adev->gfx.mec_fw_version < 673)
548                                 has_compute_vm_bug = true;
549         }
550
551         for (i = 0; i < adev->num_rings; i++) {
552                 ring = adev->rings[i];
553                 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
554                         /* only compute rings */
555                         ring->has_compute_vm_bug = has_compute_vm_bug;
556                 else
557                         ring->has_compute_vm_bug = false;
558         }
559 }
560
561 bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
562                                   struct amdgpu_job *job)
563 {
564         struct amdgpu_device *adev = ring->adev;
565         unsigned vmhub = ring->funcs->vmhub;
566         struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
567         struct amdgpu_vmid *id;
568         bool gds_switch_needed;
569         bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug;
570
571         if (job->vmid == 0)
572                 return false;
573         id = &id_mgr->ids[job->vmid];
574         gds_switch_needed = ring->funcs->emit_gds_switch && (
575                 id->gds_base != job->gds_base ||
576                 id->gds_size != job->gds_size ||
577                 id->gws_base != job->gws_base ||
578                 id->gws_size != job->gws_size ||
579                 id->oa_base != job->oa_base ||
580                 id->oa_size != job->oa_size);
581
582         if (amdgpu_vmid_had_gpu_reset(adev, id))
583                 return true;
584
585         return vm_flush_needed || gds_switch_needed;
586 }
587
588 static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev)
589 {
590         return (adev->gmc.real_vram_size == adev->gmc.visible_vram_size);
591 }
592
593 /**
594  * amdgpu_vm_flush - hardware flush the vm
595  *
596  * @ring: ring to use for flush
597  * @job: related job
598  * @need_pipe_sync: is a pipeline sync needed
599  *
600  * Emit a VM flush when it is necessary.
601  */
602 int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync)
603 {
604         struct amdgpu_device *adev = ring->adev;
605         unsigned vmhub = ring->funcs->vmhub;
606         struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
607         struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
608         bool gds_switch_needed = ring->funcs->emit_gds_switch && (
609                 id->gds_base != job->gds_base ||
610                 id->gds_size != job->gds_size ||
611                 id->gws_base != job->gws_base ||
612                 id->gws_size != job->gws_size ||
613                 id->oa_base != job->oa_base ||
614                 id->oa_size != job->oa_size);
615         bool vm_flush_needed = job->vm_needs_flush;
616         bool pasid_mapping_needed = id->pasid != job->pasid ||
617                 !id->pasid_mapping ||
618                 !dma_fence_is_signaled(id->pasid_mapping);
619         struct dma_fence *fence = NULL;
620         unsigned patch_offset = 0;
621         int r;
622
623         if (amdgpu_vmid_had_gpu_reset(adev, id)) {
624                 gds_switch_needed = true;
625                 vm_flush_needed = true;
626                 pasid_mapping_needed = true;
627         }
628
629         gds_switch_needed &= !!ring->funcs->emit_gds_switch;
630         vm_flush_needed &= !!ring->funcs->emit_vm_flush;
631         pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
632                 ring->funcs->emit_wreg;
633
634         if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
635                 return 0;
636
637         if (ring->funcs->init_cond_exec)
638                 patch_offset = amdgpu_ring_init_cond_exec(ring);
639
640         if (need_pipe_sync)
641                 amdgpu_ring_emit_pipeline_sync(ring);
642
643         if (vm_flush_needed) {
644                 trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
645                 amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr);
646         }
647
648         if (pasid_mapping_needed)
649                 amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
650
651         if (vm_flush_needed || pasid_mapping_needed) {
652                 r = amdgpu_fence_emit(ring, &fence, 0);
653                 if (r)
654                         return r;
655         }
656
657         if (vm_flush_needed) {
658                 mutex_lock(&id_mgr->lock);
659                 dma_fence_put(id->last_flush);
660                 id->last_flush = dma_fence_get(fence);
661                 id->current_gpu_reset_count =
662                         atomic_read(&adev->gpu_reset_counter);
663                 mutex_unlock(&id_mgr->lock);
664         }
665
666         if (pasid_mapping_needed) {
667                 id->pasid = job->pasid;
668                 dma_fence_put(id->pasid_mapping);
669                 id->pasid_mapping = dma_fence_get(fence);
670         }
671         dma_fence_put(fence);
672
673         if (ring->funcs->emit_gds_switch && gds_switch_needed) {
674                 id->gds_base = job->gds_base;
675                 id->gds_size = job->gds_size;
676                 id->gws_base = job->gws_base;
677                 id->gws_size = job->gws_size;
678                 id->oa_base = job->oa_base;
679                 id->oa_size = job->oa_size;
680                 amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
681                                             job->gds_size, job->gws_base,
682                                             job->gws_size, job->oa_base,
683                                             job->oa_size);
684         }
685
686         if (ring->funcs->patch_cond_exec)
687                 amdgpu_ring_patch_cond_exec(ring, patch_offset);
688
689         /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
690         if (ring->funcs->emit_switch_buffer) {
691                 amdgpu_ring_emit_switch_buffer(ring);
692                 amdgpu_ring_emit_switch_buffer(ring);
693         }
694         return 0;
695 }
696
697 /**
698  * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo
699  *
700  * @vm: requested vm
701  * @bo: requested buffer object
702  *
703  * Find @bo inside the requested vm.
704  * Search inside the @bo's vm list for the requested vm
705  * Returns the found bo_va or NULL if none is found
706  *
707  * Object has to be reserved!
708  */
709 struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
710                                        struct amdgpu_bo *bo)
711 {
712         struct amdgpu_bo_va *bo_va;
713
714         list_for_each_entry(bo_va, &bo->va, base.bo_list) {
715                 if (bo_va->base.vm == vm) {
716                         return bo_va;
717                 }
718         }
719         return NULL;
720 }
721
722 /**
723  * amdgpu_vm_do_set_ptes - helper to call the right asic function
724  *
725  * @params: see amdgpu_pte_update_params definition
726  * @bo: PD/PT to update
727  * @pe: addr of the page entry
728  * @addr: dst addr to write into pe
729  * @count: number of page entries to update
730  * @incr: increase next addr by incr bytes
731  * @flags: hw access flags
732  *
733  * Traces the parameters and calls the right asic functions
734  * to setup the page table using the DMA.
735  */
736 static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params,
737                                   struct amdgpu_bo *bo,
738                                   uint64_t pe, uint64_t addr,
739                                   unsigned count, uint32_t incr,
740                                   uint64_t flags)
741 {
742         pe += amdgpu_bo_gpu_offset(bo);
743         trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
744
745         if (count < 3) {
746                 amdgpu_vm_write_pte(params->adev, params->ib, pe,
747                                     addr | flags, count, incr);
748
749         } else {
750                 amdgpu_vm_set_pte_pde(params->adev, params->ib, pe, addr,
751                                       count, incr, flags);
752         }
753 }
754
755 /**
756  * amdgpu_vm_do_copy_ptes - copy the PTEs from the GART
757  *
758  * @params: see amdgpu_pte_update_params definition
759  * @bo: PD/PT to update
760  * @pe: addr of the page entry
761  * @addr: dst addr to write into pe
762  * @count: number of page entries to update
763  * @incr: increase next addr by incr bytes
764  * @flags: hw access flags
765  *
766  * Traces the parameters and calls the DMA function to copy the PTEs.
767  */
768 static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params,
769                                    struct amdgpu_bo *bo,
770                                    uint64_t pe, uint64_t addr,
771                                    unsigned count, uint32_t incr,
772                                    uint64_t flags)
773 {
774         uint64_t src = (params->src + (addr >> 12) * 8);
775
776         pe += amdgpu_bo_gpu_offset(bo);
777         trace_amdgpu_vm_copy_ptes(pe, src, count);
778
779         amdgpu_vm_copy_pte(params->adev, params->ib, pe, src, count);
780 }
781
782 /**
783  * amdgpu_vm_map_gart - Resolve gart mapping of addr
784  *
785  * @pages_addr: optional DMA address to use for lookup
786  * @addr: the unmapped addr
787  *
788  * Look up the physical address of the page that the pte resolves
789  * to and return the pointer for the page table entry.
790  */
791 static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
792 {
793         uint64_t result;
794
795         /* page table offset */
796         result = pages_addr[addr >> PAGE_SHIFT];
797
798         /* in case cpu page size != gpu page size*/
799         result |= addr & (~PAGE_MASK);
800
801         result &= 0xFFFFFFFFFFFFF000ULL;
802
803         return result;
804 }
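/*
 * Worked example for the masking above: with 64 KiB CPU pages
 * (PAGE_SHIFT == 16) and addr == 0x23456 the lookup uses pages_addr[0x2],
 * the OR keeps the in-page offset 0x3456 and the final mask truncates it
 * to 0x3000, i.e. the result points at the fourth 4 KiB GPU page inside
 * that CPU page.  With 4 KiB CPU pages the OR and the mask cancel out and
 * the result is simply the DMA address of the page.
 */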
805
806 /**
807  * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU
808  *
809  * @params: see amdgpu_pte_update_params definition
810  * @bo: PD/PT to update
811  * @pe: kmap addr of the page entry
812  * @addr: dst addr to write into pe
813  * @count: number of page entries to update
814  * @incr: increase next addr by incr bytes
815  * @flags: hw access flags
816  *
817  * Write count number of PT/PD entries directly.
818  */
819 static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
820                                    struct amdgpu_bo *bo,
821                                    uint64_t pe, uint64_t addr,
822                                    unsigned count, uint32_t incr,
823                                    uint64_t flags)
824 {
825         unsigned int i;
826         uint64_t value;
827
828         pe += (unsigned long)amdgpu_bo_kptr(bo);
829
830         trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
831
832         for (i = 0; i < count; i++) {
833                 value = params->pages_addr ?
834                         amdgpu_vm_map_gart(params->pages_addr, addr) :
835                         addr;
836                 amdgpu_gmc_set_pte_pde(params->adev, (void *)(uintptr_t)pe,
837                                        i, value, flags);
838                 addr += incr;
839         }
840 }
841
842 static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
843                              void *owner)
844 {
845         struct amdgpu_sync sync;
846         int r;
847
848         amdgpu_sync_create(&sync);
849         amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner, false);
850         r = amdgpu_sync_wait(&sync, true);
851         amdgpu_sync_free(&sync);
852
853         return r;
854 }
855
856 /*
857  * amdgpu_vm_update_pde - update a single level in the hierarchy
858  *
859  * @params: parameters for the update
860  * @vm: requested vm
861  * @parent: parent directory
862  * @entry: entry to update
863  *
864  * Makes sure the requested entry in parent is up to date.
865  */
866 static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params,
867                                  struct amdgpu_vm *vm,
868                                  struct amdgpu_vm_pt *parent,
869                                  struct amdgpu_vm_pt *entry)
870 {
871         struct amdgpu_bo *bo = parent->base.bo, *pbo;
872         uint64_t pde, pt, flags;
873         unsigned level;
874
875         /* Don't update huge pages here */
876         if (entry->huge)
877                 return;
878
879         for (level = 0, pbo = bo->parent; pbo; ++level)
880                 pbo = pbo->parent;
881
882         level += params->adev->vm_manager.root_level;
883         pt = amdgpu_bo_gpu_offset(entry->base.bo);
884         flags = AMDGPU_PTE_VALID;
885         amdgpu_gmc_get_vm_pde(params->adev, level, &pt, &flags);
886         pde = (entry - parent->entries) * 8;
887         if (bo->shadow)
888                 params->func(params, bo->shadow, pde, pt, 1, 0, flags);
889         params->func(params, bo, pde, pt, 1, 0, flags);
890 }
891
892 /*
893  * amdgpu_vm_invalidate_level - mark all PD levels as invalid
894  *
895  * @parent: parent PD
896  *
897  * Mark all PD levels as invalid after an error.
898  */
899 static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev,
900                                        struct amdgpu_vm *vm,
901                                        struct amdgpu_vm_pt *parent,
902                                        unsigned level)
903 {
904         unsigned pt_idx, num_entries;
905
906         /*
907          * Recurse into the subdirectories. This recursion is harmless because
908          * we only have a maximum of 5 layers.
909          */
910         num_entries = amdgpu_vm_num_entries(adev, level);
911         for (pt_idx = 0; pt_idx < num_entries; ++pt_idx) {
912                 struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
913
914                 if (!entry->base.bo)
915                         continue;
916
917                 if (list_empty(&entry->base.vm_status))
918                         list_add(&entry->base.vm_status, &vm->relocated);
919                 amdgpu_vm_invalidate_level(adev, vm, entry, level + 1);
920         }
921 }
922
923 /*
924  * amdgpu_vm_update_directories - make sure that all directories are valid
925  *
926  * @adev: amdgpu_device pointer
927  * @vm: requested vm
928  *
929  * Makes sure all directories are up to date.
930  * Returns 0 for success, error for failure.
931  */
932 int amdgpu_vm_update_directories(struct amdgpu_device *adev,
933                                  struct amdgpu_vm *vm)
934 {
935         struct amdgpu_pte_update_params params;
936         struct amdgpu_job *job;
937         unsigned ndw = 0;
938         int r = 0;
939
940         if (list_empty(&vm->relocated))
941                 return 0;
942
943 restart:
944         memset(&params, 0, sizeof(params));
945         params.adev = adev;
946
947         if (vm->use_cpu_for_update) {
948                 r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
949                 if (unlikely(r))
950                         return r;
951
952                 params.func = amdgpu_vm_cpu_set_ptes;
953         } else {
954                 ndw = 512 * 8;
955                 r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
956                 if (r)
957                         return r;
958
959                 params.ib = &job->ibs[0];
960                 params.func = amdgpu_vm_do_set_ptes;
961         }
962
963         while (!list_empty(&vm->relocated)) {
964                 struct amdgpu_vm_bo_base *bo_base, *parent;
965                 struct amdgpu_vm_pt *pt, *entry;
966                 struct amdgpu_bo *bo;
967
968                 bo_base = list_first_entry(&vm->relocated,
969                                            struct amdgpu_vm_bo_base,
970                                            vm_status);
971                 list_del_init(&bo_base->vm_status);
972
973                 bo = bo_base->bo->parent;
974                 if (!bo)
975                         continue;
976
977                 parent = list_first_entry(&bo->va, struct amdgpu_vm_bo_base,
978                                           bo_list);
979                 pt = container_of(parent, struct amdgpu_vm_pt, base);
980                 entry = container_of(bo_base, struct amdgpu_vm_pt, base);
981
982                 amdgpu_vm_update_pde(&params, vm, pt, entry);
983
984                 if (!vm->use_cpu_for_update &&
985                     (ndw - params.ib->length_dw) < 32)
986                         break;
987         }
988
989         if (vm->use_cpu_for_update) {
990                 /* Flush HDP */
991                 mb();
992                 amdgpu_asic_flush_hdp(adev, NULL);
993         } else if (params.ib->length_dw == 0) {
994                 amdgpu_job_free(job);
995         } else {
996                 struct amdgpu_bo *root = vm->root.base.bo;
997                 struct amdgpu_ring *ring;
998                 struct dma_fence *fence;
999
1000                 ring = container_of(vm->entity.sched, struct amdgpu_ring,
1001                                     sched);
1002
1003                 amdgpu_ring_pad_ib(ring, params.ib);
1004                 amdgpu_sync_resv(adev, &job->sync, root->tbo.resv,
1005                                  AMDGPU_FENCE_OWNER_VM, false);
1006                 WARN_ON(params.ib->length_dw > ndw);
1007                 r = amdgpu_job_submit(job, ring, &vm->entity,
1008                                       AMDGPU_FENCE_OWNER_VM, &fence);
1009                 if (r)
1010                         goto error;
1011
1012                 amdgpu_bo_fence(root, fence, true);
1013                 dma_fence_put(vm->last_update);
1014                 vm->last_update = fence;
1015         }
1016
1017         if (!list_empty(&vm->relocated))
1018                 goto restart;
1019
1020         return 0;
1021
1022 error:
1023         amdgpu_vm_invalidate_level(adev, vm, &vm->root,
1024                                    adev->vm_manager.root_level);
1025         amdgpu_job_free(job);
1026         return r;
1027 }
1028
1029 /**
1030  * amdgpu_vm_get_entry - find the entry for an address
1031  *
1032  * @p: see amdgpu_pte_update_params definition
1033  * @addr: virtual address in question
1034  * @entry: resulting entry or NULL
1035  * @parent: parent entry
1036  *
1037  * Find the vm_pt entry and its parent for the given address.
1038  */
1039 void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
1040                          struct amdgpu_vm_pt **entry,
1041                          struct amdgpu_vm_pt **parent)
1042 {
1043         unsigned level = p->adev->vm_manager.root_level;
1044
1045         *parent = NULL;
1046         *entry = &p->vm->root;
1047         while ((*entry)->entries) {
1048                 unsigned shift = amdgpu_vm_level_shift(p->adev, level++);
1049
1050                 *parent = *entry;
1051                 *entry = &(*entry)->entries[addr >> shift];
1052                 addr &= (1ULL << shift) - 1;
1053         }
1054
1055         if (level != AMDGPU_VM_PTB)
1056                 *entry = NULL;
1057 }
1058
1059 /**
1060  * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages
1061  *
1062  * @p: see amdgpu_pte_update_params definition
1063  * @entry: vm_pt entry to check
1064  * @parent: parent entry
1065  * @nptes: number of PTEs updated with this operation
1066  * @dst: destination address where the PTEs should point to
1067  * @flags: access flags for the PTEs
1068  *
1069  * Check if we can update the PD with a huge page.
1070  */
1071 static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
1072                                         struct amdgpu_vm_pt *entry,
1073                                         struct amdgpu_vm_pt *parent,
1074                                         unsigned nptes, uint64_t dst,
1075                                         uint64_t flags)
1076 {
1077         uint64_t pde;
1078
1079         /* In the case of a mixed PT the PDE must point to it */
1080         if (p->adev->asic_type >= CHIP_VEGA10 && !p->src &&
1081             nptes == AMDGPU_VM_PTE_COUNT(p->adev)) {
1082                 /* Set the huge page flag to stop scanning at this PDE */
1083                 flags |= AMDGPU_PDE_PTE;
1084         }
1085
1086         if (!(flags & AMDGPU_PDE_PTE)) {
1087                 if (entry->huge) {
1088                         /* Add the entry to the relocated list to update it. */
1089                         entry->huge = false;
1090                         list_move(&entry->base.vm_status, &p->vm->relocated);
1091                 }
1092                 return;
1093         }
1094
1095         entry->huge = true;
1096         amdgpu_gmc_get_vm_pde(p->adev, AMDGPU_VM_PDB0, &dst, &flags);
1097
1098         pde = (entry - parent->entries) * 8;
1099         if (parent->base.bo->shadow)
1100                 p->func(p, parent->base.bo->shadow, pde, dst, 1, 0, flags);
1101         p->func(p, parent->base.bo, pde, dst, 1, 0, flags);
1102 }
1103
1104 /**
1105  * amdgpu_vm_update_ptes - make sure that page tables are valid
1106  *
1107  * @params: see amdgpu_pte_update_params definition
1108  * @vm: requested vm
1109  * @start: start of GPU address range
1110  * @end: end of GPU address range
1111  * @dst: destination address to map to, the next dst inside the function
1112  * @flags: mapping flags
1113  *
1114  * Update the page tables in the range @start - @end.
1115  * Returns 0 for success, -ENOENT if a page table is missing.
1116  */
1117 static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
1118                                   uint64_t start, uint64_t end,
1119                                   uint64_t dst, uint64_t flags)
1120 {
1121         struct amdgpu_device *adev = params->adev;
1122         const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1;
1123
1124         uint64_t addr, pe_start;
1125         struct amdgpu_bo *pt;
1126         unsigned nptes;
1127
1128         /* walk over the address space and update the page tables */
1129         for (addr = start; addr < end; addr += nptes,
1130              dst += nptes * AMDGPU_GPU_PAGE_SIZE) {
1131                 struct amdgpu_vm_pt *entry, *parent;
1132
1133                 amdgpu_vm_get_entry(params, addr, &entry, &parent);
1134                 if (!entry)
1135                         return -ENOENT;
1136
1137                 if ((addr & ~mask) == (end & ~mask))
1138                         nptes = end - addr;
1139                 else
1140                         nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
1141
1142                 amdgpu_vm_handle_huge_pages(params, entry, parent,
1143                                             nptes, dst, flags);
1144                 /* We don't need to update PTEs for huge pages */
1145                 if (entry->huge)
1146                         continue;
1147
1148                 pt = entry->base.bo;
1149                 pe_start = (addr & mask) * 8;
1150                 if (pt->shadow)
1151                         params->func(params, pt->shadow, pe_start, dst, nptes,
1152                                      AMDGPU_GPU_PAGE_SIZE, flags);
1153                 params->func(params, pt, pe_start, dst, nptes,
1154                              AMDGPU_GPU_PAGE_SIZE, flags);
1155         }
1156
1157         return 0;
1158 }
1159
1160 /*
1161  * amdgpu_vm_frag_ptes - add fragment information to PTEs
1162  *
1163  * @params: see amdgpu_pte_update_params definition
1164  * @vm: requested vm
1165  * @start: first PTE to handle
1166  * @end: last PTE to handle
1167  * @dst: addr those PTEs should point to
1168  * @flags: hw mapping flags
1169  * Returns 0 for success, -EINVAL for failure.
1170  */
1171 static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params  *params,
1172                                 uint64_t start, uint64_t end,
1173                                 uint64_t dst, uint64_t flags)
1174 {
1175         /**
1176          * The MC L1 TLB supports variable sized pages, based on a fragment
1177          * field in the PTE. When this field is set to a non-zero value, page
1178          * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
1179          * flags are considered valid for all PTEs within the fragment range
1180          * and corresponding mappings are assumed to be physically contiguous.
1181          *
1182          * The L1 TLB can store a single PTE for the whole fragment,
1183          * significantly increasing the space available for translation
1184          * caching. This leads to large improvements in throughput when the
1185          * TLB is under pressure.
1186          *
1187          * The L2 TLB distributes small and large fragments into two
1188          * asymmetric partitions. The large fragment cache is significantly
1189          * larger. Thus, we try to use large fragments wherever possible.
1190          * Userspace can support this by aligning virtual base address and
1191          * allocation size to the fragment size.
1192          */
1193         unsigned max_frag = params->adev->vm_manager.fragment_size;
1194         int r;
1195
1196         /* system pages are not physically contiguous */
1197         if (params->src || !(flags & AMDGPU_PTE_VALID))
1198                 return amdgpu_vm_update_ptes(params, start, end, dst, flags);
1199
1200         while (start != end) {
1201                 uint64_t frag_flags, frag_end;
1202                 unsigned frag;
1203
1204                 /* This intentionally wraps around if no bit is set */
1205                 frag = min((unsigned)ffs(start) - 1,
1206                            (unsigned)fls64(end - start) - 1);
1207                 if (frag >= max_frag) {
1208                         frag_flags = AMDGPU_PTE_FRAG(max_frag);
1209                         frag_end = end & ~((1ULL << max_frag) - 1);
1210                 } else {
1211                         frag_flags = AMDGPU_PTE_FRAG(frag);
1212                         frag_end = start + (1 << frag);
1213                 }
1214
1215                 r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
1216                                           flags | frag_flags);
1217                 if (r)
1218                         return r;
1219
1220                 dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
1221                 start = frag_end;
1222         }
1223
1224         return 0;
1225 }
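/*
 * Worked example for the loop above, assuming a fragment_size of 9
 * (2 MiB fragments): updating GPU pages 512..2047 yields
 * frag = min(ffs(512) - 1, fls64(1536) - 1) = min(9, 10) = 9, which is
 * >= max_frag, so the whole range is written in a single pass with
 * AMDGPU_PTE_FRAG(9) set and frag_end = 2048.  An unaligned start such as
 * 768 would first emit a smaller 256 page fragment (frag = 8) before
 * switching to 2 MiB fragments at 1024.
 */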
1226
1227 /**
1228  * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
1229  *
1230  * @adev: amdgpu_device pointer
1231  * @exclusive: fence we need to sync to
1232  * @pages_addr: DMA addresses to use for mapping
1233  * @vm: requested vm
1234  * @start: start of mapped range
1235  * @last: last mapped entry
1236  * @flags: flags for the entries
1237  * @addr: addr to set the area to
1238  * @fence: optional resulting fence
1239  *
1240  * Fill in the page table entries between @start and @last.
1241  * Returns 0 for success, -EINVAL for failure.
1242  */
1243 static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
1244                                        struct dma_fence *exclusive,
1245                                        dma_addr_t *pages_addr,
1246                                        struct amdgpu_vm *vm,
1247                                        uint64_t start, uint64_t last,
1248                                        uint64_t flags, uint64_t addr,
1249                                        struct dma_fence **fence)
1250 {
1251         struct amdgpu_ring *ring;
1252         void *owner = AMDGPU_FENCE_OWNER_VM;
1253         unsigned nptes, ncmds, ndw;
1254         struct amdgpu_job *job;
1255         struct amdgpu_pte_update_params params;
1256         struct dma_fence *f = NULL;
1257         int r;
1258
1259         memset(&params, 0, sizeof(params));
1260         params.adev = adev;
1261         params.vm = vm;
1262
1263         /* sync to everything on unmapping */
1264         if (!(flags & AMDGPU_PTE_VALID))
1265                 owner = AMDGPU_FENCE_OWNER_UNDEFINED;
1266
1267         if (vm->use_cpu_for_update) {
1268                 /* params.src is used as a flag to indicate system memory */
1269                 if (pages_addr)
1270                         params.src = ~0;
1271
1272                 /* Wait for PT BOs to be free. PTs share the same resv. object
1273                  * as the root PD BO
1274                  */
1275                 r = amdgpu_vm_wait_pd(adev, vm, owner);
1276                 if (unlikely(r))
1277                         return r;
1278
1279                 params.func = amdgpu_vm_cpu_set_ptes;
1280                 params.pages_addr = pages_addr;
1281                 return amdgpu_vm_frag_ptes(&params, start, last + 1,
1282                                            addr, flags);
1283         }
1284
1285         ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
1286
1287         nptes = last - start + 1;
1288
1289         /*
1290          * reserve space for two commands every (1 << BLOCK_SIZE)
1291          *  entries or 2k dwords (whatever is smaller)
1292          *
1293          * The second command is for the shadow pagetables.
1294          */
1295         if (vm->root.base.bo->shadow)
1296                 ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2;
1297         else
1298                 ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1);
1299
1300         /* padding, etc. */
1301         ndw = 64;
1302
1303         if (pages_addr) {
1304                 /* copy commands needed */
1305                 ndw += ncmds * adev->vm_manager.vm_pte_funcs->copy_pte_num_dw;
1306
1307                 /* and also PTEs */
1308                 ndw += nptes * 2;
1309
1310                 params.func = amdgpu_vm_do_copy_ptes;
1311
1312         } else {
1313                 /* set page commands needed */
1314                 ndw += ncmds * 10;
1315
1316                 /* extra commands for begin/end fragments */
1317                 ndw += 2 * 10 * adev->vm_manager.fragment_size;
1318
1319                 params.func = amdgpu_vm_do_set_ptes;
1320         }
1321
1322         r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
1323         if (r)
1324                 return r;
1325
1326         params.ib = &job->ibs[0];
1327
1328         if (pages_addr) {
1329                 uint64_t *pte;
1330                 unsigned i;
1331
1332                 /* Put the PTEs at the end of the IB. */
1333                 i = ndw - nptes * 2;
1334                 pte = (uint64_t *)&(job->ibs->ptr[i]);
1335                 params.src = job->ibs->gpu_addr + i * 4;
1336
1337                 for (i = 0; i < nptes; ++i) {
1338                         pte[i] = amdgpu_vm_map_gart(pages_addr, addr + i *
1339                                                     AMDGPU_GPU_PAGE_SIZE);
1340                         pte[i] |= flags;
1341                 }
1342                 addr = 0;
1343         }
1344
1345         r = amdgpu_sync_fence(adev, &job->sync, exclusive, false);
1346         if (r)
1347                 goto error_free;
1348
1349         r = amdgpu_sync_resv(adev, &job->sync, vm->root.base.bo->tbo.resv,
1350                              owner, false);
1351         if (r)
1352                 goto error_free;
1353
1354         r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
1355         if (r)
1356                 goto error_free;
1357
1358         r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
1359         if (r)
1360                 goto error_free;
1361
1362         amdgpu_ring_pad_ib(ring, params.ib);
1363         WARN_ON(params.ib->length_dw > ndw);
1364         r = amdgpu_job_submit(job, ring, &vm->entity,
1365                               AMDGPU_FENCE_OWNER_VM, &f);
1366         if (r)
1367                 goto error_free;
1368
1369         amdgpu_bo_fence(vm->root.base.bo, f, true);
1370         dma_fence_put(*fence);
1371         *fence = f;
1372         return 0;
1373
1374 error_free:
1375         amdgpu_job_free(job);
1376         return r;
1377 }
1378
1379 /**
1380  * amdgpu_vm_bo_split_mapping - split a mapping into smaller chunks
1381  *
1382  * @adev: amdgpu_device pointer
1383  * @exclusive: fence we need to sync to
1384  * @pages_addr: DMA addresses to use for mapping
1385  * @vm: requested vm
1386  * @mapping: mapped range and flags to use for the update
1387  * @flags: HW flags for the mapping
1388  * @nodes: array of drm_mm_nodes with the MC addresses
1389  * @fence: optional resulting fence
1390  *
1391  * Split the mapping into smaller chunks so that each update fits
1392  * into a SDMA IB.
1393  * Returns 0 for success, -EINVAL for failure.
1394  */
1395 static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
1396                                       struct dma_fence *exclusive,
1397                                       dma_addr_t *pages_addr,
1398                                       struct amdgpu_vm *vm,
1399                                       struct amdgpu_bo_va_mapping *mapping,
1400                                       uint64_t flags,
1401                                       struct drm_mm_node *nodes,
1402                                       struct dma_fence **fence)
1403 {
1404         unsigned min_linear_pages = 1 << adev->vm_manager.fragment_size;
1405         uint64_t pfn, start = mapping->start;
1406         int r;
1407
1408         /* Normally bo_va->flags only contains the READABLE and WRITEABLE
1409          * bits, but just in case we filter the flags here first.
1410          */
1411         if (!(mapping->flags & AMDGPU_PTE_READABLE))
1412                 flags &= ~AMDGPU_PTE_READABLE;
1413         if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
1414                 flags &= ~AMDGPU_PTE_WRITEABLE;
1415
1416         flags &= ~AMDGPU_PTE_EXECUTABLE;
1417         flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
1418
1419         flags &= ~AMDGPU_PTE_MTYPE_MASK;
1420         flags |= (mapping->flags & AMDGPU_PTE_MTYPE_MASK);
1421
1422         if ((mapping->flags & AMDGPU_PTE_PRT) &&
1423             (adev->asic_type >= CHIP_VEGA10)) {
1424                 flags |= AMDGPU_PTE_PRT;
1425                 flags &= ~AMDGPU_PTE_VALID;
1426         }
1427
1428         trace_amdgpu_vm_bo_update(mapping);
1429
1430         pfn = mapping->offset >> PAGE_SHIFT;
1431         if (nodes) {
1432                 while (pfn >= nodes->size) {
1433                         pfn -= nodes->size;
1434                         ++nodes;
1435                 }
1436         }
1437
1438         do {
1439                 dma_addr_t *dma_addr = NULL;
1440                 uint64_t max_entries;
1441                 uint64_t addr, last;
1442
1443                 if (nodes) {
1444                         addr = nodes->start << PAGE_SHIFT;
1445                         max_entries = (nodes->size - pfn) *
1446                                 (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE);
1447                 } else {
1448                         addr = 0;
1449                         max_entries = S64_MAX;
1450                 }
1451
1452                 if (pages_addr) {
1453                         uint64_t count;
1454
1455                         max_entries = min(max_entries, 16ull * 1024ull);
1456                         for (count = 1; count < max_entries; ++count) {
1457                                 uint64_t idx = pfn + count;
1458
1459                                 if (pages_addr[idx] !=
1460                                     (pages_addr[idx - 1] + PAGE_SIZE))
1461                                         break;
1462                         }
1463
1464                         if (count < min_linear_pages) {
1465                                 addr = pfn << PAGE_SHIFT;
1466                                 dma_addr = pages_addr;
1467                         } else {
1468                                 addr = pages_addr[pfn];
1469                                 max_entries = count;
1470                         }
1471
1472                 } else if (flags & AMDGPU_PTE_VALID) {
1473                         addr += adev->vm_manager.vram_base_offset;
1474                         addr += pfn << PAGE_SHIFT;
1475                 }
1476
1477                 last = min((uint64_t)mapping->last, start + max_entries - 1);
1478                 r = amdgpu_vm_bo_update_mapping(adev, exclusive, dma_addr, vm,
1479                                                 start, last, flags, addr,
1480                                                 fence);
1481                 if (r)
1482                         return r;
1483
1484                 pfn += last - start + 1;
1485                 if (nodes && nodes->size == pfn) {
1486                         pfn = 0;
1487                         ++nodes;
1488                 }
1489                 start = last + 1;
1490
1491         } while (unlikely(start != mapping->last + 1));
1492
1493         return 0;
1494 }
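/*
 * Editor's sketch (not driver code): the heart of the system-page path above
 * is working out how many consecutive pages_addr[] entries form one
 * physically contiguous run. The standalone helper below shows just that
 * step, assuming a plain dma_addr_t array and 4K pages; the real loop above
 * additionally caps the run at 16K entries and falls back to per-page
 * addressing when the run is shorter than the fragment size.
 */
#if 0 /* illustrative example only, never compiled */
static uint64_t example_contiguous_run(const dma_addr_t *pages_addr,
                                       uint64_t pfn, uint64_t max_entries)
{
        uint64_t count;

        /* grow the run while each page directly follows the previous one */
        for (count = 1; count < max_entries; ++count) {
                if (pages_addr[pfn + count] !=
                    pages_addr[pfn + count - 1] + PAGE_SIZE)
                        break;
        }
        return count;
}
#endif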
1495
1496 /**
1497  * amdgpu_vm_bo_update - update all BO mappings in the vm page table
1498  *
1499  * @adev: amdgpu_device pointer
1500  * @bo_va: requested BO and VM object
1501  * @clear: if true clear the entries
1502  *
1503  * Fill in the page table entries for @bo_va.
1504  * Returns 0 for success, -EINVAL for failure.
1505  */
1506 int amdgpu_vm_bo_update(struct amdgpu_device *adev,
1507                         struct amdgpu_bo_va *bo_va,
1508                         bool clear)
1509 {
1510         struct amdgpu_bo *bo = bo_va->base.bo;
1511         struct amdgpu_vm *vm = bo_va->base.vm;
1512         struct amdgpu_bo_va_mapping *mapping;
1513         dma_addr_t *pages_addr = NULL;
1514         struct ttm_mem_reg *mem;
1515         struct drm_mm_node *nodes;
1516         struct dma_fence *exclusive, **last_update;
1517         uint64_t flags;
1518         int r;
1519
1520         if (clear || !bo_va->base.bo) {
1521                 mem = NULL;
1522                 nodes = NULL;
1523                 exclusive = NULL;
1524         } else {
1525                 struct ttm_dma_tt *ttm;
1526
1527                 mem = &bo_va->base.bo->tbo.mem;
1528                 nodes = mem->mm_node;
1529                 if (mem->mem_type == TTM_PL_TT) {
1530                         ttm = container_of(bo_va->base.bo->tbo.ttm,
1531                                            struct ttm_dma_tt, ttm);
1532                         pages_addr = ttm->dma_address;
1533                 }
1534                 exclusive = reservation_object_get_excl(bo->tbo.resv);
1535         }
1536
1537         if (bo)
1538                 flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
1539         else
1540                 flags = 0x0;
1541
1542         if (clear || (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv))
1543                 last_update = &vm->last_update;
1544         else
1545                 last_update = &bo_va->last_pt_update;
1546
1547         if (!clear && bo_va->base.moved) {
1548                 bo_va->base.moved = false;
1549                 list_splice_init(&bo_va->valids, &bo_va->invalids);
1550
1551         } else if (bo_va->cleared != clear) {
1552                 list_splice_init(&bo_va->valids, &bo_va->invalids);
1553         }
1554
1555         list_for_each_entry(mapping, &bo_va->invalids, list) {
1556                 r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm,
1557                                                mapping, flags, nodes,
1558                                                last_update);
1559                 if (r)
1560                         return r;
1561         }
1562
1563         if (vm->use_cpu_for_update) {
1564                 /* Flush HDP */
1565                 mb();
1566                 amdgpu_asic_flush_hdp(adev, NULL);
1567         }
1568
1569         spin_lock(&vm->moved_lock);
1570         list_del_init(&bo_va->base.vm_status);
1571         spin_unlock(&vm->moved_lock);
1572
1573         /* If the BO is not in its preferred location add it back to
1574          * the evicted list so that it gets validated again on the
1575          * next command submission.
1576          */
1577         if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv &&
1578             !(bo->preferred_domains &
1579             amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type)))
1580                 list_add_tail(&bo_va->base.vm_status, &vm->evicted);
1581
1582         list_splice_init(&bo_va->invalids, &bo_va->valids);
1583         bo_va->cleared = clear;
1584
1585         if (trace_amdgpu_vm_bo_mapping_enabled()) {
1586                 list_for_each_entry(mapping, &bo_va->valids, list)
1587                         trace_amdgpu_vm_bo_mapping(mapping);
1588         }
1589
1590         return 0;
1591 }
1592
1593 /**
1594  * amdgpu_vm_update_prt_state - update the global PRT state
1595  */
1596 static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev)
1597 {
1598         unsigned long flags;
1599         bool enable;
1600
1601         spin_lock_irqsave(&adev->vm_manager.prt_lock, flags);
1602         enable = !!atomic_read(&adev->vm_manager.num_prt_users);
1603         adev->gmc.gmc_funcs->set_prt(adev, enable);
1604         spin_unlock_irqrestore(&adev->vm_manager.prt_lock, flags);
1605 }
1606
1607 /**
1608  * amdgpu_vm_prt_get - add a PRT user
1609  */
1610 static void amdgpu_vm_prt_get(struct amdgpu_device *adev)
1611 {
1612         if (!adev->gmc.gmc_funcs->set_prt)
1613                 return;
1614
1615         if (atomic_inc_return(&adev->vm_manager.num_prt_users) == 1)
1616                 amdgpu_vm_update_prt_state(adev);
1617 }
1618
1619 /**
1620  * amdgpu_vm_prt_put - drop a PRT user
1621  */
1622 static void amdgpu_vm_prt_put(struct amdgpu_device *adev)
1623 {
1624         if (atomic_dec_return(&adev->vm_manager.num_prt_users) == 0)
1625                 amdgpu_vm_update_prt_state(adev);
1626 }
1627
1628 /**
1629  * amdgpu_vm_prt_cb - callback for updating the PRT status
1630  */
1631 static void amdgpu_vm_prt_cb(struct dma_fence *fence, struct dma_fence_cb *_cb)
1632 {
1633         struct amdgpu_prt_cb *cb = container_of(_cb, struct amdgpu_prt_cb, cb);
1634
1635         amdgpu_vm_prt_put(cb->adev);
1636         kfree(cb);
1637 }
1638
1639 /**
1640  * amdgpu_vm_add_prt_cb - add callback for updating the PRT status
1641  */
1642 static void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev,
1643                                  struct dma_fence *fence)
1644 {
1645         struct amdgpu_prt_cb *cb;
1646
1647         if (!adev->gmc.gmc_funcs->set_prt)
1648                 return;
1649
1650         cb = kmalloc(sizeof(struct amdgpu_prt_cb), GFP_KERNEL);
1651         if (!cb) {
1652                 /* Last resort when we are OOM */
1653                 if (fence)
1654                         dma_fence_wait(fence, false);
1655
1656                 amdgpu_vm_prt_put(adev);
1657         } else {
1658                 cb->adev = adev;
1659                 if (!fence || dma_fence_add_callback(fence, &cb->cb,
1660                                                      amdgpu_vm_prt_cb))
1661                         amdgpu_vm_prt_cb(fence, &cb->cb);
1662         }
1663 }
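/*
 * Editor's sketch (not driver code) of the reference-count pattern the
 * helpers above implement: the hardware PRT state is only touched when the
 * user count crosses zero, and a release can be deferred until the fence of
 * the corresponding unmap has signaled. The function below is purely
 * illustrative.
 */
#if 0 /* illustrative example only, never compiled */
static void example_prt_refcount(struct amdgpu_device *adev,
                                 struct dma_fence *unmap_fence)
{
        /* taking a reference may enable PRT (0 -> 1 transition) */
        amdgpu_vm_prt_get(adev);

        /* dropping it is deferred behind the fence: once the fence signals,
         * amdgpu_vm_prt_cb() calls amdgpu_vm_prt_put(), which may disable
         * PRT again (1 -> 0 transition)
         */
        amdgpu_vm_add_prt_cb(adev, unmap_fence);
}
#endif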
1664
1665 /**
1666  * amdgpu_vm_free_mapping - free a mapping
1667  *
1668  * @adev: amdgpu_device pointer
1669  * @vm: requested vm
1670  * @mapping: mapping to be freed
1671  * @fence: fence of the unmap operation
1672  *
1673  * Free a mapping and make sure we decrease the PRT usage count if applicable.
1674  */
1675 static void amdgpu_vm_free_mapping(struct amdgpu_device *adev,
1676                                    struct amdgpu_vm *vm,
1677                                    struct amdgpu_bo_va_mapping *mapping,
1678                                    struct dma_fence *fence)
1679 {
1680         if (mapping->flags & AMDGPU_PTE_PRT)
1681                 amdgpu_vm_add_prt_cb(adev, fence);
1682         kfree(mapping);
1683 }
1684
1685 /**
1686  * amdgpu_vm_prt_fini - finish all prt mappings
1687  *
1688  * @adev: amdgpu_device pointer
1689  * @vm: requested vm
1690  *
1691  * Register a cleanup callback to disable PRT support after VM dies.
1692  */
1693 static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
1694 {
1695         struct reservation_object *resv = vm->root.base.bo->tbo.resv;
1696         struct dma_fence *excl, **shared;
1697         unsigned i, shared_count;
1698         int r;
1699
1700         r = reservation_object_get_fences_rcu(resv, &excl,
1701                                               &shared_count, &shared);
1702         if (r) {
1703                 /* Not enough memory to grab the fence list; as a last
1704                  * resort, block until all the fences complete.
1705                  */
1706                 reservation_object_wait_timeout_rcu(resv, true, false,
1707                                                     MAX_SCHEDULE_TIMEOUT);
1708                 return;
1709         }
1710
1711         /* Add a callback for each fence in the reservation object */
1712         amdgpu_vm_prt_get(adev);
1713         amdgpu_vm_add_prt_cb(adev, excl);
1714
1715         for (i = 0; i < shared_count; ++i) {
1716                 amdgpu_vm_prt_get(adev);
1717                 amdgpu_vm_add_prt_cb(adev, shared[i]);
1718         }
1719
1720         kfree(shared);
1721 }
1722
1723 /**
1724  * amdgpu_vm_clear_freed - clear freed BOs in the PT
1725  *
1726  * @adev: amdgpu_device pointer
1727  * @vm: requested vm
1728  * @fence: optional resulting fence (unchanged if no work needed to be done
1729  * or if an error occurred)
1730  *
1731  * Make sure all freed BOs are cleared in the PT.
1732  * Returns 0 for success.
1733  *
1734  * PTs have to be reserved and mutex must be locked!
1735  */
1736 int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
1737                           struct amdgpu_vm *vm,
1738                           struct dma_fence **fence)
1739 {
1740         struct amdgpu_bo_va_mapping *mapping;
1741         uint64_t init_pte_value = 0;
1742         struct dma_fence *f = NULL;
1743         int r;
1744
1745         while (!list_empty(&vm->freed)) {
1746                 mapping = list_first_entry(&vm->freed,
1747                         struct amdgpu_bo_va_mapping, list);
1748                 list_del(&mapping->list);
1749
1750                 if (vm->pte_support_ats && mapping->start < AMDGPU_VA_HOLE_START)
1751                         init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
1752
1753                 r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
1754                                                 mapping->start, mapping->last,
1755                                                 init_pte_value, 0, &f);
1756                 amdgpu_vm_free_mapping(adev, vm, mapping, f);
1757                 if (r) {
1758                         dma_fence_put(f);
1759                         return r;
1760                 }
1761         }
1762
1763         if (fence && f) {
1764                 dma_fence_put(*fence);
1765                 *fence = f;
1766         } else {
1767                 dma_fence_put(f);
1768         }
1769
1770         return 0;
1771
1772 }
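/*
 * Editor's sketch (not driver code) of how a caller consumes the optional
 * fence: the pointer is only replaced when clearing work was actually
 * submitted, so NULL means nothing had to be done. The wait here is just an
 * illustration; real callers typically add the fence to a sync object.
 */
#if 0 /* illustrative example only, never compiled */
static int example_clear_freed(struct amdgpu_device *adev,
                               struct amdgpu_vm *vm)
{
        struct dma_fence *fence = NULL;
        int r;

        r = amdgpu_vm_clear_freed(adev, vm, &fence);
        if (r)
                return r;

        if (fence) {
                dma_fence_wait(fence, false);
                dma_fence_put(fence);
        }
        return 0;
}
#endif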
1773
1774 /**
1775  * amdgpu_vm_handle_moved - handle moved BOs in the PT
1776  *
1777  * @adev: amdgpu_device pointer
1778  * @vm: requested vm
1780  *
1781  * Make sure all BOs which are moved are updated in the PTs.
1782  * Returns 0 for success.
1783  *
1784  * PTs have to be reserved!
1785  */
1786 int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
1787                            struct amdgpu_vm *vm)
1788 {
1789         bool clear;
1790         int r = 0;
1791
1792         spin_lock(&vm->moved_lock);
1793         while (!list_empty(&vm->moved)) {
1794                 struct amdgpu_bo_va *bo_va;
1795                 struct reservation_object *resv;
1796
1797                 bo_va = list_first_entry(&vm->moved,
1798                         struct amdgpu_bo_va, base.vm_status);
1799                 spin_unlock(&vm->moved_lock);
1800
1801                 resv = bo_va->base.bo->tbo.resv;
1802
1803                 /* Per VM BOs never need to be cleared in the page tables */
1804                 if (resv == vm->root.base.bo->tbo.resv)
1805                         clear = false;
1806                 /* Try to reserve the BO to avoid clearing its ptes */
1807                 else if (!amdgpu_vm_debug && reservation_object_trylock(resv))
1808                         clear = false;
1809                 /* Somebody else is using the BO right now */
1810                 else
1811                         clear = true;
1812
1813                 r = amdgpu_vm_bo_update(adev, bo_va, clear);
1814                 if (r)
1815                         return r;
1816
1817                 if (!clear && resv != vm->root.base.bo->tbo.resv)
1818                         reservation_object_unlock(resv);
1819
1820                 spin_lock(&vm->moved_lock);
1821         }
1822         spin_unlock(&vm->moved_lock);
1823
1824         return r;
1825 }
1826
1827 /**
1828  * amdgpu_vm_bo_add - add a bo to a specific vm
1829  *
1830  * @adev: amdgpu_device pointer
1831  * @vm: requested vm
1832  * @bo: amdgpu buffer object
1833  *
1834  * Add @bo into the requested vm.
1835  * Add @bo to the list of bos associated with the vm
1836  * Returns newly added bo_va or NULL for failure
1837  *
1838  * Object has to be reserved!
1839  */
1840 struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
1841                                       struct amdgpu_vm *vm,
1842                                       struct amdgpu_bo *bo)
1843 {
1844         struct amdgpu_bo_va *bo_va;
1845
1846         bo_va = kzalloc(sizeof(struct amdgpu_bo_va), GFP_KERNEL);
1847         if (bo_va == NULL) {
1848                 return NULL;
1849         }
1850         amdgpu_vm_bo_base_init(&bo_va->base, vm, bo);
1851
1852         bo_va->ref_count = 1;
1853         INIT_LIST_HEAD(&bo_va->valids);
1854         INIT_LIST_HEAD(&bo_va->invalids);
1855
1856         return bo_va;
1857 }
1858
1859
1860 /**
1861  * amdgpu_vm_bo_insert_map - insert a new mapping
1862  *
1863  * @adev: amdgpu_device pointer
1864  * @bo_va: bo_va to store the address
1865  * @mapping: the mapping to insert
1866  *
1867  * Insert a new mapping into all structures.
1868  */
1869 static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
1870                                     struct amdgpu_bo_va *bo_va,
1871                                     struct amdgpu_bo_va_mapping *mapping)
1872 {
1873         struct amdgpu_vm *vm = bo_va->base.vm;
1874         struct amdgpu_bo *bo = bo_va->base.bo;
1875
1876         mapping->bo_va = bo_va;
1877         list_add(&mapping->list, &bo_va->invalids);
1878         amdgpu_vm_it_insert(mapping, &vm->va);
1879
1880         if (mapping->flags & AMDGPU_PTE_PRT)
1881                 amdgpu_vm_prt_get(adev);
1882
1883         if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
1884                 spin_lock(&vm->moved_lock);
1885                 if (list_empty(&bo_va->base.vm_status))
1886                         list_add(&bo_va->base.vm_status, &vm->moved);
1887                 spin_unlock(&vm->moved_lock);
1888         }
1889         trace_amdgpu_vm_bo_map(bo_va, mapping);
1890 }
1891
1892 /**
1893  * amdgpu_vm_bo_map - map bo inside a vm
1894  *
1895  * @adev: amdgpu_device pointer
1896  * @bo_va: bo_va to store the address
1897  * @saddr: where to map the BO
1898  * @offset: requested offset in the BO
1899  * @flags: attributes of pages (read/write/valid/etc.)
1900  *
1901  * Add a mapping of the BO at the specified addr into the VM.
1902  * Returns 0 for success, error for failure.
1903  *
1904  * Object has to be reserved and unreserved outside!
1905  */
1906 int amdgpu_vm_bo_map(struct amdgpu_device *adev,
1907                      struct amdgpu_bo_va *bo_va,
1908                      uint64_t saddr, uint64_t offset,
1909                      uint64_t size, uint64_t flags)
1910 {
1911         struct amdgpu_bo_va_mapping *mapping, *tmp;
1912         struct amdgpu_bo *bo = bo_va->base.bo;
1913         struct amdgpu_vm *vm = bo_va->base.vm;
1914         uint64_t eaddr;
1915
1916         /* validate the parameters */
1917         if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK ||
1918             size == 0 || size & AMDGPU_GPU_PAGE_MASK)
1919                 return -EINVAL;
1920
1921         /* make sure object fit at this offset */
1922         eaddr = saddr + size - 1;
1923         if (saddr >= eaddr ||
1924             (bo && offset + size > amdgpu_bo_size(bo)))
1925                 return -EINVAL;
1926
1927         saddr /= AMDGPU_GPU_PAGE_SIZE;
1928         eaddr /= AMDGPU_GPU_PAGE_SIZE;
1929
1930         tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
1931         if (tmp) {
1932                 /* bo and tmp overlap, invalid addr */
1933                 dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with "
1934                         "0x%010Lx-0x%010Lx\n", bo, saddr, eaddr,
1935                         tmp->start, tmp->last + 1);
1936                 return -EINVAL;
1937         }
1938
1939         mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
1940         if (!mapping)
1941                 return -ENOMEM;
1942
1943         mapping->start = saddr;
1944         mapping->last = eaddr;
1945         mapping->offset = offset;
1946         mapping->flags = flags;
1947
1948         amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
1949
1950         return 0;
1951 }
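/*
 * Editor's sketch (not driver code) of a typical mapping sequence built from
 * the helpers in this file: create the bo_va handle and then map the whole BO
 * read/write at a page-aligned GPU VA. The BO must be reserved around these
 * calls as noted in the function comments; the function name and the gpu_va
 * parameter are purely illustrative.
 */
#if 0 /* illustrative example only, never compiled */
static int example_map_bo(struct amdgpu_device *adev, struct amdgpu_vm *vm,
                          struct amdgpu_bo *bo, uint64_t gpu_va)
{
        struct amdgpu_bo_va *bo_va;

        bo_va = amdgpu_vm_bo_add(adev, vm, bo);
        if (!bo_va)
                return -ENOMEM;

        /* saddr, offset and size must be AMDGPU_GPU_PAGE aligned */
        return amdgpu_vm_bo_map(adev, bo_va, gpu_va, 0, amdgpu_bo_size(bo),
                                AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE);
}
#endif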
1952
1953 /**
1954  * amdgpu_vm_bo_replace_map - map bo inside a vm, replacing existing mappings
1955  *
1956  * @adev: amdgpu_device pointer
1957  * @bo_va: bo_va to store the address
1958  * @saddr: where to map the BO
1959  * @offset: requested offset in the BO
1960  * @flags: attributes of pages (read/write/valid/etc.)
1961  *
1962  * Add a mapping of the BO at the specified addr into the VM. Replace existing
1963  * mappings as we do so.
1964  * Returns 0 for success, error for failure.
1965  *
1966  * Object has to be reserved and unreserved outside!
1967  */
1968 int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
1969                              struct amdgpu_bo_va *bo_va,
1970                              uint64_t saddr, uint64_t offset,
1971                              uint64_t size, uint64_t flags)
1972 {
1973         struct amdgpu_bo_va_mapping *mapping;
1974         struct amdgpu_bo *bo = bo_va->base.bo;
1975         uint64_t eaddr;
1976         int r;
1977
1978         /* validate the parameters */
1979         if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK ||
1980             size == 0 || size & AMDGPU_GPU_PAGE_MASK)
1981                 return -EINVAL;
1982
1983         /* make sure object fit at this offset */
1984         eaddr = saddr + size - 1;
1985         if (saddr >= eaddr ||
1986             (bo && offset + size > amdgpu_bo_size(bo)))
1987                 return -EINVAL;
1988
1989         /* Allocate all the needed memory */
1990         mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
1991         if (!mapping)
1992                 return -ENOMEM;
1993
1994         r = amdgpu_vm_bo_clear_mappings(adev, bo_va->base.vm, saddr, size);
1995         if (r) {
1996                 kfree(mapping);
1997                 return r;
1998         }
1999
2000         saddr /= AMDGPU_GPU_PAGE_SIZE;
2001         eaddr /= AMDGPU_GPU_PAGE_SIZE;
2002
2003         mapping->start = saddr;
2004         mapping->last = eaddr;
2005         mapping->offset = offset;
2006         mapping->flags = flags;
2007
2008         amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
2009
2010         return 0;
2011 }
2012
2013 /**
2014  * amdgpu_vm_bo_unmap - remove bo mapping from vm
2015  *
2016  * @adev: amdgpu_device pointer
2017  * @bo_va: bo_va to remove the address from
2018  * @saddr: where the BO is mapped
2019  *
2020  * Remove a mapping of the BO at the specified addr from the VM.
2021  * Returns 0 for success, error for failure.
2022  *
2023  * Object has to be reserved and unreserved outside!
2024  */
2025 int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
2026                        struct amdgpu_bo_va *bo_va,
2027                        uint64_t saddr)
2028 {
2029         struct amdgpu_bo_va_mapping *mapping;
2030         struct amdgpu_vm *vm = bo_va->base.vm;
2031         bool valid = true;
2032
2033         saddr /= AMDGPU_GPU_PAGE_SIZE;
2034
2035         list_for_each_entry(mapping, &bo_va->valids, list) {
2036                 if (mapping->start == saddr)
2037                         break;
2038         }
2039
2040         if (&mapping->list == &bo_va->valids) {
2041                 valid = false;
2042
2043                 list_for_each_entry(mapping, &bo_va->invalids, list) {
2044                         if (mapping->start == saddr)
2045                                 break;
2046                 }
2047
2048                 if (&mapping->list == &bo_va->invalids)
2049                         return -ENOENT;
2050         }
2051
2052         list_del(&mapping->list);
2053         amdgpu_vm_it_remove(mapping, &vm->va);
2054         mapping->bo_va = NULL;
2055         trace_amdgpu_vm_bo_unmap(bo_va, mapping);
2056
2057         if (valid)
2058                 list_add(&mapping->list, &vm->freed);
2059         else
2060                 amdgpu_vm_free_mapping(adev, vm, mapping,
2061                                        bo_va->last_pt_update);
2062
2063         return 0;
2064 }
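/*
 * Editor's sketch (not driver code) of the matching teardown for a mapping
 * created with amdgpu_vm_bo_map(): unmap at the same GPU VA, which moves the
 * mapping to vm->freed, and let a later amdgpu_vm_clear_freed() call actually
 * clear the page-table entries. The function name is purely illustrative.
 */
#if 0 /* illustrative example only, never compiled */
static int example_unmap_bo(struct amdgpu_device *adev, struct amdgpu_vm *vm,
                            struct amdgpu_bo_va *bo_va, uint64_t gpu_va)
{
        int r;

        r = amdgpu_vm_bo_unmap(adev, bo_va, gpu_va);
        if (r)
                return r;

        /* the resulting fence is optional, so NULL is fine here */
        return amdgpu_vm_clear_freed(adev, vm, NULL);
}
#endif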
2065
2066 /**
2067  * amdgpu_vm_bo_clear_mappings - remove all mappings in a specific range
2068  *
2069  * @adev: amdgpu_device pointer
2070  * @vm: VM structure to use
2071  * @saddr: start of the range
2072  * @size: size of the range
2073  *
2074  * Remove all mappings in a range, split them as appropriate.
2075  * Returns 0 for success, error for failure.
2076  */
2077 int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
2078                                 struct amdgpu_vm *vm,
2079                                 uint64_t saddr, uint64_t size)
2080 {
2081         struct amdgpu_bo_va_mapping *before, *after, *tmp, *next;
2082         LIST_HEAD(removed);
2083         uint64_t eaddr;
2084
2085         eaddr = saddr + size - 1;
2086         saddr /= AMDGPU_GPU_PAGE_SIZE;
2087         eaddr /= AMDGPU_GPU_PAGE_SIZE;
2088
2089         /* Allocate all the needed memory */
2090         before = kzalloc(sizeof(*before), GFP_KERNEL);
2091         if (!before)
2092                 return -ENOMEM;
2093         INIT_LIST_HEAD(&before->list);
2094
2095         after = kzalloc(sizeof(*after), GFP_KERNEL);
2096         if (!after) {
2097                 kfree(before);
2098                 return -ENOMEM;
2099         }
2100         INIT_LIST_HEAD(&after->list);
2101
2102         /* Now gather all removed mappings */
2103         tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
2104         while (tmp) {
2105                 /* Remember mapping split at the start */
2106                 if (tmp->start < saddr) {
2107                         before->start = tmp->start;
2108                         before->last = saddr - 1;
2109                         before->offset = tmp->offset;
2110                         before->flags = tmp->flags;
2111                         list_add(&before->list, &tmp->list);
2112                 }
2113
2114                 /* Remember mapping split at the end */
2115                 if (tmp->last > eaddr) {
2116                         after->start = eaddr + 1;
2117                         after->last = tmp->last;
2118                         after->offset = tmp->offset;
2119                         after->offset += (after->start - tmp->start) << PAGE_SHIFT;
2120                         after->flags = tmp->flags;
2121                         list_add(&after->list, &tmp->list);
2122                 }
2123
2124                 list_del(&tmp->list);
2125                 list_add(&tmp->list, &removed);
2126
2127                 tmp = amdgpu_vm_it_iter_next(tmp, saddr, eaddr);
2128         }
2129
2130         /* And free them up */
2131         list_for_each_entry_safe(tmp, next, &removed, list) {
2132                 amdgpu_vm_it_remove(tmp, &vm->va);
2133                 list_del(&tmp->list);
2134
2135                 if (tmp->start < saddr)
2136                     tmp->start = saddr;
2137                 if (tmp->last > eaddr)
2138                     tmp->last = eaddr;
2139
2140                 tmp->bo_va = NULL;
2141                 list_add(&tmp->list, &vm->freed);
2142                 trace_amdgpu_vm_bo_unmap(NULL, tmp);
2143         }
2144
2145         /* Insert partial mapping before the range */
2146         if (!list_empty(&before->list)) {
2147                 amdgpu_vm_it_insert(before, &vm->va);
2148                 if (before->flags & AMDGPU_PTE_PRT)
2149                         amdgpu_vm_prt_get(adev);
2150         } else {
2151                 kfree(before);
2152         }
2153
2154         /* Insert partial mapping after the range */
2155         if (!list_empty(&after->list)) {
2156                 amdgpu_vm_it_insert(after, &vm->va);
2157                 if (after->flags & AMDGPU_PTE_PRT)
2158                         amdgpu_vm_prt_get(adev);
2159         } else {
2160                 kfree(after);
2161         }
2162
2163         return 0;
2164 }
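/*
 * Editor's worked example (illustrative numbers, in GPU page units): clearing
 * pages 0x200..0x2ff out of an existing mapping covering 0x100..0x4ff removes
 * the overlapping mapping and re-inserts two remainders: "before" spans
 * 0x100..0x1ff and keeps the original BO offset, while "after" spans
 * 0x300..0x4ff with its byte offset advanced by the 0x200 pages that were
 * skipped, exactly as computed above.
 */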
2165
2166 /**
2167  * amdgpu_vm_bo_lookup_mapping - find mapping by address
2168  *
2169  * @vm: the requested VM
2170  *
2171  * Find a mapping by its address.
2172  */
2173 struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm,
2174                                                          uint64_t addr)
2175 {
2176         return amdgpu_vm_it_iter_first(&vm->va, addr, addr);
2177 }
2178
2179 /**
2180  * amdgpu_vm_bo_rmv - remove a bo from a specific vm
2181  *
2182  * @adev: amdgpu_device pointer
2183  * @bo_va: requested bo_va
2184  *
2185  * Remove @bo_va->bo from the requested vm.
2186  *
2187  * Object has to be reserved!
2188  */
2189 void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
2190                       struct amdgpu_bo_va *bo_va)
2191 {
2192         struct amdgpu_bo_va_mapping *mapping, *next;
2193         struct amdgpu_vm *vm = bo_va->base.vm;
2194
2195         list_del(&bo_va->base.bo_list);
2196
2197         spin_lock(&vm->moved_lock);
2198         list_del(&bo_va->base.vm_status);
2199         spin_unlock(&vm->moved_lock);
2200
2201         list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
2202                 list_del(&mapping->list);
2203                 amdgpu_vm_it_remove(mapping, &vm->va);
2204                 mapping->bo_va = NULL;
2205                 trace_amdgpu_vm_bo_unmap(bo_va, mapping);
2206                 list_add(&mapping->list, &vm->freed);
2207         }
2208         list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
2209                 list_del(&mapping->list);
2210                 amdgpu_vm_it_remove(mapping, &vm->va);
2211                 amdgpu_vm_free_mapping(adev, vm, mapping,
2212                                        bo_va->last_pt_update);
2213         }
2214
2215         dma_fence_put(bo_va->last_pt_update);
2216         kfree(bo_va);
2217 }
2218
2219 /**
2220  * amdgpu_vm_bo_invalidate - mark the bo as invalid
2221  *
2222  * @adev: amdgpu_device pointer
2223  * @bo: amdgpu buffer object
2224  * @evicted: whether the BO has been evicted
2225  *
2226  * Mark @bo as invalid.
2227  */
2228 void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
2229                              struct amdgpu_bo *bo, bool evicted)
2230 {
2231         struct amdgpu_vm_bo_base *bo_base;
2232
2233         /* shadow bo doesn't have bo base, its validation needs its parent */
2234         if (bo->parent && bo->parent->shadow == bo)
2235                 bo = bo->parent;
2236
2237         list_for_each_entry(bo_base, &bo->va, bo_list) {
2238                 struct amdgpu_vm *vm = bo_base->vm;
2239
2240                 bo_base->moved = true;
2241                 if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
2242                         if (bo->tbo.type == ttm_bo_type_kernel)
2243                                 list_move(&bo_base->vm_status, &vm->evicted);
2244                         else
2245                                 list_move_tail(&bo_base->vm_status,
2246                                                &vm->evicted);
2247                         continue;
2248                 }
2249
2250                 if (bo->tbo.type == ttm_bo_type_kernel) {
2251                         if (list_empty(&bo_base->vm_status))
2252                                 list_add(&bo_base->vm_status, &vm->relocated);
2253                         continue;
2254                 }
2255
2256                 spin_lock(&bo_base->vm->moved_lock);
2257                 if (list_empty(&bo_base->vm_status))
2258                         list_add(&bo_base->vm_status, &vm->moved);
2259                 spin_unlock(&bo_base->vm->moved_lock);
2260         }
2261 }
2262
2263 static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size)
2264 {
2265         /* Total bits covered by PD + PTs */
2266         unsigned bits = ilog2(vm_size) + 18;
2267
2268         /* Make sure the PD is 4K in size up to 8GB address space.
2269            Above that, split equally between PD and PTs */
2270         if (vm_size <= 8)
2271                 return (bits - 9);
2272         else
2273                 return ((bits + 3) / 2);
2274 }
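/*
 * Editor's worked example (illustrative numbers): for an 8 GB VM,
 * bits = ilog2(8) + 18 = 21 and the block size is 21 - 9 = 12 bits, which
 * keeps the PD at 512 entries (4K). For a 64 GB VM, bits = ilog2(64) + 18 = 24
 * and the block size becomes (24 + 3) / 2 = 13 bits, splitting the remaining
 * bits roughly evenly between PD and PTs.
 */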
2275
2276 /**
2277  * amdgpu_vm_adjust_size - adjust vm size, block size and fragment size
2278  *
2279  * @adev: amdgpu_device pointer
2281  * @vm_size: the default vm size if it is set to auto
2281  */
2282 void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size,
2283                            uint32_t fragment_size_default, unsigned max_level,
2284                            unsigned max_bits)
2285 {
2286         uint64_t tmp;
2287
2288         /* adjust vm size first */
2289         if (amdgpu_vm_size != -1) {
2290                 unsigned max_size = 1 << (max_bits - 30);
2291
2292                 vm_size = amdgpu_vm_size;
2293                 if (vm_size > max_size) {
2294                         dev_warn(adev->dev, "VM size (%d) too large, max is %u GB\n",
2295                                  amdgpu_vm_size, max_size);
2296                         vm_size = max_size;
2297                 }
2298         }
2299
2300         adev->vm_manager.max_pfn = (uint64_t)vm_size << 18;
2301
2302         tmp = roundup_pow_of_two(adev->vm_manager.max_pfn);
2303         if (amdgpu_vm_block_size != -1)
2304                 tmp >>= amdgpu_vm_block_size - 9;
2305         tmp = DIV_ROUND_UP(fls64(tmp) - 1, 9) - 1;
2306         adev->vm_manager.num_level = min(max_level, (unsigned)tmp);
2307         switch (adev->vm_manager.num_level) {
2308         case 3:
2309                 adev->vm_manager.root_level = AMDGPU_VM_PDB2;
2310                 break;
2311         case 2:
2312                 adev->vm_manager.root_level = AMDGPU_VM_PDB1;
2313                 break;
2314         case 1:
2315                 adev->vm_manager.root_level = AMDGPU_VM_PDB0;
2316                 break;
2317         default:
2318                 dev_err(adev->dev, "VMPT only supports 2~4+1 levels\n");
2319         }
2320         /* block size depends on vm size and hw setup */
2321         if (amdgpu_vm_block_size != -1)
2322                 adev->vm_manager.block_size =
2323                         min((unsigned)amdgpu_vm_block_size, max_bits
2324                             - AMDGPU_GPU_PAGE_SHIFT
2325                             - 9 * adev->vm_manager.num_level);
2326         else if (adev->vm_manager.num_level > 1)
2327                 adev->vm_manager.block_size = 9;
2328         else
2329                 adev->vm_manager.block_size = amdgpu_vm_get_block_size(tmp);
2330
2331         if (amdgpu_vm_fragment_size == -1)
2332                 adev->vm_manager.fragment_size = fragment_size_default;
2333         else
2334                 adev->vm_manager.fragment_size = amdgpu_vm_fragment_size;
2335
2336         DRM_INFO("vm size is %u GB, %u levels, block size is %u-bit, fragment size is %u-bit\n",
2337                  vm_size, adev->vm_manager.num_level + 1,
2338                  adev->vm_manager.block_size,
2339                  adev->vm_manager.fragment_size);
2340 }
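/*
 * Editor's note on the max_pfn shift above (illustrative numbers): vm_size is
 * given in GB while max_pfn counts 4K GPU pages, and 1 GB / 4 KB =
 * 2^30 / 2^12 = 2^18 pages, hence max_pfn = vm_size << 18. A 256 GB VM, for
 * example, yields 256 << 18 = 0x4000000 pages.
 */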
2341
2342 /**
2343  * amdgpu_vm_init - initialize a vm instance
2344  *
2345  * @adev: amdgpu_device pointer
2346  * @vm: requested vm
2347  * @vm_context: Indicates if it is a GFX or Compute context
2348  *
2349  * Init @vm fields.
2350  */
2351 int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
2352                    int vm_context, unsigned int pasid)
2353 {
2354         struct amdgpu_bo_param bp;
2355         struct amdgpu_bo *root;
2356         const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
2357                 AMDGPU_VM_PTE_COUNT(adev) * 8);
2358         unsigned ring_instance;
2359         struct amdgpu_ring *ring;
2360         struct drm_sched_rq *rq;
2361         unsigned long size;
2362         uint64_t flags;
2363         int r, i;
2364
2365         vm->va = RB_ROOT_CACHED;
2366         for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
2367                 vm->reserved_vmid[i] = NULL;
2368         INIT_LIST_HEAD(&vm->evicted);
2369         INIT_LIST_HEAD(&vm->relocated);
2370         spin_lock_init(&vm->moved_lock);
2371         INIT_LIST_HEAD(&vm->moved);
2372         INIT_LIST_HEAD(&vm->freed);
2373
2374         /* create scheduler entity for page table updates */
2375
2376         ring_instance = atomic_inc_return(&adev->vm_manager.vm_pte_next_ring);
2377         ring_instance %= adev->vm_manager.vm_pte_num_rings;
2378         ring = adev->vm_manager.vm_pte_rings[ring_instance];
2379         rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
2380         r = drm_sched_entity_init(&ring->sched, &vm->entity,
2381                                   rq, NULL);
2382         if (r)
2383                 return r;
2384
2385         vm->pte_support_ats = false;
2386
2387         if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) {
2388                 vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
2389                                                 AMDGPU_VM_USE_CPU_FOR_COMPUTE);
2390
2391                 if (adev->asic_type == CHIP_RAVEN)
2392                         vm->pte_support_ats = true;
2393         } else {
2394                 vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
2395                                                 AMDGPU_VM_USE_CPU_FOR_GFX);
2396         }
2397         DRM_DEBUG_DRIVER("VM update mode is %s\n",
2398                          vm->use_cpu_for_update ? "CPU" : "SDMA");
2399         WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
2400                   "CPU update of VM recommended only for large BAR system\n");
2401         vm->last_update = NULL;
2402
2403         flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
2404         if (vm->use_cpu_for_update)
2405                 flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
2406         else
2407                 flags |= AMDGPU_GEM_CREATE_SHADOW;
2408
2409         size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level);
2410         memset(&bp, 0, sizeof(bp));
2411         bp.size = size;
2412         bp.byte_align = align;
2413         bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
2414         bp.flags = flags;
2415         bp.type = ttm_bo_type_kernel;
2416         bp.resv = NULL;
2417         r = amdgpu_bo_create(adev, &bp, &root);
2418         if (r)
2419                 goto error_free_sched_entity;
2420
2421         r = amdgpu_bo_reserve(root, true);
2422         if (r)
2423                 goto error_free_root;
2424
2425         r = amdgpu_vm_clear_bo(adev, vm, root,
2426                                adev->vm_manager.root_level,
2427                                vm->pte_support_ats);
2428         if (r)
2429                 goto error_unreserve;
2430
2431         amdgpu_vm_bo_base_init(&vm->root.base, vm, root);
2432         amdgpu_bo_unreserve(vm->root.base.bo);
2433
2434         if (pasid) {
2435                 unsigned long flags;
2436
2437                 spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
2438                 r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid + 1,
2439                               GFP_ATOMIC);
2440                 spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
2441                 if (r < 0)
2442                         goto error_free_root;
2443
2444                 vm->pasid = pasid;
2445         }
2446
2447         INIT_KFIFO(vm->faults);
2448         vm->fault_credit = 16;
2449
2450         return 0;
2451
2452 error_unreserve:
2453         amdgpu_bo_unreserve(vm->root.base.bo);
2454
2455 error_free_root:
2456         amdgpu_bo_unref(&vm->root.base.bo->shadow);
2457         amdgpu_bo_unref(&vm->root.base.bo);
2458         vm->root.base.bo = NULL;
2459
2460 error_free_sched_entity:
2461         drm_sched_entity_fini(&ring->sched, &vm->entity);
2462
2463         return r;
2464 }
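/*
 * Editor's sketch (not driver code) of a VM instance lifetime built from the
 * functions in this file: initialize, use, then tear down. Passing pasid == 0
 * simply skips the pasid_idr registration above; the function name and the
 * on-stack VM are purely illustrative.
 */
#if 0 /* illustrative example only, never compiled */
static int example_vm_lifetime(struct amdgpu_device *adev)
{
        struct amdgpu_vm vm;
        int r;

        r = amdgpu_vm_init(adev, &vm, AMDGPU_VM_CONTEXT_COMPUTE, 0);
        if (r)
                return r;

        /* ... add and map BOs, submit work ... */

        amdgpu_vm_fini(adev, &vm);
        return 0;
}
#endif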
2465
2466 /**
2467  * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM
2468  *
2469  * This only works on GFX VMs that don't have any BOs added and no
2470  * page tables allocated yet.
2471  *
2472  * Changes the following VM parameters:
2473  * - use_cpu_for_update
2474  * - pte_supports_ats
2475  * - pasid (old PASID is released, because compute manages its own PASIDs)
2476  *
2477  * Reinitializes the page directory to reflect the changed ATS
2478  * setting. May leave behind an unused shadow BO for the page
2479  * directory when switching from SDMA updates to CPU updates.
2480  *
2481  * Returns 0 for success, -errno for errors.
2482  */
2483 int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
2484 {
2485         bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
2486         int r;
2487
2488         r = amdgpu_bo_reserve(vm->root.base.bo, true);
2489         if (r)
2490                 return r;
2491
2492         /* Sanity checks */
2493         if (!RB_EMPTY_ROOT(&vm->va.rb_root) || vm->root.entries) {
2494                 r = -EINVAL;
2495                 goto error;
2496         }
2497
2498         /* Check if PD needs to be reinitialized and do it before
2499          * changing any other state, in case it fails.
2500          */
2501         if (pte_support_ats != vm->pte_support_ats) {
2502                 r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
2503                                adev->vm_manager.root_level,
2504                                pte_support_ats);
2505                 if (r)
2506                         goto error;
2507         }
2508
2509         /* Update VM state */
2510         vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
2511                                     AMDGPU_VM_USE_CPU_FOR_COMPUTE);
2512         vm->pte_support_ats = pte_support_ats;
2513         DRM_DEBUG_DRIVER("VM update mode is %s\n",
2514                          vm->use_cpu_for_update ? "CPU" : "SDMA");
2515         WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
2516                   "CPU update of VM recommended only for large BAR system\n");
2517
2518         if (vm->pasid) {
2519                 unsigned long flags;
2520
2521                 spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
2522                 idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
2523                 spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
2524
2525                 vm->pasid = 0;
2526         }
2527
2528 error:
2529         amdgpu_bo_unreserve(vm->root.base.bo);
2530         return r;
2531 }
2532
2533 /**
2534  * amdgpu_vm_free_levels - free PD/PT levels
2535  *
2536  * @adev: amdgpu device structure
2537  * @parent: PD/PT starting level to free
2538  * @level: level of parent structure
2539  *
2540  * Free the page directory or page table level and all sub levels.
2541  */
2542 static void amdgpu_vm_free_levels(struct amdgpu_device *adev,
2543                                   struct amdgpu_vm_pt *parent,
2544                                   unsigned level)
2545 {
2546         unsigned i, num_entries = amdgpu_vm_num_entries(adev, level);
2547
2548         if (parent->base.bo) {
2549                 list_del(&parent->base.bo_list);
2550                 list_del(&parent->base.vm_status);
2551                 amdgpu_bo_unref(&parent->base.bo->shadow);
2552                 amdgpu_bo_unref(&parent->base.bo);
2553         }
2554
2555         if (parent->entries)
2556                 for (i = 0; i < num_entries; i++)
2557                         amdgpu_vm_free_levels(adev, &parent->entries[i],
2558                                               level + 1);
2559
2560         kvfree(parent->entries);
2561 }
2562
2563 /**
2564  * amdgpu_vm_fini - tear down a vm instance
2565  *
2566  * @adev: amdgpu_device pointer
2567  * @vm: requested vm
2568  *
2569  * Tear down @vm.
2570  * Unbind the VM and remove all bos from the vm bo list
2571  */
2572 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
2573 {
2574         struct amdgpu_bo_va_mapping *mapping, *tmp;
2575         bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt;
2576         struct amdgpu_bo *root;
2577         u64 fault;
2578         int i, r;
2579
2580         amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
2581
2582         /* Clear pending page faults from IH when the VM is destroyed */
2583         while (kfifo_get(&vm->faults, &fault))
2584                 amdgpu_ih_clear_fault(adev, fault);
2585
2586         if (vm->pasid) {
2587                 unsigned long flags;
2588
2589                 spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
2590                 idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
2591                 spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
2592         }
2593
2594         drm_sched_entity_fini(vm->entity.sched, &vm->entity);
2595
2596         if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
2597                 dev_err(adev->dev, "still active bo inside vm\n");
2598         }
2599         rbtree_postorder_for_each_entry_safe(mapping, tmp,
2600                                              &vm->va.rb_root, rb) {
2601                 list_del(&mapping->list);
2602                 amdgpu_vm_it_remove(mapping, &vm->va);
2603                 kfree(mapping);
2604         }
2605         list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
2606                 if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
2607                         amdgpu_vm_prt_fini(adev, vm);
2608                         prt_fini_needed = false;
2609                 }
2610
2611                 list_del(&mapping->list);
2612                 amdgpu_vm_free_mapping(adev, vm, mapping, NULL);
2613         }
2614
2615         root = amdgpu_bo_ref(vm->root.base.bo);
2616         r = amdgpu_bo_reserve(root, true);
2617         if (r) {
2618                 dev_err(adev->dev, "Leaking page tables because BO reservation failed\n");
2619         } else {
2620                 amdgpu_vm_free_levels(adev, &vm->root,
2621                                       adev->vm_manager.root_level);
2622                 amdgpu_bo_unreserve(root);
2623         }
2624         amdgpu_bo_unref(&root);
2625         dma_fence_put(vm->last_update);
2626         for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
2627                 amdgpu_vmid_free_reserved(adev, vm, i);
2628 }
2629
2630 /**
2631  * amdgpu_vm_pasid_fault_credit - Check fault credit for given PASID
2632  *
2633  * @adev: amdgpu_device pointer
2634  * @pasid: PASID to identify the VM
2635  *
2636  * This function is expected to be called in interrupt context. Returns
2637  * true if there was fault credit, false otherwise
2638  */
2639 bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
2640                                   unsigned int pasid)
2641 {
2642         struct amdgpu_vm *vm;
2643
2644         spin_lock(&adev->vm_manager.pasid_lock);
2645         vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
2646         if (!vm) {
2647                 /* VM not found, can't track fault credit */
2648                 spin_unlock(&adev->vm_manager.pasid_lock);
2649                 return true;
2650         }
2651
2652         /* No lock needed. Only accessed by the IRQ handler */
2653         if (!vm->fault_credit) {
2654                 /* Too many faults in this VM */
2655                 spin_unlock(&adev->vm_manager.pasid_lock);
2656                 return false;
2657         }
2658
2659         vm->fault_credit--;
2660         spin_unlock(&adev->vm_manager.pasid_lock);
2661         return true;
2662 }
2663
2664 /**
2665  * amdgpu_vm_manager_init - init the VM manager
2666  *
2667  * @adev: amdgpu_device pointer
2668  *
2669  * Initialize the VM manager structures
2670  */
2671 void amdgpu_vm_manager_init(struct amdgpu_device *adev)
2672 {
2673         unsigned i;
2674
2675         amdgpu_vmid_mgr_init(adev);
2676
2677         adev->vm_manager.fence_context =
2678                 dma_fence_context_alloc(AMDGPU_MAX_RINGS);
2679         for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
2680                 adev->vm_manager.seqno[i] = 0;
2681
2682         atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
2683         spin_lock_init(&adev->vm_manager.prt_lock);
2684         atomic_set(&adev->vm_manager.num_prt_users, 0);
2685
2686         /* Unless overridden by the user, compute VM page tables are only
2687          * updated by the CPU on large BAR systems by default.
2688          */
2689 #ifdef CONFIG_X86_64
2690         if (amdgpu_vm_update_mode == -1) {
2691                 if (amdgpu_vm_is_large_bar(adev))
2692                         adev->vm_manager.vm_update_mode =
2693                                 AMDGPU_VM_USE_CPU_FOR_COMPUTE;
2694                 else
2695                         adev->vm_manager.vm_update_mode = 0;
2696         } else
2697                 adev->vm_manager.vm_update_mode = amdgpu_vm_update_mode;
2698 #else
2699         adev->vm_manager.vm_update_mode = 0;
2700 #endif
2701
2702         idr_init(&adev->vm_manager.pasid_idr);
2703         spin_lock_init(&adev->vm_manager.pasid_lock);
2704 }
2705
2706 /**
2707  * amdgpu_vm_manager_fini - cleanup VM manager
2708  *
2709  * @adev: amdgpu_device pointer
2710  *
2711  * Cleanup the VM manager and free resources.
2712  */
2713 void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
2714 {
2715         WARN_ON(!idr_is_empty(&adev->vm_manager.pasid_idr));
2716         idr_destroy(&adev->vm_manager.pasid_idr);
2717
2718         amdgpu_vmid_mgr_fini(adev);
2719 }
2720
2721 int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
2722 {
2723         union drm_amdgpu_vm *args = data;
2724         struct amdgpu_device *adev = dev->dev_private;
2725         struct amdgpu_fpriv *fpriv = filp->driver_priv;
2726         int r;
2727
2728         switch (args->in.op) {
2729         case AMDGPU_VM_OP_RESERVE_VMID:
2730                 /* currently, we only need to reserve VMIDs from the gfxhub */
2731                 r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB);
2732                 if (r)
2733                         return r;
2734                 break;
2735         case AMDGPU_VM_OP_UNRESERVE_VMID:
2736                 amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB);
2737                 break;
2738         default:
2739                 return -EINVAL;
2740         }
2741
2742         return 0;
2743 }