Git Repo - linux.git/blob - drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
drm/amdgpu: cleanup amdgpu_vm_validate_pt_bos v2
1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 #include <linux/dma-fence-array.h>
29 #include <linux/interval_tree_generic.h>
30 #include <linux/idr.h>
31 #include <drm/drmP.h>
32 #include <drm/amdgpu_drm.h>
33 #include "amdgpu.h"
34 #include "amdgpu_trace.h"
35 #include "amdgpu_amdkfd.h"
36
37 /*
38  * GPUVM
39  * GPUVM is similar to the legacy gart on older asics, however
40  * rather than there being a single global gart table
41  * for the entire GPU, there are multiple VM page tables active
42  * at any given time.  The VM page tables can contain a mix of
43  * vram pages and system memory pages, and system memory pages
44  * can be mapped as snooped (cached system pages) or unsnooped
45  * (uncached system pages).
46  * Each VM has an ID associated with it and there is a page table
47  * associated with each VMID.  When executing a command buffer,
48  * the kernel tells the ring what VMID to use for that command
49  * buffer.  VMIDs are allocated dynamically as commands are submitted.
50  * The userspace drivers maintain their own address space and the kernel
51  * sets up their page tables accordingly when they submit their
52  * command buffers and a VMID is assigned.
53  * Cayman/Trinity support up to 8 active VMs at any given time;
54  * SI supports 16.
55  */
56
57 #define START(node) ((node)->start)
58 #define LAST(node) ((node)->last)
59
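/*
 * Generates the static amdgpu_vm_it_insert/remove/iter_first/iter_next
 * helpers used to track amdgpu_bo_va_mapping intervals over their
 * [start, last] page range.
 */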
60 INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, uint64_t, __subtree_last,
61                      START, LAST, static, amdgpu_vm_it)
62
63 #undef START
64 #undef LAST
65
66 /* Local structure. Encapsulate some VM table update parameters to reduce
67  * the number of function parameters
68  */
69 struct amdgpu_pte_update_params {
70         /* amdgpu device we do this update for */
71         struct amdgpu_device *adev;
72         /* optional amdgpu_vm we do this update for */
73         struct amdgpu_vm *vm;
74         /* address where to copy page table entries from */
75         uint64_t src;
76         /* indirect buffer to fill with commands */
77         struct amdgpu_ib *ib;
78         /* Function which actually does the update */
79         void (*func)(struct amdgpu_pte_update_params *params,
80                      struct amdgpu_bo *bo, uint64_t pe,
81                      uint64_t addr, unsigned count, uint32_t incr,
82                      uint64_t flags);
83         /* The next two are used during VM update by CPU
84          *  DMA addresses to use for mapping
85          *  Kernel pointer of PD/PT BO that needs to be updated
86          */
87         dma_addr_t *pages_addr;
88         void *kptr;
89 };
90
91 /* Helper to disable partial resident texture feature from a fence callback */
92 struct amdgpu_prt_cb {
93         struct amdgpu_device *adev;
94         struct dma_fence_cb cb;
95 };
96
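/**
 * amdgpu_vm_bo_base_init - init a bo_va base structure and add it to the VM
 *
 * @base: base structure to initialize
 * @vm: vm the BO belongs to
 * @bo: BO backing the base structure, may be NULL
 *
 * Links the base into the BO's va list; per-VM BOs that are not currently in
 * their preferred domain are queued on the evicted list so they get
 * revalidated on next VM use.
 */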
97 static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
98                                    struct amdgpu_vm *vm,
99                                    struct amdgpu_bo *bo)
100 {
101         base->vm = vm;
102         base->bo = bo;
103         INIT_LIST_HEAD(&base->bo_list);
104         INIT_LIST_HEAD(&base->vm_status);
105
106         if (!bo)
107                 return;
108         list_add_tail(&base->bo_list, &bo->va);
109
110         if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
111                 return;
112
113         if (bo->preferred_domains &
114             amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
115                 return;
116
117         /*
118          * we checked all the prerequisites, but it looks like this per vm bo
119          * is currently evicted. add the bo to the evicted list to make sure it
120          * is validated on next vm use to avoid fault.
121          */
122         list_move_tail(&base->vm_status, &vm->evicted);
123 }
124
125 /**
126  * amdgpu_vm_level_shift - return the addr shift for each level
127  *
128  * @adev: amdgpu_device pointer
129  *
130  * Returns the number of bits the pfn needs to be right shifted for a level.
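 * E.g. with a 9 bit block_size: PTB -> 0, PDB0 -> 9, PDB1 -> 18, PDB2 -> 27.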
131  */
132 static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev,
133                                       unsigned level)
134 {
135         unsigned shift = 0xff;
136
137         switch (level) {
138         case AMDGPU_VM_PDB2:
139         case AMDGPU_VM_PDB1:
140         case AMDGPU_VM_PDB0:
141                 shift = 9 * (AMDGPU_VM_PDB0 - level) +
142                         adev->vm_manager.block_size;
143                 break;
144         case AMDGPU_VM_PTB:
145                 shift = 0;
146                 break;
147         default:
148                 dev_err(adev->dev, "the level %d isn't supported.\n", level);
149         }
150
151         return shift;
152 }
153
154 /**
155  * amdgpu_vm_num_entries - return the number of entries in a PD/PT
156  *
157  * @adev: amdgpu_device pointer
158  *
159  * Calculate the number of entries in a page directory or page table.
160  */
161 static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
162                                       unsigned level)
163 {
164         unsigned shift = amdgpu_vm_level_shift(adev,
165                                                adev->vm_manager.root_level);
166
167         if (level == adev->vm_manager.root_level)
168                 /* For the root directory */
169                 return round_up(adev->vm_manager.max_pfn, 1 << shift) >> shift;
170         else if (level != AMDGPU_VM_PTB)
171                 /* Everything in between */
172                 return 512;
173         else
174                 /* For the page tables on the leaves */
175                 return AMDGPU_VM_PTE_COUNT(adev);
176 }
177
178 /**
179  * amdgpu_vm_bo_size - returns the size of the BOs in bytes
180  *
181  * @adev: amdgpu_device pointer
182  *
183  * Calculate the size of the BO for a page directory or page table in bytes.
184  */
185 static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level)
186 {
187         return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level) * 8);
188 }
189
190 /**
191  * amdgpu_vm_get_pd_bo - add the VM PD to a validation list
192  *
193  * @vm: vm providing the BOs
194  * @validated: head of validation list
195  * @entry: entry to add
196  *
197  * Add the page directory to the list of BOs to
198  * validate for command submission.
199  */
200 void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
201                          struct list_head *validated,
202                          struct amdgpu_bo_list_entry *entry)
203 {
204         entry->robj = vm->root.base.bo;
205         entry->priority = 0;
206         entry->tv.bo = &entry->robj->tbo;
207         entry->tv.shared = true;
208         entry->user_pages = NULL;
209         list_add(&entry->tv.head, validated);
210 }
211
212 /**
213  * amdgpu_vm_validate_pt_bos - validate the page table BOs
214  *
215  * @adev: amdgpu device pointer
216  * @vm: vm providing the BOs
217  * @validate: callback to do the validation
218  * @param: parameter for the validation callback
219  *
220  * Validate the page table BOs on command submission if necessary.
221  */
222 int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
223                               int (*validate)(void *p, struct amdgpu_bo *bo),
224                               void *param)
225 {
226         struct ttm_bo_global *glob = adev->mman.bdev.glob;
227         struct amdgpu_vm_bo_base *bo_base, *tmp;
228         int r = 0;
229
230         list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) {
231                 struct amdgpu_bo *bo = bo_base->bo;
232
233                 if (bo->parent) {
234                         r = validate(param, bo);
235                         if (r)
236                                 break;
237
238                         spin_lock(&glob->lru_lock);
239                         ttm_bo_move_to_lru_tail(&bo->tbo);
240                         if (bo->shadow)
241                                 ttm_bo_move_to_lru_tail(&bo->shadow->tbo);
242                         spin_unlock(&glob->lru_lock);
243                 }
244
245                 if (bo->tbo.type == ttm_bo_type_kernel &&
246                     vm->use_cpu_for_update) {
247                         r = amdgpu_bo_kmap(bo, NULL);
248                         if (r)
249                                 break;
250                 }
251
252                 if (bo->tbo.type != ttm_bo_type_kernel) {
253                         spin_lock(&vm->moved_lock);
254                         list_move(&bo_base->vm_status, &vm->moved);
255                         spin_unlock(&vm->moved_lock);
256                 } else {
257                         list_move(&bo_base->vm_status, &vm->relocated);
258                 }
259         }
260
261         return r;
262 }
263
264 /**
265  * amdgpu_vm_ready - check VM is ready for updates
266  *
267  * @vm: VM to check
268  *
269  * Check if all VM PDs/PTs are ready for updates
270  */
271 bool amdgpu_vm_ready(struct amdgpu_vm *vm)
272 {
273         return list_empty(&vm->evicted);
274 }
275
276 /**
277  * amdgpu_vm_clear_bo - initially clear the PDs/PTs
278  *
279  * @adev: amdgpu_device pointer
280  * @bo: BO to clear
281  * @level: level this BO is at
282  *
283  * Root PD needs to be reserved when calling this.
284  */
285 static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
286                               struct amdgpu_vm *vm, struct amdgpu_bo *bo,
287                               unsigned level, bool pte_support_ats)
288 {
289         struct ttm_operation_ctx ctx = { true, false };
290         struct dma_fence *fence = NULL;
291         unsigned entries, ats_entries;
292         struct amdgpu_ring *ring;
293         struct amdgpu_job *job;
294         uint64_t addr;
295         int r;
296
297         addr = amdgpu_bo_gpu_offset(bo);
298         entries = amdgpu_bo_size(bo) / 8;
299
300         if (pte_support_ats) {
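                /* On the root level only the entries covering the aperture
                 * below AMDGPU_VA_HOLE_START get the ATS default mapping,
                 * lower level PDs/PTs are initialized as ATS entirely.
                 */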
301                 if (level == adev->vm_manager.root_level) {
302                         ats_entries = amdgpu_vm_level_shift(adev, level);
303                         ats_entries += AMDGPU_GPU_PAGE_SHIFT;
304                         ats_entries = AMDGPU_VA_HOLE_START >> ats_entries;
305                         ats_entries = min(ats_entries, entries);
306                         entries -= ats_entries;
307                 } else {
308                         ats_entries = entries;
309                         entries = 0;
310                 }
311         } else {
312                 ats_entries = 0;
313         }
314
315         ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
316
317         r = reservation_object_reserve_shared(bo->tbo.resv);
318         if (r)
319                 return r;
320
321         r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
322         if (r)
323                 goto error;
324
325         r = amdgpu_job_alloc_with_ib(adev, 64, &job);
326         if (r)
327                 goto error;
328
329         if (ats_entries) {
330                 uint64_t ats_value;
331
332                 ats_value = AMDGPU_PTE_DEFAULT_ATC;
333                 if (level != AMDGPU_VM_PTB)
334                         ats_value |= AMDGPU_PDE_PTE;
335
336                 amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
337                                       ats_entries, 0, ats_value);
338                 addr += ats_entries * 8;
339         }
340
341         if (entries)
342                 amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
343                                       entries, 0, 0);
344
345         amdgpu_ring_pad_ib(ring, &job->ibs[0]);
346
347         WARN_ON(job->ibs[0].length_dw > 64);
348         r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv,
349                              AMDGPU_FENCE_OWNER_UNDEFINED, false);
350         if (r)
351                 goto error_free;
352
353         r = amdgpu_job_submit(job, ring, &vm->entity,
354                               AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
355         if (r)
356                 goto error_free;
357
358         amdgpu_bo_fence(bo, fence, true);
359         dma_fence_put(fence);
360
361         if (bo->shadow)
362                 return amdgpu_vm_clear_bo(adev, vm, bo->shadow,
363                                           level, pte_support_ats);
364
365         return 0;
366
367 error_free:
368         amdgpu_job_free(job);
369
370 error:
371         return r;
372 }
373
374 /**
375  * amdgpu_vm_alloc_levels - allocate the PD/PT levels
376  *
377  * @adev: amdgpu_device pointer
378  * @vm: requested vm
379  * @saddr: start of the address range
380  * @eaddr: end of the address range
381  *
382  * Make sure the page directories and page tables are allocated
383  */
384 static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
385                                   struct amdgpu_vm *vm,
386                                   struct amdgpu_vm_pt *parent,
387                                   uint64_t saddr, uint64_t eaddr,
388                                   unsigned level, bool ats)
389 {
390         unsigned shift = amdgpu_vm_level_shift(adev, level);
391         unsigned pt_idx, from, to;
392         u64 flags;
393         int r;
394
395         if (!parent->entries) {
396                 unsigned num_entries = amdgpu_vm_num_entries(adev, level);
397
398                 parent->entries = kvmalloc_array(num_entries,
399                                                    sizeof(struct amdgpu_vm_pt),
400                                                    GFP_KERNEL | __GFP_ZERO);
401                 if (!parent->entries)
402                         return -ENOMEM;
404         }
405
406         from = saddr >> shift;
407         to = eaddr >> shift;
408         if (from >= amdgpu_vm_num_entries(adev, level) ||
409             to >= amdgpu_vm_num_entries(adev, level))
410                 return -EINVAL;
411
412         ++level;
413         saddr = saddr & ((1 << shift) - 1);
414         eaddr = eaddr & ((1 << shift) - 1);
415
416         flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
417         if (vm->use_cpu_for_update)
418                 flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
419         else
420                 flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
421                                 AMDGPU_GEM_CREATE_SHADOW);
422
423         /* walk over the address space and allocate the page tables */
424         for (pt_idx = from; pt_idx <= to; ++pt_idx) {
425                 struct reservation_object *resv = vm->root.base.bo->tbo.resv;
426                 struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
427                 struct amdgpu_bo *pt;
428
429                 if (!entry->base.bo) {
430                         struct amdgpu_bo_param bp;
431
432                         memset(&bp, 0, sizeof(bp));
433                         bp.size = amdgpu_vm_bo_size(adev, level);
434                         bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
435                         bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
436                         bp.flags = flags;
437                         bp.type = ttm_bo_type_kernel;
438                         bp.resv = resv;
439                         r = amdgpu_bo_create(adev, &bp, &pt);
440                         if (r)
441                                 return r;
442
443                         r = amdgpu_vm_clear_bo(adev, vm, pt, level, ats);
444                         if (r) {
445                                 amdgpu_bo_unref(&pt->shadow);
446                                 amdgpu_bo_unref(&pt);
447                                 return r;
448                         }
449
450                         if (vm->use_cpu_for_update) {
451                                 r = amdgpu_bo_kmap(pt, NULL);
452                                 if (r) {
453                                         amdgpu_bo_unref(&pt->shadow);
454                                         amdgpu_bo_unref(&pt);
455                                         return r;
456                                 }
457                         }
458
459                         /* Keep a reference to the root directory to avoid
460                          * freeing them up in the wrong order.
461                          */
462                         pt->parent = amdgpu_bo_ref(parent->base.bo);
463
464                         amdgpu_vm_bo_base_init(&entry->base, vm, pt);
465                         list_move(&entry->base.vm_status, &vm->relocated);
466                 }
467
468                 if (level < AMDGPU_VM_PTB) {
469                         uint64_t sub_saddr = (pt_idx == from) ? saddr : 0;
470                         uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
471                                 ((1 << shift) - 1);
472                         r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr,
473                                                    sub_eaddr, level, ats);
474                         if (r)
475                                 return r;
476                 }
477         }
478
479         return 0;
480 }
481
482 /**
483  * amdgpu_vm_alloc_pts - Allocate page tables.
484  *
485  * @adev: amdgpu_device pointer
486  * @vm: VM to allocate page tables for
487  * @saddr: Start address which needs to be allocated
488  * @size: Size from start address we need.
489  *
490  * Make sure the page tables are allocated.
491  */
492 int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
493                         struct amdgpu_vm *vm,
494                         uint64_t saddr, uint64_t size)
495 {
496         uint64_t eaddr;
497         bool ats = false;
498
499         /* validate the parameters */
500         if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)
501                 return -EINVAL;
502
503         eaddr = saddr + size - 1;
504
505         if (vm->pte_support_ats)
506                 ats = saddr < AMDGPU_VA_HOLE_START;
507
508         saddr /= AMDGPU_GPU_PAGE_SIZE;
509         eaddr /= AMDGPU_GPU_PAGE_SIZE;
510
511         if (eaddr >= adev->vm_manager.max_pfn) {
512                 dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
513                         eaddr, adev->vm_manager.max_pfn);
514                 return -EINVAL;
515         }
516
517         return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr,
518                                       adev->vm_manager.root_level, ats);
519 }
520
521 /**
522  * amdgpu_vm_check_compute_bug - check whether asic has compute vm bug
523  *
524  * @adev: amdgpu_device pointer
525  */
526 void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev)
527 {
528         const struct amdgpu_ip_block *ip_block;
529         bool has_compute_vm_bug;
530         struct amdgpu_ring *ring;
531         int i;
532
533         has_compute_vm_bug = false;
534
535         ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
536         if (ip_block) {
537                 /* Compute has a VM bug for GFX version < 7.
538                    Compute has a VM bug for GFX 8 MEC firmware version < 673.*/
539                 if (ip_block->version->major <= 7)
540                         has_compute_vm_bug = true;
541                 else if (ip_block->version->major == 8)
542                         if (adev->gfx.mec_fw_version < 673)
543                                 has_compute_vm_bug = true;
544         }
545
546         for (i = 0; i < adev->num_rings; i++) {
547                 ring = adev->rings[i];
548                 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
549                         /* only compute rings */
550                         ring->has_compute_vm_bug = has_compute_vm_bug;
551                 else
552                         ring->has_compute_vm_bug = false;
553         }
554 }
555
556 bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
557                                   struct amdgpu_job *job)
558 {
559         struct amdgpu_device *adev = ring->adev;
560         unsigned vmhub = ring->funcs->vmhub;
561         struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
562         struct amdgpu_vmid *id;
563         bool gds_switch_needed;
564         bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug;
565
566         if (job->vmid == 0)
567                 return false;
568         id = &id_mgr->ids[job->vmid];
569         gds_switch_needed = ring->funcs->emit_gds_switch && (
570                 id->gds_base != job->gds_base ||
571                 id->gds_size != job->gds_size ||
572                 id->gws_base != job->gws_base ||
573                 id->gws_size != job->gws_size ||
574                 id->oa_base != job->oa_base ||
575                 id->oa_size != job->oa_size);
576
577         if (amdgpu_vmid_had_gpu_reset(adev, id))
578                 return true;
579
580         return vm_flush_needed || gds_switch_needed;
581 }
582
583 static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev)
584 {
585         return (adev->gmc.real_vram_size == adev->gmc.visible_vram_size);
586 }
587
588 /**
589  * amdgpu_vm_flush - hardware flush the vm
590  *
591  * @ring: ring to use for flush
592  * @job: related job
593  * @need_pipe_sync: whether a pipeline sync is needed before the flush
594  *
595  * Emit a VM flush when it is necessary.
596  */
597 int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync)
598 {
599         struct amdgpu_device *adev = ring->adev;
600         unsigned vmhub = ring->funcs->vmhub;
601         struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
602         struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
603         bool gds_switch_needed = ring->funcs->emit_gds_switch && (
604                 id->gds_base != job->gds_base ||
605                 id->gds_size != job->gds_size ||
606                 id->gws_base != job->gws_base ||
607                 id->gws_size != job->gws_size ||
608                 id->oa_base != job->oa_base ||
609                 id->oa_size != job->oa_size);
610         bool vm_flush_needed = job->vm_needs_flush;
611         bool pasid_mapping_needed = id->pasid != job->pasid ||
612                 !id->pasid_mapping ||
613                 !dma_fence_is_signaled(id->pasid_mapping);
614         struct dma_fence *fence = NULL;
615         unsigned patch_offset = 0;
616         int r;
617
618         if (amdgpu_vmid_had_gpu_reset(adev, id)) {
619                 gds_switch_needed = true;
620                 vm_flush_needed = true;
621                 pasid_mapping_needed = true;
622         }
623
624         gds_switch_needed &= !!ring->funcs->emit_gds_switch;
625         vm_flush_needed &= !!ring->funcs->emit_vm_flush;
626         pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
627                 ring->funcs->emit_wreg;
628
629         if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
630                 return 0;
631
632         if (ring->funcs->init_cond_exec)
633                 patch_offset = amdgpu_ring_init_cond_exec(ring);
634
635         if (need_pipe_sync)
636                 amdgpu_ring_emit_pipeline_sync(ring);
637
638         if (vm_flush_needed) {
639                 trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
640                 amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr);
641         }
642
643         if (pasid_mapping_needed)
644                 amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
645
646         if (vm_flush_needed || pasid_mapping_needed) {
647                 r = amdgpu_fence_emit(ring, &fence, 0);
648                 if (r)
649                         return r;
650         }
651
652         if (vm_flush_needed) {
653                 mutex_lock(&id_mgr->lock);
654                 dma_fence_put(id->last_flush);
655                 id->last_flush = dma_fence_get(fence);
656                 id->current_gpu_reset_count =
657                         atomic_read(&adev->gpu_reset_counter);
658                 mutex_unlock(&id_mgr->lock);
659         }
660
661         if (pasid_mapping_needed) {
662                 id->pasid = job->pasid;
663                 dma_fence_put(id->pasid_mapping);
664                 id->pasid_mapping = dma_fence_get(fence);
665         }
666         dma_fence_put(fence);
667
668         if (ring->funcs->emit_gds_switch && gds_switch_needed) {
669                 id->gds_base = job->gds_base;
670                 id->gds_size = job->gds_size;
671                 id->gws_base = job->gws_base;
672                 id->gws_size = job->gws_size;
673                 id->oa_base = job->oa_base;
674                 id->oa_size = job->oa_size;
675                 amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
676                                             job->gds_size, job->gws_base,
677                                             job->gws_size, job->oa_base,
678                                             job->oa_size);
679         }
680
681         if (ring->funcs->patch_cond_exec)
682                 amdgpu_ring_patch_cond_exec(ring, patch_offset);
683
684         /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
685         if (ring->funcs->emit_switch_buffer) {
686                 amdgpu_ring_emit_switch_buffer(ring);
687                 amdgpu_ring_emit_switch_buffer(ring);
688         }
689         return 0;
690 }
691
692 /**
693  * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo
694  *
695  * @vm: requested vm
696  * @bo: requested buffer object
697  *
698  * Find @bo inside the requested vm.
699  * Search inside the @bo's vm list for the requested vm
700  * Returns the found bo_va or NULL if none is found
701  *
702  * Object has to be reserved!
703  */
704 struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
705                                        struct amdgpu_bo *bo)
706 {
707         struct amdgpu_bo_va *bo_va;
708
709         list_for_each_entry(bo_va, &bo->va, base.bo_list) {
710                 if (bo_va->base.vm == vm) {
711                         return bo_va;
712                 }
713         }
714         return NULL;
715 }
716
717 /**
718  * amdgpu_vm_do_set_ptes - helper to call the right asic function
719  *
720  * @params: see amdgpu_pte_update_params definition
721  * @bo: PD/PT to update
722  * @pe: addr of the page entry
723  * @addr: dst addr to write into pe
724  * @count: number of page entries to update
725  * @incr: increase next addr by incr bytes
726  * @flags: hw access flags
727  *
728  * Traces the parameters and calls the right asic functions
729  * to setup the page table using the DMA.
730  */
731 static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params,
732                                   struct amdgpu_bo *bo,
733                                   uint64_t pe, uint64_t addr,
734                                   unsigned count, uint32_t incr,
735                                   uint64_t flags)
736 {
737         pe += amdgpu_bo_gpu_offset(bo);
738         trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
739
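        /* for very small updates write the PTEs directly into the IB,
         * otherwise use the dedicated set_pte_pde packet
         */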
740         if (count < 3) {
741                 amdgpu_vm_write_pte(params->adev, params->ib, pe,
742                                     addr | flags, count, incr);
743
744         } else {
745                 amdgpu_vm_set_pte_pde(params->adev, params->ib, pe, addr,
746                                       count, incr, flags);
747         }
748 }
749
750 /**
751  * amdgpu_vm_do_copy_ptes - copy the PTEs from the GART
752  *
753  * @params: see amdgpu_pte_update_params definition
754  * @bo: PD/PT to update
755  * @pe: addr of the page entry
756  * @addr: dst addr to write into pe
757  * @count: number of page entries to update
758  * @incr: increase next addr by incr bytes
759  * @flags: hw access flags
760  *
761  * Traces the parameters and calls the DMA function to copy the PTEs.
762  */
763 static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params,
764                                    struct amdgpu_bo *bo,
765                                    uint64_t pe, uint64_t addr,
766                                    unsigned count, uint32_t incr,
767                                    uint64_t flags)
768 {
769         uint64_t src = (params->src + (addr >> 12) * 8);
770
771         pe += amdgpu_bo_gpu_offset(bo);
772         trace_amdgpu_vm_copy_ptes(pe, src, count);
773
774         amdgpu_vm_copy_pte(params->adev, params->ib, pe, src, count);
775 }
776
777 /**
778  * amdgpu_vm_map_gart - Resolve gart mapping of addr
779  *
780  * @pages_addr: optional DMA address to use for lookup
781  * @addr: the unmapped addr
782  *
783  * Look up the physical address of the page that the pte resolves
784  * to and return the pointer for the page table entry.
785  */
786 static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
787 {
788         uint64_t result;
789
790         /* page table offset */
791         result = pages_addr[addr >> PAGE_SHIFT];
792
793         /* in case cpu page size != gpu page size*/
794         result |= addr & (~PAGE_MASK);
795
796         result &= 0xFFFFFFFFFFFFF000ULL;
797
798         return result;
799 }
800
801 /**
802  * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU
803  *
804  * @params: see amdgpu_pte_update_params definition
805  * @bo: PD/PT to update
806  * @pe: kmap addr of the page entry
807  * @addr: dst addr to write into pe
808  * @count: number of page entries to update
809  * @incr: increase next addr by incr bytes
810  * @flags: hw access flags
811  *
812  * Write count number of PT/PD entries directly.
813  */
814 static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
815                                    struct amdgpu_bo *bo,
816                                    uint64_t pe, uint64_t addr,
817                                    unsigned count, uint32_t incr,
818                                    uint64_t flags)
819 {
820         unsigned int i;
821         uint64_t value;
822
823         pe += (unsigned long)amdgpu_bo_kptr(bo);
824
825         trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
826
827         for (i = 0; i < count; i++) {
828                 value = params->pages_addr ?
829                         amdgpu_vm_map_gart(params->pages_addr, addr) :
830                         addr;
831                 amdgpu_gmc_set_pte_pde(params->adev, (void *)(uintptr_t)pe,
832                                        i, value, flags);
833                 addr += incr;
834         }
835 }
836
837 static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
838                              void *owner)
839 {
840         struct amdgpu_sync sync;
841         int r;
842
843         amdgpu_sync_create(&sync);
844         amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner, false);
845         r = amdgpu_sync_wait(&sync, true);
846         amdgpu_sync_free(&sync);
847
848         return r;
849 }
850
851 /*
852  * amdgpu_vm_update_pde - update a single level in the hierarchy
853  *
854  * @params: parameters for the update
855  * @vm: requested vm
856  * @parent: parent directory
857  * @entry: entry to update
858  *
859  * Makes sure the requested entry in parent is up to date.
860  */
861 static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params,
862                                  struct amdgpu_vm *vm,
863                                  struct amdgpu_vm_pt *parent,
864                                  struct amdgpu_vm_pt *entry)
865 {
866         struct amdgpu_bo *bo = parent->base.bo, *pbo;
867         uint64_t pde, pt, flags;
868         unsigned level;
869
870         /* Don't update huge pages here */
871         if (entry->huge)
872                 return;
873
874         for (level = 0, pbo = bo->parent; pbo; ++level)
875                 pbo = pbo->parent;
876
877         level += params->adev->vm_manager.root_level;
878         pt = amdgpu_bo_gpu_offset(entry->base.bo);
879         flags = AMDGPU_PTE_VALID;
880         amdgpu_gmc_get_vm_pde(params->adev, level, &pt, &flags);
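        /* each PDE is 8 bytes, so this is the byte offset of the entry
         * within the parent PD
         */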
881         pde = (entry - parent->entries) * 8;
882         if (bo->shadow)
883                 params->func(params, bo->shadow, pde, pt, 1, 0, flags);
884         params->func(params, bo, pde, pt, 1, 0, flags);
885 }
886
887 /*
888  * amdgpu_vm_invalidate_level - mark all PD levels as invalid
889  *
890  * @parent: parent PD
891  *
892  * Mark all PD levels as invalid after an error.
893  */
894 static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev,
895                                        struct amdgpu_vm *vm,
896                                        struct amdgpu_vm_pt *parent,
897                                        unsigned level)
898 {
899         unsigned pt_idx, num_entries;
900
901         /*
902          * Recurse into the subdirectories. This recursion is harmless because
903          * we only have a maximum of 5 layers.
904          */
905         num_entries = amdgpu_vm_num_entries(adev, level);
906         for (pt_idx = 0; pt_idx < num_entries; ++pt_idx) {
907                 struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
908
909                 if (!entry->base.bo)
910                         continue;
911
912                 if (list_empty(&entry->base.vm_status))
913                         list_add(&entry->base.vm_status, &vm->relocated);
914                 amdgpu_vm_invalidate_level(adev, vm, entry, level + 1);
915         }
916 }
917
918 /*
919  * amdgpu_vm_update_directories - make sure that all directories are valid
920  *
921  * @adev: amdgpu_device pointer
922  * @vm: requested vm
923  *
924  * Makes sure all directories are up to date.
925  * Returns 0 for success, error for failure.
926  */
927 int amdgpu_vm_update_directories(struct amdgpu_device *adev,
928                                  struct amdgpu_vm *vm)
929 {
930         struct amdgpu_pte_update_params params;
931         struct amdgpu_job *job;
932         unsigned ndw = 0;
933         int r = 0;
934
935         if (list_empty(&vm->relocated))
936                 return 0;
937
938 restart:
939         memset(&params, 0, sizeof(params));
940         params.adev = adev;
941
942         if (vm->use_cpu_for_update) {
943                 r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
944                 if (unlikely(r))
945                         return r;
946
947                 params.func = amdgpu_vm_cpu_set_ptes;
948         } else {
949                 ndw = 512 * 8;
950                 r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
951                 if (r)
952                         return r;
953
954                 params.ib = &job->ibs[0];
955                 params.func = amdgpu_vm_do_set_ptes;
956         }
957
958         while (!list_empty(&vm->relocated)) {
959                 struct amdgpu_vm_bo_base *bo_base, *parent;
960                 struct amdgpu_vm_pt *pt, *entry;
961                 struct amdgpu_bo *bo;
962
963                 bo_base = list_first_entry(&vm->relocated,
964                                            struct amdgpu_vm_bo_base,
965                                            vm_status);
966                 list_del_init(&bo_base->vm_status);
967
968                 bo = bo_base->bo->parent;
969                 if (!bo)
970                         continue;
971
972                 parent = list_first_entry(&bo->va, struct amdgpu_vm_bo_base,
973                                           bo_list);
974                 pt = container_of(parent, struct amdgpu_vm_pt, base);
975                 entry = container_of(bo_base, struct amdgpu_vm_pt, base);
976
977                 amdgpu_vm_update_pde(&params, vm, pt, entry);
978
979                 if (!vm->use_cpu_for_update &&
980                     (ndw - params.ib->length_dw) < 32)
981                         break;
982         }
983
984         if (vm->use_cpu_for_update) {
985                 /* Flush HDP */
986                 mb();
987                 amdgpu_asic_flush_hdp(adev, NULL);
988         } else if (params.ib->length_dw == 0) {
989                 amdgpu_job_free(job);
990         } else {
991                 struct amdgpu_bo *root = vm->root.base.bo;
992                 struct amdgpu_ring *ring;
993                 struct dma_fence *fence;
994
995                 ring = container_of(vm->entity.sched, struct amdgpu_ring,
996                                     sched);
997
998                 amdgpu_ring_pad_ib(ring, params.ib);
999                 amdgpu_sync_resv(adev, &job->sync, root->tbo.resv,
1000                                  AMDGPU_FENCE_OWNER_VM, false);
1001                 WARN_ON(params.ib->length_dw > ndw);
1002                 r = amdgpu_job_submit(job, ring, &vm->entity,
1003                                       AMDGPU_FENCE_OWNER_VM, &fence);
1004                 if (r)
1005                         goto error;
1006
1007                 amdgpu_bo_fence(root, fence, true);
1008                 dma_fence_put(vm->last_update);
1009                 vm->last_update = fence;
1010         }
1011
1012         if (!list_empty(&vm->relocated))
1013                 goto restart;
1014
1015         return 0;
1016
1017 error:
1018         amdgpu_vm_invalidate_level(adev, vm, &vm->root,
1019                                    adev->vm_manager.root_level);
1020         amdgpu_job_free(job);
1021         return r;
1022 }
1023
1024 /**
1025  * amdgpu_vm_get_entry - find the entry for an address
1026  *
1027  * @p: see amdgpu_pte_update_params definition
1028  * @addr: virtual address in question
1029  * @entry: resulting entry or NULL
1030  * @parent: parent entry
1031  *
1032  * Find the vm_pt entry and its parent for the given address.
1033  */
1034 void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
1035                          struct amdgpu_vm_pt **entry,
1036                          struct amdgpu_vm_pt **parent)
1037 {
1038         unsigned level = p->adev->vm_manager.root_level;
1039
1040         *parent = NULL;
1041         *entry = &p->vm->root;
1042         while ((*entry)->entries) {
1043                 unsigned shift = amdgpu_vm_level_shift(p->adev, level++);
1044
1045                 *parent = *entry;
1046                 *entry = &(*entry)->entries[addr >> shift];
1047                 addr &= (1ULL << shift) - 1;
1048         }
1049
1050         if (level != AMDGPU_VM_PTB)
1051                 *entry = NULL;
1052 }
1053
1054 /**
1055  * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages
1056  *
1057  * @p: see amdgpu_pte_update_params definition
1058  * @entry: vm_pt entry to check
1059  * @parent: parent entry
1060  * @nptes: number of PTEs updated with this operation
1061  * @dst: destination address where the PTEs should point to
1062  * @flags: access flags for the PTEs
1063  *
1064  * Check if we can update the PD with a huge page.
1065  */
1066 static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
1067                                         struct amdgpu_vm_pt *entry,
1068                                         struct amdgpu_vm_pt *parent,
1069                                         unsigned nptes, uint64_t dst,
1070                                         uint64_t flags)
1071 {
1072         uint64_t pde;
1073
1074         /* In the case of a mixed PT the PDE must point to it*/
1075         if (p->adev->asic_type >= CHIP_VEGA10 && !p->src &&
1076             nptes == AMDGPU_VM_PTE_COUNT(p->adev)) {
1077                 /* Set the huge page flag to stop scanning at this PDE */
1078                 flags |= AMDGPU_PDE_PTE;
1079         }
1080
1081         if (!(flags & AMDGPU_PDE_PTE)) {
1082                 if (entry->huge) {
1083                         /* Add the entry to the relocated list to update it. */
1084                         entry->huge = false;
1085                         list_move(&entry->base.vm_status, &p->vm->relocated);
1086                 }
1087                 return;
1088         }
1089
1090         entry->huge = true;
1091         amdgpu_gmc_get_vm_pde(p->adev, AMDGPU_VM_PDB0, &dst, &flags);
1092
1093         pde = (entry - parent->entries) * 8;
1094         if (parent->base.bo->shadow)
1095                 p->func(p, parent->base.bo->shadow, pde, dst, 1, 0, flags);
1096         p->func(p, parent->base.bo, pde, dst, 1, 0, flags);
1097 }
1098
1099 /**
1100  * amdgpu_vm_update_ptes - make sure that page tables are valid
1101  *
1102  * @params: see amdgpu_pte_update_params definition
1103  * @vm: requested vm
1104  * @start: start of GPU address range
1105  * @end: end of GPU address range
1106  * @dst: destination address to map to, the next dst inside the function
1107  * @flags: mapping flags
1108  *
1109  * Update the page tables in the range @start - @end.
1110  * Returns 0 for success, -EINVAL for failure.
1111  */
1112 static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
1113                                   uint64_t start, uint64_t end,
1114                                   uint64_t dst, uint64_t flags)
1115 {
1116         struct amdgpu_device *adev = params->adev;
1117         const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1;
1118
1119         uint64_t addr, pe_start;
1120         struct amdgpu_bo *pt;
1121         unsigned nptes;
1122
1123         /* walk over the address space and update the page tables */
1124         for (addr = start; addr < end; addr += nptes,
1125              dst += nptes * AMDGPU_GPU_PAGE_SIZE) {
1126                 struct amdgpu_vm_pt *entry, *parent;
1127
1128                 amdgpu_vm_get_entry(params, addr, &entry, &parent);
1129                 if (!entry)
1130                         return -ENOENT;
1131
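                /* update up to the end of the current page table or to the
                 * requested end address, whichever comes first
                 */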
1132                 if ((addr & ~mask) == (end & ~mask))
1133                         nptes = end - addr;
1134                 else
1135                         nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
1136
1137                 amdgpu_vm_handle_huge_pages(params, entry, parent,
1138                                             nptes, dst, flags);
1139                 /* We don't need to update PTEs for huge pages */
1140                 if (entry->huge)
1141                         continue;
1142
1143                 pt = entry->base.bo;
1144                 pe_start = (addr & mask) * 8;
1145                 if (pt->shadow)
1146                         params->func(params, pt->shadow, pe_start, dst, nptes,
1147                                      AMDGPU_GPU_PAGE_SIZE, flags);
1148                 params->func(params, pt, pe_start, dst, nptes,
1149                              AMDGPU_GPU_PAGE_SIZE, flags);
1150         }
1151
1152         return 0;
1153 }
1154
1155 /*
1156  * amdgpu_vm_frag_ptes - add fragment information to PTEs
1157  *
1158  * @params: see amdgpu_pte_update_params definition
1159  * @vm: requested vm
1160  * @start: first PTE to handle
1161  * @end: last PTE to handle
1162  * @dst: addr those PTEs should point to
1163  * @flags: hw mapping flags
1164  * Returns 0 for success, -EINVAL for failure.
1165  */
1166 static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params  *params,
1167                                 uint64_t start, uint64_t end,
1168                                 uint64_t dst, uint64_t flags)
1169 {
1170         /**
1171          * The MC L1 TLB supports variable sized pages, based on a fragment
1172          * field in the PTE. When this field is set to a non-zero value, page
1173          * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
1174          * flags are considered valid for all PTEs within the fragment range
1175          * and corresponding mappings are assumed to be physically contiguous.
1176          *
1177          * The L1 TLB can store a single PTE for the whole fragment,
1178          * significantly increasing the space available for translation
1179          * caching. This leads to large improvements in throughput when the
1180          * TLB is under pressure.
1181          *
1182          * The L2 TLB distributes small and large fragments into two
1183          * asymmetric partitions. The large fragment cache is significantly
1184          * larger. Thus, we try to use large fragments wherever possible.
1185          * Userspace can support this by aligning virtual base address and
1186          * allocation size to the fragment size.
1187          */
1188         unsigned max_frag = params->adev->vm_manager.fragment_size;
1189         int r;
1190
1191         /* system pages are non-contiguous */
1192         if (params->src || !(flags & AMDGPU_PTE_VALID))
1193                 return amdgpu_vm_update_ptes(params, start, end, dst, flags);
1194
1195         while (start != end) {
1196                 uint64_t frag_flags, frag_end;
1197                 unsigned frag;
1198
1199                 /* This intentionally wraps around if no bit is set */
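                /* i.e. the largest power-of-two block that is aligned to
                 * start and still fits into the remaining range
                 */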
1200                 frag = min((unsigned)ffs(start) - 1,
1201                            (unsigned)fls64(end - start) - 1);
1202                 if (frag >= max_frag) {
1203                         frag_flags = AMDGPU_PTE_FRAG(max_frag);
1204                         frag_end = end & ~((1ULL << max_frag) - 1);
1205                 } else {
1206                         frag_flags = AMDGPU_PTE_FRAG(frag);
1207                         frag_end = start + (1 << frag);
1208                 }
1209
1210                 r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
1211                                           flags | frag_flags);
1212                 if (r)
1213                         return r;
1214
1215                 dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
1216                 start = frag_end;
1217         }
1218
1219         return 0;
1220 }
1221
1222 /**
1223  * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
1224  *
1225  * @adev: amdgpu_device pointer
1226  * @exclusive: fence we need to sync to
1227  * @pages_addr: DMA addresses to use for mapping
1228  * @vm: requested vm
1229  * @start: start of mapped range
1230  * @last: last mapped entry
1231  * @flags: flags for the entries
1232  * @addr: addr to set the area to
1233  * @fence: optional resulting fence
1234  *
1235  * Fill in the page table entries between @start and @last.
1236  * Returns 0 for success, -EINVAL for failure.
1237  */
1238 static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
1239                                        struct dma_fence *exclusive,
1240                                        dma_addr_t *pages_addr,
1241                                        struct amdgpu_vm *vm,
1242                                        uint64_t start, uint64_t last,
1243                                        uint64_t flags, uint64_t addr,
1244                                        struct dma_fence **fence)
1245 {
1246         struct amdgpu_ring *ring;
1247         void *owner = AMDGPU_FENCE_OWNER_VM;
1248         unsigned nptes, ncmds, ndw;
1249         struct amdgpu_job *job;
1250         struct amdgpu_pte_update_params params;
1251         struct dma_fence *f = NULL;
1252         int r;
1253
1254         memset(&params, 0, sizeof(params));
1255         params.adev = adev;
1256         params.vm = vm;
1257
1258         /* sync to everything on unmapping */
1259         if (!(flags & AMDGPU_PTE_VALID))
1260                 owner = AMDGPU_FENCE_OWNER_UNDEFINED;
1261
1262         if (vm->use_cpu_for_update) {
1263                 /* params.src is used as flag to indicate system memory */
1264                 if (pages_addr)
1265                         params.src = ~0;
1266
1267                 /* Wait for PT BOs to be free. PTs share the same resv. object
1268                  * as the root PD BO
1269                  */
1270                 r = amdgpu_vm_wait_pd(adev, vm, owner);
1271                 if (unlikely(r))
1272                         return r;
1273
1274                 params.func = amdgpu_vm_cpu_set_ptes;
1275                 params.pages_addr = pages_addr;
1276                 return amdgpu_vm_frag_ptes(&params, start, last + 1,
1277                                            addr, flags);
1278         }
1279
1280         ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
1281
1282         nptes = last - start + 1;
1283
1284         /*
1285          * reserve space for two commands every (1 << BLOCK_SIZE)
1286          *  entries or 2k dwords (whatever is smaller)
1287          *
1288          * The second command is for the shadow pagetables.
1289          */
1290         if (vm->root.base.bo->shadow)
1291                 ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2;
1292         else
1293                 ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1);
1294
1295         /* padding, etc. */
1296         ndw = 64;
1297
1298         if (pages_addr) {
1299                 /* copy commands needed */
1300                 ndw += ncmds * adev->vm_manager.vm_pte_funcs->copy_pte_num_dw;
1301
1302                 /* and also PTEs */
1303                 ndw += nptes * 2;
1304
1305                 params.func = amdgpu_vm_do_copy_ptes;
1306
1307         } else {
1308                 /* set page commands needed */
1309                 ndw += ncmds * 10;
1310
1311                 /* extra commands for begin/end fragments */
1312                 ndw += 2 * 10 * adev->vm_manager.fragment_size;
1313
1314                 params.func = amdgpu_vm_do_set_ptes;
1315         }
1316
1317         r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
1318         if (r)
1319                 return r;
1320
1321         params.ib = &job->ibs[0];
1322
1323         if (pages_addr) {
1324                 uint64_t *pte;
1325                 unsigned i;
1326
1327                 /* Put the PTEs at the end of the IB. */
1328                 i = ndw - nptes * 2;
1329                 pte = (uint64_t *)&(job->ibs->ptr[i]);
1330                 params.src = job->ibs->gpu_addr + i * 4;
1331
1332                 for (i = 0; i < nptes; ++i) {
1333                         pte[i] = amdgpu_vm_map_gart(pages_addr, addr + i *
1334                                                     AMDGPU_GPU_PAGE_SIZE);
1335                         pte[i] |= flags;
1336                 }
1337                 addr = 0;
1338         }
1339
1340         r = amdgpu_sync_fence(adev, &job->sync, exclusive, false);
1341         if (r)
1342                 goto error_free;
1343
1344         r = amdgpu_sync_resv(adev, &job->sync, vm->root.base.bo->tbo.resv,
1345                              owner, false);
1346         if (r)
1347                 goto error_free;
1348
1349         r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
1350         if (r)
1351                 goto error_free;
1352
1353         r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
1354         if (r)
1355                 goto error_free;
1356
1357         amdgpu_ring_pad_ib(ring, params.ib);
1358         WARN_ON(params.ib->length_dw > ndw);
1359         r = amdgpu_job_submit(job, ring, &vm->entity,
1360                               AMDGPU_FENCE_OWNER_VM, &f);
1361         if (r)
1362                 goto error_free;
1363
1364         amdgpu_bo_fence(vm->root.base.bo, f, true);
1365         dma_fence_put(*fence);
1366         *fence = f;
1367         return 0;
1368
1369 error_free:
1370         amdgpu_job_free(job);
1371         return r;
1372 }
1373
1374 /**
1375  * amdgpu_vm_bo_split_mapping - split a mapping into smaller chunks
1376  *
1377  * @adev: amdgpu_device pointer
1378  * @exclusive: fence we need to sync to
1379  * @pages_addr: DMA addresses to use for mapping
1380  * @vm: requested vm
1381  * @mapping: mapped range and flags to use for the update
1382  * @flags: HW flags for the mapping
1383  * @nodes: array of drm_mm_nodes with the MC addresses
1384  * @fence: optional resulting fence
1385  *
1386  * Split the mapping into smaller chunks so that each update fits
1387  * into a SDMA IB.
1388  * Returns 0 for success, -EINVAL for failure.
1389  */
1390 static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
1391                                       struct dma_fence *exclusive,
1392                                       dma_addr_t *pages_addr,
1393                                       struct amdgpu_vm *vm,
1394                                       struct amdgpu_bo_va_mapping *mapping,
1395                                       uint64_t flags,
1396                                       struct drm_mm_node *nodes,
1397                                       struct dma_fence **fence)
1398 {
1399         unsigned min_linear_pages = 1 << adev->vm_manager.fragment_size;
1400         uint64_t pfn, start = mapping->start;
1401         int r;
1402
1403         /* Normally bo_va->flags only contains the READABLE and WRITEABLE
1404          * bits, but just in case we filter the flags here again
1405          */
1406         if (!(mapping->flags & AMDGPU_PTE_READABLE))
1407                 flags &= ~AMDGPU_PTE_READABLE;
1408         if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
1409                 flags &= ~AMDGPU_PTE_WRITEABLE;
1410
1411         flags &= ~AMDGPU_PTE_EXECUTABLE;
1412         flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
1413
1414         flags &= ~AMDGPU_PTE_MTYPE_MASK;
1415         flags |= (mapping->flags & AMDGPU_PTE_MTYPE_MASK);
1416
1417         if ((mapping->flags & AMDGPU_PTE_PRT) &&
1418             (adev->asic_type >= CHIP_VEGA10)) {
1419                 flags |= AMDGPU_PTE_PRT;
1420                 flags &= ~AMDGPU_PTE_VALID;
1421         }
1422
1423         trace_amdgpu_vm_bo_update(mapping);
1424
1425         pfn = mapping->offset >> PAGE_SHIFT;
1426         if (nodes) {
1427                 while (pfn >= nodes->size) {
1428                         pfn -= nodes->size;
1429                         ++nodes;
1430                 }
1431         }
1432
1433         do {
1434                 dma_addr_t *dma_addr = NULL;
1435                 uint64_t max_entries;
1436                 uint64_t addr, last;
1437
1438                 if (nodes) {
1439                         addr = nodes->start << PAGE_SHIFT;
1440                         max_entries = (nodes->size - pfn) *
1441                                 (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE);
1442                 } else {
1443                         addr = 0;
1444                         max_entries = S64_MAX;
1445                 }
1446
1447                 if (pages_addr) {
1448                         uint64_t count;
1449
1450                         max_entries = min(max_entries, 16ull * 1024ull);
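                        /* count how many of the following system pages are
                         * physically contiguous, large runs can be mapped
                         * linearly instead of page by page
                         */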
1451                         for (count = 1; count < max_entries; ++count) {
1452                                 uint64_t idx = pfn + count;
1453
1454                                 if (pages_addr[idx] !=
1455                                     (pages_addr[idx - 1] + PAGE_SIZE))
1456                                         break;
1457                         }
1458
1459                         if (count < min_linear_pages) {
1460                                 addr = pfn << PAGE_SHIFT;
1461                                 dma_addr = pages_addr;
1462                         } else {
1463                                 addr = pages_addr[pfn];
1464                                 max_entries = count;
1465                         }
1466
1467                 } else if (flags & AMDGPU_PTE_VALID) {
1468                         addr += adev->vm_manager.vram_base_offset;
1469                         addr += pfn << PAGE_SHIFT;
1470                 }
1471
1472                 last = min((uint64_t)mapping->last, start + max_entries - 1);
1473                 r = amdgpu_vm_bo_update_mapping(adev, exclusive, dma_addr, vm,
1474                                                 start, last, flags, addr,
1475                                                 fence);
1476                 if (r)
1477                         return r;
1478
1479                 pfn += last - start + 1;
1480                 if (nodes && nodes->size == pfn) {
1481                         pfn = 0;
1482                         ++nodes;
1483                 }
1484                 start = last + 1;
1485
1486         } while (unlikely(start != mapping->last + 1));
1487
1488         return 0;
1489 }
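/*
 * Illustrative sketch (not part of the driver): how the loop above chops one
 * mapping into per-drm_mm_node updates.  This is plain user-space C with a
 * reduced stand-in for drm_mm_node and made-up node sizes; it ignores the
 * pages_addr path and the PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE scaling.
 */
#include <stdint.h>
#include <stdio.h>

struct fake_node { uint64_t start, size; };	/* stand-in for drm_mm_node */

int main(void)
{
	struct fake_node nodes[] = { { 0x1000, 256 }, { 0x8000, 512 } };
	struct fake_node *node = nodes;
	uint64_t pfn = 0;			/* mapping->offset >> PAGE_SHIFT */
	uint64_t start = 0, last, end = 599;	/* mapping spans 600 GPU pages */

	do {
		uint64_t max_entries = node->size - pfn;

		last = start + max_entries - 1;
		if (last > end)
			last = end;
		printf("update pages %llu-%llu from node offset 0x%llx\n",
		       (unsigned long long)start, (unsigned long long)last,
		       (unsigned long long)(node->start + pfn));

		pfn += last - start + 1;
		if (pfn == node->size) {	/* node exhausted, move on */
			pfn = 0;
			++node;
		}
		start = last + 1;
	} while (start != end + 1);

	return 0;
}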
1490
1491 /**
1492  * amdgpu_vm_bo_update - update all BO mappings in the vm page table
1493  *
1494  * @adev: amdgpu_device pointer
1495  * @bo_va: requested BO and VM object
1496  * @clear: if true clear the entries
1497  *
1498  * Fill in the page table entries for @bo_va.
1499  * Returns 0 for success, -EINVAL for failure.
1500  */
1501 int amdgpu_vm_bo_update(struct amdgpu_device *adev,
1502                         struct amdgpu_bo_va *bo_va,
1503                         bool clear)
1504 {
1505         struct amdgpu_bo *bo = bo_va->base.bo;
1506         struct amdgpu_vm *vm = bo_va->base.vm;
1507         struct amdgpu_bo_va_mapping *mapping;
1508         dma_addr_t *pages_addr = NULL;
1509         struct ttm_mem_reg *mem;
1510         struct drm_mm_node *nodes;
1511         struct dma_fence *exclusive, **last_update;
1512         uint64_t flags;
1513         int r;
1514
1515         if (clear || !bo_va->base.bo) {
1516                 mem = NULL;
1517                 nodes = NULL;
1518                 exclusive = NULL;
1519         } else {
1520                 struct ttm_dma_tt *ttm;
1521
1522                 mem = &bo_va->base.bo->tbo.mem;
1523                 nodes = mem->mm_node;
1524                 if (mem->mem_type == TTM_PL_TT) {
1525                         ttm = container_of(bo_va->base.bo->tbo.ttm,
1526                                            struct ttm_dma_tt, ttm);
1527                         pages_addr = ttm->dma_address;
1528                 }
1529                 exclusive = reservation_object_get_excl(bo->tbo.resv);
1530         }
1531
1532         if (bo)
1533                 flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
1534         else
1535                 flags = 0x0;
1536
1537         if (clear || (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv))
1538                 last_update = &vm->last_update;
1539         else
1540                 last_update = &bo_va->last_pt_update;
1541
1542         if (!clear && bo_va->base.moved) {
1543                 bo_va->base.moved = false;
1544                 list_splice_init(&bo_va->valids, &bo_va->invalids);
1545
1546         } else if (bo_va->cleared != clear) {
1547                 list_splice_init(&bo_va->valids, &bo_va->invalids);
1548         }
1549
1550         list_for_each_entry(mapping, &bo_va->invalids, list) {
1551                 r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm,
1552                                                mapping, flags, nodes,
1553                                                last_update);
1554                 if (r)
1555                         return r;
1556         }
1557
1558         if (vm->use_cpu_for_update) {
1559                 /* Flush HDP */
1560                 mb();
1561                 amdgpu_asic_flush_hdp(adev, NULL);
1562         }
1563
1564         spin_lock(&vm->moved_lock);
1565         list_del_init(&bo_va->base.vm_status);
1566         spin_unlock(&vm->moved_lock);
1567
1568         /* If the BO is not in its preferred location add it back to
1569          * the evicted list so that it gets validated again on the
1570          * next command submission.
1571          */
1572         if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv &&
1573             !(bo->preferred_domains &
1574             amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type)))
1575                 list_add_tail(&bo_va->base.vm_status, &vm->evicted);
1576
1577         list_splice_init(&bo_va->invalids, &bo_va->valids);
1578         bo_va->cleared = clear;
1579
1580         if (trace_amdgpu_vm_bo_mapping_enabled()) {
1581                 list_for_each_entry(mapping, &bo_va->valids, list)
1582                         trace_amdgpu_vm_bo_mapping(mapping);
1583         }
1584
1585         return 0;
1586 }
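/*
 * Illustrative note (not part of the driver): for a BO that has just been
 * moved, the valids/invalids handling above works roughly like this:
 *
 *	bo_va->base.moved == true, clear == false
 *	  -> all mappings are spliced from valids onto invalids
 *	  -> every mapping on invalids is rewritten through
 *	     amdgpu_vm_bo_split_mapping()
 *	  -> invalids are spliced back onto valids and bo_va->cleared records
 *	     whether the PTEs now hold real addresses or were cleared
 *
 * So "valids" always describes what the page tables currently contain, while
 * "invalids" is the work list for the next update.
 */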
1587
1588 /**
1589  * amdgpu_vm_update_prt_state - update the global PRT state
1590  */
1591 static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev)
1592 {
1593         unsigned long flags;
1594         bool enable;
1595
1596         spin_lock_irqsave(&adev->vm_manager.prt_lock, flags);
1597         enable = !!atomic_read(&adev->vm_manager.num_prt_users);
1598         adev->gmc.gmc_funcs->set_prt(adev, enable);
1599         spin_unlock_irqrestore(&adev->vm_manager.prt_lock, flags);
1600 }
1601
1602 /**
1603  * amdgpu_vm_prt_get - add a PRT user
1604  */
1605 static void amdgpu_vm_prt_get(struct amdgpu_device *adev)
1606 {
1607         if (!adev->gmc.gmc_funcs->set_prt)
1608                 return;
1609
1610         if (atomic_inc_return(&adev->vm_manager.num_prt_users) == 1)
1611                 amdgpu_vm_update_prt_state(adev);
1612 }
1613
1614 /**
1615  * amdgpu_vm_prt_put - drop a PRT user
1616  */
1617 static void amdgpu_vm_prt_put(struct amdgpu_device *adev)
1618 {
1619         if (atomic_dec_return(&adev->vm_manager.num_prt_users) == 0)
1620                 amdgpu_vm_update_prt_state(adev);
1621 }
1622
1623 /**
1624  * amdgpu_vm_prt_cb - callback for updating the PRT status
1625  */
1626 static void amdgpu_vm_prt_cb(struct dma_fence *fence, struct dma_fence_cb *_cb)
1627 {
1628         struct amdgpu_prt_cb *cb = container_of(_cb, struct amdgpu_prt_cb, cb);
1629
1630         amdgpu_vm_prt_put(cb->adev);
1631         kfree(cb);
1632 }
1633
1634 /**
1635  * amdgpu_vm_add_prt_cb - add callback for updating the PRT status
1636  */
1637 static void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev,
1638                                  struct dma_fence *fence)
1639 {
1640         struct amdgpu_prt_cb *cb;
1641
1642         if (!adev->gmc.gmc_funcs->set_prt)
1643                 return;
1644
1645         cb = kmalloc(sizeof(struct amdgpu_prt_cb), GFP_KERNEL);
1646         if (!cb) {
1647                 /* Last resort when we are OOM */
1648                 if (fence)
1649                         dma_fence_wait(fence, false);
1650
1651                 amdgpu_vm_prt_put(adev);
1652         } else {
1653                 cb->adev = adev;
1654                 if (!fence || dma_fence_add_callback(fence, &cb->cb,
1655                                                      amdgpu_vm_prt_cb))
1656                         amdgpu_vm_prt_cb(fence, &cb->cb);
1657         }
1658 }
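/*
 * Illustrative note (not part of the driver): the global PRT enable bit is
 * driven by a plain reference count.  Roughly, the pairing in this file is:
 *
 *	amdgpu_vm_bo_insert_map()  -> amdgpu_vm_prt_get()
 *	amdgpu_vm_free_mapping()   -> amdgpu_vm_add_prt_cb(fence)
 *	  fence signals -> amdgpu_vm_prt_cb() -> amdgpu_vm_prt_put()
 *
 * so PRT support stays enabled in hardware until the last PRT mapping is gone
 * *and* the GPU work that might still reference it has finished.
 */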
1659
1660 /**
1661  * amdgpu_vm_free_mapping - free a mapping
1662  *
1663  * @adev: amdgpu_device pointer
1664  * @vm: requested vm
1665  * @mapping: mapping to be freed
1666  * @fence: fence of the unmap operation
1667  *
1668  * Free a mapping and make sure we decrease the PRT usage count if applicable.
1669  */
1670 static void amdgpu_vm_free_mapping(struct amdgpu_device *adev,
1671                                    struct amdgpu_vm *vm,
1672                                    struct amdgpu_bo_va_mapping *mapping,
1673                                    struct dma_fence *fence)
1674 {
1675         if (mapping->flags & AMDGPU_PTE_PRT)
1676                 amdgpu_vm_add_prt_cb(adev, fence);
1677         kfree(mapping);
1678 }
1679
1680 /**
1681  * amdgpu_vm_prt_fini - finish all prt mappings
1682  *
1683  * @adev: amdgpu_device pointer
1684  * @vm: requested vm
1685  *
1686  * Register a cleanup callback to disable PRT support after VM dies.
1687  */
1688 static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
1689 {
1690         struct reservation_object *resv = vm->root.base.bo->tbo.resv;
1691         struct dma_fence *excl, **shared;
1692         unsigned i, shared_count;
1693         int r;
1694
1695         r = reservation_object_get_fences_rcu(resv, &excl,
1696                                               &shared_count, &shared);
1697         if (r) {
1698                 /* Not enough memory to grab the fence list, as last resort
1699                  * block for all the fences to complete.
1700                  */
1701                 reservation_object_wait_timeout_rcu(resv, true, false,
1702                                                     MAX_SCHEDULE_TIMEOUT);
1703                 return;
1704         }
1705
1706         /* Add a callback for each fence in the reservation object */
1707         amdgpu_vm_prt_get(adev);
1708         amdgpu_vm_add_prt_cb(adev, excl);
1709
1710         for (i = 0; i < shared_count; ++i) {
1711                 amdgpu_vm_prt_get(adev);
1712                 amdgpu_vm_add_prt_cb(adev, shared[i]);
1713         }
1714
1715         kfree(shared);
1716 }
1717
1718 /**
1719  * amdgpu_vm_clear_freed - clear freed BOs in the PT
1720  *
1721  * @adev: amdgpu_device pointer
1722  * @vm: requested vm
1723  * @fence: optional resulting fence (unchanged if no work needed to be done
1724  * or if an error occurred)
1725  *
1726  * Make sure all freed BOs are cleared in the PT.
1727  * Returns 0 for success.
1728  *
1729  * PTs have to be reserved and mutex must be locked!
1730  */
1731 int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
1732                           struct amdgpu_vm *vm,
1733                           struct dma_fence **fence)
1734 {
1735         struct amdgpu_bo_va_mapping *mapping;
1736         uint64_t init_pte_value = 0;
1737         struct dma_fence *f = NULL;
1738         int r;
1739
1740         while (!list_empty(&vm->freed)) {
1741                 mapping = list_first_entry(&vm->freed,
1742                         struct amdgpu_bo_va_mapping, list);
1743                 list_del(&mapping->list);
1744
1745                 if (vm->pte_support_ats && mapping->start < AMDGPU_VA_HOLE_START)
1746                         init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
1747
1748                 r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
1749                                                 mapping->start, mapping->last,
1750                                                 init_pte_value, 0, &f);
1751                 amdgpu_vm_free_mapping(adev, vm, mapping, f);
1752                 if (r) {
1753                         dma_fence_put(f);
1754                         return r;
1755                 }
1756         }
1757
1758         if (fence && f) {
1759                 dma_fence_put(*fence);
1760                 *fence = f;
1761         } else {
1762                 dma_fence_put(f);
1763         }
1764
1765         return 0;
1766
1767 }
1768
1769 /**
1770  * amdgpu_vm_handle_moved - handle moved BOs in the PT
1771  *
1772  * @adev: amdgpu_device pointer
1773  * @vm: requested vm
1775  *
1776  * Make sure all BOs which are moved are updated in the PTs.
1777  * Returns 0 for success.
1778  *
1779  * PTs have to be reserved!
1780  */
1781 int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
1782                            struct amdgpu_vm *vm)
1783 {
1784         bool clear;
1785         int r = 0;
1786
1787         spin_lock(&vm->moved_lock);
1788         while (!list_empty(&vm->moved)) {
1789                 struct amdgpu_bo_va *bo_va;
1790                 struct reservation_object *resv;
1791
1792                 bo_va = list_first_entry(&vm->moved,
1793                         struct amdgpu_bo_va, base.vm_status);
1794                 spin_unlock(&vm->moved_lock);
1795
1796                 resv = bo_va->base.bo->tbo.resv;
1797
1798                 /* Per VM BOs never need to be cleared in the page tables */
1799                 if (resv == vm->root.base.bo->tbo.resv)
1800                         clear = false;
1801                 /* Try to reserve the BO to avoid clearing its ptes */
1802                 else if (!amdgpu_vm_debug && reservation_object_trylock(resv))
1803                         clear = false;
1804                 /* Somebody else is using the BO right now */
1805                 else
1806                         clear = true;
1807
1808                 r = amdgpu_vm_bo_update(adev, bo_va, clear);
1809                 if (r)
1810                         return r;
1811
1812                 if (!clear && resv != vm->root.base.bo->tbo.resv)
1813                         reservation_object_unlock(resv);
1814
1815                 spin_lock(&vm->moved_lock);
1816         }
1817         spin_unlock(&vm->moved_lock);
1818
1819         return r;
1820 }
1821
1822 /**
1823  * amdgpu_vm_bo_add - add a bo to a specific vm
1824  *
1825  * @adev: amdgpu_device pointer
1826  * @vm: requested vm
1827  * @bo: amdgpu buffer object
1828  *
1829  * Add @bo into the requested vm.
1830  * Add @bo to the list of bos associated with the vm
1831  * Returns newly added bo_va or NULL for failure
1832  *
1833  * Object has to be reserved!
1834  */
1835 struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
1836                                       struct amdgpu_vm *vm,
1837                                       struct amdgpu_bo *bo)
1838 {
1839         struct amdgpu_bo_va *bo_va;
1840
1841         bo_va = kzalloc(sizeof(struct amdgpu_bo_va), GFP_KERNEL);
1842         if (bo_va == NULL) {
1843                 return NULL;
1844         }
1845         amdgpu_vm_bo_base_init(&bo_va->base, vm, bo);
1846
1847         bo_va->ref_count = 1;
1848         INIT_LIST_HEAD(&bo_va->valids);
1849         INIT_LIST_HEAD(&bo_va->invalids);
1850
1851         return bo_va;
1852 }
1853
1854
1855 /**
1856  * amdgpu_vm_bo_insert_map - insert a new mapping
1857  *
1858  * @adev: amdgpu_device pointer
1859  * @bo_va: bo_va to store the address
1860  * @mapping: the mapping to insert
1861  *
1862  * Insert a new mapping into all structures.
1863  */
1864 static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
1865                                     struct amdgpu_bo_va *bo_va,
1866                                     struct amdgpu_bo_va_mapping *mapping)
1867 {
1868         struct amdgpu_vm *vm = bo_va->base.vm;
1869         struct amdgpu_bo *bo = bo_va->base.bo;
1870
1871         mapping->bo_va = bo_va;
1872         list_add(&mapping->list, &bo_va->invalids);
1873         amdgpu_vm_it_insert(mapping, &vm->va);
1874
1875         if (mapping->flags & AMDGPU_PTE_PRT)
1876                 amdgpu_vm_prt_get(adev);
1877
1878         if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
1879                 spin_lock(&vm->moved_lock);
1880                 if (list_empty(&bo_va->base.vm_status))
1881                         list_add(&bo_va->base.vm_status, &vm->moved);
1882                 spin_unlock(&vm->moved_lock);
1883         }
1884         trace_amdgpu_vm_bo_map(bo_va, mapping);
1885 }
1886
1887 /**
1888  * amdgpu_vm_bo_map - map bo inside a vm
1889  *
1890  * @adev: amdgpu_device pointer
1891  * @bo_va: bo_va to store the address
1892  * @saddr: where to map the BO
1893  * @offset: requested offset in the BO
1894  * @flags: attributes of pages (read/write/valid/etc.)
1895  *
1896  * Add a mapping of the BO at the specified addr into the VM.
1897  * Returns 0 for success, error for failure.
1898  *
1899  * Object has to be reserved and unreserved outside!
1900  */
1901 int amdgpu_vm_bo_map(struct amdgpu_device *adev,
1902                      struct amdgpu_bo_va *bo_va,
1903                      uint64_t saddr, uint64_t offset,
1904                      uint64_t size, uint64_t flags)
1905 {
1906         struct amdgpu_bo_va_mapping *mapping, *tmp;
1907         struct amdgpu_bo *bo = bo_va->base.bo;
1908         struct amdgpu_vm *vm = bo_va->base.vm;
1909         uint64_t eaddr;
1910
1911         /* validate the parameters */
1912         if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK ||
1913             size == 0 || size & AMDGPU_GPU_PAGE_MASK)
1914                 return -EINVAL;
1915
1916         /* make sure object fit at this offset */
1917         eaddr = saddr + size - 1;
1918         if (saddr >= eaddr ||
1919             (bo && offset + size > amdgpu_bo_size(bo)))
1920                 return -EINVAL;
1921
1922         saddr /= AMDGPU_GPU_PAGE_SIZE;
1923         eaddr /= AMDGPU_GPU_PAGE_SIZE;
1924
1925         tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
1926         if (tmp) {
1927                 /* bo and tmp overlap, invalid addr */
1928                 dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with "
1929                         "0x%010Lx-0x%010Lx\n", bo, saddr, eaddr,
1930                         tmp->start, tmp->last + 1);
1931                 return -EINVAL;
1932         }
1933
1934         mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
1935         if (!mapping)
1936                 return -ENOMEM;
1937
1938         mapping->start = saddr;
1939         mapping->last = eaddr;
1940         mapping->offset = offset;
1941         mapping->flags = flags;
1942
1943         amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
1944
1945         return 0;
1946 }
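/*
 * Illustrative sketch (not part of the driver): typical in-kernel usage of
 * amdgpu_vm_bo_add()/amdgpu_vm_bo_map(), with the BO already reserved and all
 * error handling elided.  The GPU VA below is a made-up example value; both
 * the VA and the size must be AMDGPU_GPU_PAGE_SIZE aligned or -EINVAL is
 * returned.
 *
 *	bo_va = amdgpu_vm_bo_add(adev, vm, bo);
 *	r = amdgpu_vm_bo_map(adev, bo_va, 0x400000, 0, amdgpu_bo_size(bo),
 *			     AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE);
 *
 * The new mapping only reaches the page tables on the next
 * amdgpu_vm_bo_update() for this bo_va.
 */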
1947
1948 /**
1949  * amdgpu_vm_bo_replace_map - map bo inside a vm, replacing existing mappings
1950  *
1951  * @adev: amdgpu_device pointer
1952  * @bo_va: bo_va to store the address
1953  * @saddr: where to map the BO
1954  * @offset: requested offset in the BO
1955  * @flags: attributes of pages (read/write/valid/etc.)
1956  *
1957  * Add a mapping of the BO at the specified addr into the VM. Replace existing
1958  * mappings as we do so.
1959  * Returns 0 for success, error for failure.
1960  *
1961  * Object has to be reserved and unreserved outside!
1962  */
1963 int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
1964                              struct amdgpu_bo_va *bo_va,
1965                              uint64_t saddr, uint64_t offset,
1966                              uint64_t size, uint64_t flags)
1967 {
1968         struct amdgpu_bo_va_mapping *mapping;
1969         struct amdgpu_bo *bo = bo_va->base.bo;
1970         uint64_t eaddr;
1971         int r;
1972
1973         /* validate the parameters */
1974         if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK ||
1975             size == 0 || size & AMDGPU_GPU_PAGE_MASK)
1976                 return -EINVAL;
1977
1978         /* make sure object fit at this offset */
1979         eaddr = saddr + size - 1;
1980         if (saddr >= eaddr ||
1981             (bo && offset + size > amdgpu_bo_size(bo)))
1982                 return -EINVAL;
1983
1984         /* Allocate all the needed memory */
1985         mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
1986         if (!mapping)
1987                 return -ENOMEM;
1988
1989         r = amdgpu_vm_bo_clear_mappings(adev, bo_va->base.vm, saddr, size);
1990         if (r) {
1991                 kfree(mapping);
1992                 return r;
1993         }
1994
1995         saddr /= AMDGPU_GPU_PAGE_SIZE;
1996         eaddr /= AMDGPU_GPU_PAGE_SIZE;
1997
1998         mapping->start = saddr;
1999         mapping->last = eaddr;
2000         mapping->offset = offset;
2001         mapping->flags = flags;
2002
2003         amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
2004
2005         return 0;
2006 }
2007
2008 /**
2009  * amdgpu_vm_bo_unmap - remove bo mapping from vm
2010  *
2011  * @adev: amdgpu_device pointer
2012  * @bo_va: bo_va to remove the address from
2013  * @saddr: where the BO is mapped
2014  *
2015  * Remove a mapping of the BO at the specified addr from the VM.
2016  * Returns 0 for success, error for failure.
2017  *
2018  * Object has to be reserved and unreserved outside!
2019  */
2020 int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
2021                        struct amdgpu_bo_va *bo_va,
2022                        uint64_t saddr)
2023 {
2024         struct amdgpu_bo_va_mapping *mapping;
2025         struct amdgpu_vm *vm = bo_va->base.vm;
2026         bool valid = true;
2027
2028         saddr /= AMDGPU_GPU_PAGE_SIZE;
2029
2030         list_for_each_entry(mapping, &bo_va->valids, list) {
2031                 if (mapping->start == saddr)
2032                         break;
2033         }
2034
2035         if (&mapping->list == &bo_va->valids) {
2036                 valid = false;
2037
2038                 list_for_each_entry(mapping, &bo_va->invalids, list) {
2039                         if (mapping->start == saddr)
2040                                 break;
2041                 }
2042
2043                 if (&mapping->list == &bo_va->invalids)
2044                         return -ENOENT;
2045         }
2046
2047         list_del(&mapping->list);
2048         amdgpu_vm_it_remove(mapping, &vm->va);
2049         mapping->bo_va = NULL;
2050         trace_amdgpu_vm_bo_unmap(bo_va, mapping);
2051
2052         if (valid)
2053                 list_add(&mapping->list, &vm->freed);
2054         else
2055                 amdgpu_vm_free_mapping(adev, vm, mapping,
2056                                        bo_va->last_pt_update);
2057
2058         return 0;
2059 }
2060
2061 /**
2062  * amdgpu_vm_bo_clear_mappings - remove all mappings in a specific range
2063  *
2064  * @adev: amdgpu_device pointer
2065  * @vm: VM structure to use
2066  * @saddr: start of the range
2067  * @size: size of the range
2068  *
2069  * Remove all mappings in a range, split them as appropriate.
2070  * Returns 0 for success, error for failure.
2071  */
2072 int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
2073                                 struct amdgpu_vm *vm,
2074                                 uint64_t saddr, uint64_t size)
2075 {
2076         struct amdgpu_bo_va_mapping *before, *after, *tmp, *next;
2077         LIST_HEAD(removed);
2078         uint64_t eaddr;
2079
2080         eaddr = saddr + size - 1;
2081         saddr /= AMDGPU_GPU_PAGE_SIZE;
2082         eaddr /= AMDGPU_GPU_PAGE_SIZE;
2083
2084         /* Allocate all the needed memory */
2085         before = kzalloc(sizeof(*before), GFP_KERNEL);
2086         if (!before)
2087                 return -ENOMEM;
2088         INIT_LIST_HEAD(&before->list);
2089
2090         after = kzalloc(sizeof(*after), GFP_KERNEL);
2091         if (!after) {
2092                 kfree(before);
2093                 return -ENOMEM;
2094         }
2095         INIT_LIST_HEAD(&after->list);
2096
2097         /* Now gather all removed mappings */
2098         tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
2099         while (tmp) {
2100                 /* Remember mapping split at the start */
2101                 if (tmp->start < saddr) {
2102                         before->start = tmp->start;
2103                         before->last = saddr - 1;
2104                         before->offset = tmp->offset;
2105                         before->flags = tmp->flags;
2106                         list_add(&before->list, &tmp->list);
2107                 }
2108
2109                 /* Remember mapping split at the end */
2110                 if (tmp->last > eaddr) {
2111                         after->start = eaddr + 1;
2112                         after->last = tmp->last;
2113                         after->offset = tmp->offset;
2114                         after->offset += (after->start - tmp->start) << PAGE_SHIFT;
2115                         after->flags = tmp->flags;
2116                         list_add(&after->list, &tmp->list);
2117                 }
2118
2119                 list_del(&tmp->list);
2120                 list_add(&tmp->list, &removed);
2121
2122                 tmp = amdgpu_vm_it_iter_next(tmp, saddr, eaddr);
2123         }
2124
2125         /* And free them up */
2126         list_for_each_entry_safe(tmp, next, &removed, list) {
2127                 amdgpu_vm_it_remove(tmp, &vm->va);
2128                 list_del(&tmp->list);
2129
2130                 if (tmp->start < saddr)
2131                         tmp->start = saddr;
2132                 if (tmp->last > eaddr)
2133                         tmp->last = eaddr;
2134
2135                 tmp->bo_va = NULL;
2136                 list_add(&tmp->list, &vm->freed);
2137                 trace_amdgpu_vm_bo_unmap(NULL, tmp);
2138         }
2139
2140         /* Insert partial mapping before the range */
2141         if (!list_empty(&before->list)) {
2142                 amdgpu_vm_it_insert(before, &vm->va);
2143                 if (before->flags & AMDGPU_PTE_PRT)
2144                         amdgpu_vm_prt_get(adev);
2145         } else {
2146                 kfree(before);
2147         }
2148
2149         /* Insert partial mapping after the range */
2150         if (!list_empty(&after->list)) {
2151                 amdgpu_vm_it_insert(after, &vm->va);
2152                 if (after->flags & AMDGPU_PTE_PRT)
2153                         amdgpu_vm_prt_get(adev);
2154         } else {
2155                 kfree(after);
2156         }
2157
2158         return 0;
2159 }
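/*
 * Illustrative example (not part of the driver), with made-up addresses:
 * assume one existing mapping covering GPU pages 0x100-0x4ff and a call to
 * amdgpu_vm_bo_clear_mappings() for pages 0x200-0x2ff.  The code above then
 *
 *	- moves the original mapping, trimmed to 0x200-0x2ff, onto vm->freed
 *	  so its PTEs get cleared by the next amdgpu_vm_clear_freed(),
 *	- inserts "before" as a new mapping for 0x100-0x1ff, and
 *	- inserts "after" for 0x300-0x4ff with its byte offset advanced by the
 *	  0x200 skipped pages so it still points at the right part of the BO.
 */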
2160
2161 /**
2162  * amdgpu_vm_bo_lookup_mapping - find mapping by address
2163  *
2164  * @vm: the requested VM
2165  *
2166  * Find a mapping by its address.
2167  */
2168 struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm,
2169                                                          uint64_t addr)
2170 {
2171         return amdgpu_vm_it_iter_first(&vm->va, addr, addr);
2172 }
2173
2174 /**
2175  * amdgpu_vm_bo_rmv - remove a bo from a specific vm
2176  *
2177  * @adev: amdgpu_device pointer
2178  * @bo_va: requested bo_va
2179  *
2180  * Remove @bo_va->bo from the requested vm.
2181  *
2182  * Object has to be reserved!
2183  */
2184 void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
2185                       struct amdgpu_bo_va *bo_va)
2186 {
2187         struct amdgpu_bo_va_mapping *mapping, *next;
2188         struct amdgpu_vm *vm = bo_va->base.vm;
2189
2190         list_del(&bo_va->base.bo_list);
2191
2192         spin_lock(&vm->moved_lock);
2193         list_del(&bo_va->base.vm_status);
2194         spin_unlock(&vm->moved_lock);
2195
2196         list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
2197                 list_del(&mapping->list);
2198                 amdgpu_vm_it_remove(mapping, &vm->va);
2199                 mapping->bo_va = NULL;
2200                 trace_amdgpu_vm_bo_unmap(bo_va, mapping);
2201                 list_add(&mapping->list, &vm->freed);
2202         }
2203         list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
2204                 list_del(&mapping->list);
2205                 amdgpu_vm_it_remove(mapping, &vm->va);
2206                 amdgpu_vm_free_mapping(adev, vm, mapping,
2207                                        bo_va->last_pt_update);
2208         }
2209
2210         dma_fence_put(bo_va->last_pt_update);
2211         kfree(bo_va);
2212 }
2213
2214 /**
2215  * amdgpu_vm_bo_invalidate - mark the bo as invalid
2216  *
2217  * @adev: amdgpu_device pointer
2218  * @bo: amdgpu buffer object
2219  * @evicted: is the BO evicted
2220  *
2221  * Mark @bo as invalid.
2222  */
2223 void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
2224                              struct amdgpu_bo *bo, bool evicted)
2225 {
2226         struct amdgpu_vm_bo_base *bo_base;
2227
2228         /* shadow bo doesn't have bo base, its validation needs its parent */
2229         if (bo->parent && bo->parent->shadow == bo)
2230                 bo = bo->parent;
2231
2232         list_for_each_entry(bo_base, &bo->va, bo_list) {
2233                 struct amdgpu_vm *vm = bo_base->vm;
2234
2235                 bo_base->moved = true;
2236                 if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
2237                         if (bo->tbo.type == ttm_bo_type_kernel)
2238                                 list_move(&bo_base->vm_status, &vm->evicted);
2239                         else
2240                                 list_move_tail(&bo_base->vm_status,
2241                                                &vm->evicted);
2242                         continue;
2243                 }
2244
2245                 if (bo->tbo.type == ttm_bo_type_kernel) {
2246                         if (list_empty(&bo_base->vm_status))
2247                                 list_add(&bo_base->vm_status, &vm->relocated);
2248                         continue;
2249                 }
2250
2251                 spin_lock(&bo_base->vm->moved_lock);
2252                 if (list_empty(&bo_base->vm_status))
2253                         list_add(&bo_base->vm_status, &vm->moved);
2254                 spin_unlock(&bo_base->vm->moved_lock);
2255         }
2256 }
2257
2258 static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size)
2259 {
2260         /* Total bits covered by PD + PTs */
2261         unsigned bits = ilog2(vm_size) + 18;
2262
2263         /* Make sure the PD is 4K in size up to 8GB of address space.
2264          * Above that, split equally between PD and PTs. */
2265         if (vm_size <= 8)
2266                 return (bits - 9);
2267         else
2268                 return ((bits + 3) / 2);
2269 }
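/*
 * Illustrative sketch (not part of the driver): the block size arithmetic
 * above, reproduced as plain user-space C for a few example VM sizes.  With
 * 4K pages and 8-byte PTEs a VM of vm_size GB needs ilog2(vm_size) + 18 bits
 * of page translation; up to 8GB everything beyond the 9 bits of a 4K PD goes
 * into one PTB level, above that the bits are split roughly evenly.
 */
#include <stdio.h>

static unsigned int ilog2_u64(unsigned long long v)
{
	unsigned int r = 0;

	while (v >>= 1)
		++r;
	return r;
}

static unsigned int block_size(unsigned long long vm_size_gb)
{
	unsigned int bits = ilog2_u64(vm_size_gb) + 18;

	return vm_size_gb <= 8 ? bits - 9 : (bits + 3) / 2;
}

int main(void)
{
	unsigned long long sizes[] = { 8, 64, 256 };
	unsigned int i;

	for (i = 0; i < 3; i++)
		printf("vm_size %3lluGB -> block_size %u (one PTB spans %u address bits)\n",
		       sizes[i], block_size(sizes[i]), block_size(sizes[i]) + 12);

	return 0;
}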
2270
2271 /**
2272  * amdgpu_vm_adjust_size - adjust vm size, block size and fragment size
2273  *
2274  * @adev: amdgpu_device pointer
2275  * @vm_size: the default VM size in GB if it's set to auto
2276  */
2277 void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size,
2278                            uint32_t fragment_size_default, unsigned max_level,
2279                            unsigned max_bits)
2280 {
2281         uint64_t tmp;
2282
2283         /* adjust vm size first */
2284         if (amdgpu_vm_size != -1) {
2285                 unsigned max_size = 1 << (max_bits - 30);
2286
2287                 vm_size = amdgpu_vm_size;
2288                 if (vm_size > max_size) {
2289                         dev_warn(adev->dev, "VM size (%d) too large, max is %u GB\n",
2290                                  amdgpu_vm_size, max_size);
2291                         vm_size = max_size;
2292                 }
2293         }
2294
2295         adev->vm_manager.max_pfn = (uint64_t)vm_size << 18;
2296
2297         tmp = roundup_pow_of_two(adev->vm_manager.max_pfn);
2298         if (amdgpu_vm_block_size != -1)
2299                 tmp >>= amdgpu_vm_block_size - 9;
2300         tmp = DIV_ROUND_UP(fls64(tmp) - 1, 9) - 1;
2301         adev->vm_manager.num_level = min(max_level, (unsigned)tmp);
2302         switch (adev->vm_manager.num_level) {
2303         case 3:
2304                 adev->vm_manager.root_level = AMDGPU_VM_PDB2;
2305                 break;
2306         case 2:
2307                 adev->vm_manager.root_level = AMDGPU_VM_PDB1;
2308                 break;
2309         case 1:
2310                 adev->vm_manager.root_level = AMDGPU_VM_PDB0;
2311                 break;
2312         default:
2313                 dev_err(adev->dev, "VMPT only supports 2~4+1 levels\n");
2314         }
2315         /* block size depends on vm size and hw setup */
2316         if (amdgpu_vm_block_size != -1)
2317                 adev->vm_manager.block_size =
2318                         min((unsigned)amdgpu_vm_block_size, max_bits
2319                             - AMDGPU_GPU_PAGE_SHIFT
2320                             - 9 * adev->vm_manager.num_level);
2321         else if (adev->vm_manager.num_level > 1)
2322                 adev->vm_manager.block_size = 9;
2323         else
2324                 adev->vm_manager.block_size = amdgpu_vm_get_block_size(tmp);
2325
2326         if (amdgpu_vm_fragment_size == -1)
2327                 adev->vm_manager.fragment_size = fragment_size_default;
2328         else
2329                 adev->vm_manager.fragment_size = amdgpu_vm_fragment_size;
2330
2331         DRM_INFO("vm size is %u GB, %u levels, block size is %u-bit, fragment size is %u-bit\n",
2332                  vm_size, adev->vm_manager.num_level + 1,
2333                  adev->vm_manager.block_size,
2334                  adev->vm_manager.fragment_size);
2335 }
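/*
 * Illustrative example (not part of the driver), with made-up module
 * parameters: vm_size = 256GB, no block size or fragment size override, and
 * an ASIC whose max_level is at least 2.  The code above then computes
 *
 *	max_pfn    = 256 << 18 = 0x4000000 4K pages
 *	tmp        = DIV_ROUND_UP(fls64(max_pfn) - 1, 9) - 1 = 2
 *	num_level  = 2, i.e. a three level page table structure
 *	block_size = 9, i.e. one 4K PTB per 2MB, since num_level > 1
 *
 * and the DRM_INFO line reports "vm size is 256 GB, 3 levels, block size is
 * 9-bit, ...".
 */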
2336
2337 /**
2338  * amdgpu_vm_init - initialize a vm instance
2339  *
2340  * @adev: amdgpu_device pointer
2341  * @vm: requested vm
2342  * @vm_context: Indicates if it is a GFX or Compute context
2343  *
2344  * Init @vm fields.
2345  */
2346 int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
2347                    int vm_context, unsigned int pasid)
2348 {
2349         struct amdgpu_bo_param bp;
2350         struct amdgpu_bo *root;
2351         const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
2352                 AMDGPU_VM_PTE_COUNT(adev) * 8);
2353         unsigned ring_instance;
2354         struct amdgpu_ring *ring;
2355         struct drm_sched_rq *rq;
2356         unsigned long size;
2357         uint64_t flags;
2358         int r, i;
2359
2360         vm->va = RB_ROOT_CACHED;
2361         for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
2362                 vm->reserved_vmid[i] = NULL;
2363         INIT_LIST_HEAD(&vm->evicted);
2364         INIT_LIST_HEAD(&vm->relocated);
2365         spin_lock_init(&vm->moved_lock);
2366         INIT_LIST_HEAD(&vm->moved);
2367         INIT_LIST_HEAD(&vm->freed);
2368
2369         /* create scheduler entity for page table updates */
2370
2371         ring_instance = atomic_inc_return(&adev->vm_manager.vm_pte_next_ring);
2372         ring_instance %= adev->vm_manager.vm_pte_num_rings;
2373         ring = adev->vm_manager.vm_pte_rings[ring_instance];
2374         rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
2375         r = drm_sched_entity_init(&ring->sched, &vm->entity,
2376                                   rq, NULL);
2377         if (r)
2378                 return r;
2379
2380         vm->pte_support_ats = false;
2381
2382         if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) {
2383                 vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
2384                                                 AMDGPU_VM_USE_CPU_FOR_COMPUTE);
2385
2386                 if (adev->asic_type == CHIP_RAVEN)
2387                         vm->pte_support_ats = true;
2388         } else {
2389                 vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
2390                                                 AMDGPU_VM_USE_CPU_FOR_GFX);
2391         }
2392         DRM_DEBUG_DRIVER("VM update mode is %s\n",
2393                          vm->use_cpu_for_update ? "CPU" : "SDMA");
2394         WARN_ONCE((vm->use_cpu_for_update && !amdgpu_vm_is_large_bar(adev)),
2395                   "CPU update of VM recommended only for large BAR system\n");
2396         vm->last_update = NULL;
2397
2398         flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
2399         if (vm->use_cpu_for_update)
2400                 flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
2401         else
2402                 flags |= AMDGPU_GEM_CREATE_SHADOW;
2403
2404         size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level);
2405         memset(&bp, 0, sizeof(bp));
2406         bp.size = size;
2407         bp.byte_align = align;
2408         bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
2409         bp.flags = flags;
2410         bp.type = ttm_bo_type_kernel;
2411         bp.resv = NULL;
2412         r = amdgpu_bo_create(adev, &bp, &root);
2413         if (r)
2414                 goto error_free_sched_entity;
2415
2416         r = amdgpu_bo_reserve(root, true);
2417         if (r)
2418                 goto error_free_root;
2419
2420         r = amdgpu_vm_clear_bo(adev, vm, root,
2421                                adev->vm_manager.root_level,
2422                                vm->pte_support_ats);
2423         if (r)
2424                 goto error_unreserve;
2425
2426         amdgpu_vm_bo_base_init(&vm->root.base, vm, root);
2427         amdgpu_bo_unreserve(vm->root.base.bo);
2428
2429         if (pasid) {
2430                 unsigned long flags;
2431
2432                 spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
2433                 r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid + 1,
2434                               GFP_ATOMIC);
2435                 spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
2436                 if (r < 0)
2437                         goto error_free_root;
2438
2439                 vm->pasid = pasid;
2440         }
2441
2442         INIT_KFIFO(vm->faults);
2443         vm->fault_credit = 16;
2444
2445         return 0;
2446
2447 error_unreserve:
2448         amdgpu_bo_unreserve(vm->root.base.bo);
2449
2450 error_free_root:
2451         amdgpu_bo_unref(&vm->root.base.bo->shadow);
2452         amdgpu_bo_unref(&vm->root.base.bo);
2453         vm->root.base.bo = NULL;
2454
2455 error_free_sched_entity:
2456         drm_sched_entity_fini(&ring->sched, &vm->entity);
2457
2458         return r;
2459 }
2460
2461 /**
2462  * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM
2463  *
2464  * This only works on GFX VMs that don't have any BOs added and no
2465  * page tables allocated yet.
2466  *
2467  * Changes the following VM parameters:
2468  * - use_cpu_for_update
2469  * - pte_support_ats
2470  * - pasid (old PASID is released, because compute manages its own PASIDs)
2471  *
2472  * Reinitializes the page directory to reflect the changed ATS
2473  * setting. May leave behind an unused shadow BO for the page
2474  * directory when switching from SDMA updates to CPU updates.
2475  *
2476  * Returns 0 for success, -errno for errors.
2477  */
2478 int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
2479 {
2480         bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
2481         int r;
2482
2483         r = amdgpu_bo_reserve(vm->root.base.bo, true);
2484         if (r)
2485                 return r;
2486
2487         /* Sanity checks */
2488         if (!RB_EMPTY_ROOT(&vm->va.rb_root) || vm->root.entries) {
2489                 r = -EINVAL;
2490                 goto error;
2491         }
2492
2493         /* Check if PD needs to be reinitialized and do it before
2494          * changing any other state, in case it fails.
2495          */
2496         if (pte_support_ats != vm->pte_support_ats) {
2497                 r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
2498                                adev->vm_manager.root_level,
2499                                pte_support_ats);
2500                 if (r)
2501                         goto error;
2502         }
2503
2504         /* Update VM state */
2505         vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
2506                                     AMDGPU_VM_USE_CPU_FOR_COMPUTE);
2507         vm->pte_support_ats = pte_support_ats;
2508         DRM_DEBUG_DRIVER("VM update mode is %s\n",
2509                          vm->use_cpu_for_update ? "CPU" : "SDMA");
2510         WARN_ONCE((vm->use_cpu_for_update && !amdgpu_vm_is_large_bar(adev)),
2511                   "CPU update of VM recommended only for large BAR system\n");
2512
2513         if (vm->pasid) {
2514                 unsigned long flags;
2515
2516                 spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
2517                 idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
2518                 spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
2519
2520                 vm->pasid = 0;
2521         }
2522
2523 error:
2524         amdgpu_bo_unreserve(vm->root.base.bo);
2525         return r;
2526 }
2527
2528 /**
2529  * amdgpu_vm_free_levels - free PD/PT levels
2530  *
2531  * @adev: amdgpu device structure
2532  * @parent: PD/PT starting level to free
2533  * @level: level of parent structure
2534  *
2535  * Free the page directory or page table level and all sub levels.
2536  */
2537 static void amdgpu_vm_free_levels(struct amdgpu_device *adev,
2538                                   struct amdgpu_vm_pt *parent,
2539                                   unsigned level)
2540 {
2541         unsigned i, num_entries = amdgpu_vm_num_entries(adev, level);
2542
2543         if (parent->base.bo) {
2544                 list_del(&parent->base.bo_list);
2545                 list_del(&parent->base.vm_status);
2546                 amdgpu_bo_unref(&parent->base.bo->shadow);
2547                 amdgpu_bo_unref(&parent->base.bo);
2548         }
2549
2550         if (parent->entries)
2551                 for (i = 0; i < num_entries; i++)
2552                         amdgpu_vm_free_levels(adev, &parent->entries[i],
2553                                               level + 1);
2554
2555         kvfree(parent->entries);
2556 }
2557
2558 /**
2559  * amdgpu_vm_fini - tear down a vm instance
2560  *
2561  * @adev: amdgpu_device pointer
2562  * @vm: requested vm
2563  *
2564  * Tear down @vm.
2565  * Unbind the VM and remove all bos from the vm bo list
2566  */
2567 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
2568 {
2569         struct amdgpu_bo_va_mapping *mapping, *tmp;
2570         bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt;
2571         struct amdgpu_bo *root;
2572         u64 fault;
2573         int i, r;
2574
2575         amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
2576
2577         /* Clear pending page faults from IH when the VM is destroyed */
2578         while (kfifo_get(&vm->faults, &fault))
2579                 amdgpu_ih_clear_fault(adev, fault);
2580
2581         if (vm->pasid) {
2582                 unsigned long flags;
2583
2584                 spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
2585                 idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
2586                 spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
2587         }
2588
2589         drm_sched_entity_fini(vm->entity.sched, &vm->entity);
2590
2591         if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
2592                 dev_err(adev->dev, "still active bo inside vm\n");
2593         }
2594         rbtree_postorder_for_each_entry_safe(mapping, tmp,
2595                                              &vm->va.rb_root, rb) {
2596                 list_del(&mapping->list);
2597                 amdgpu_vm_it_remove(mapping, &vm->va);
2598                 kfree(mapping);
2599         }
2600         list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
2601                 if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
2602                         amdgpu_vm_prt_fini(adev, vm);
2603                         prt_fini_needed = false;
2604                 }
2605
2606                 list_del(&mapping->list);
2607                 amdgpu_vm_free_mapping(adev, vm, mapping, NULL);
2608         }
2609
2610         root = amdgpu_bo_ref(vm->root.base.bo);
2611         r = amdgpu_bo_reserve(root, true);
2612         if (r) {
2613                 dev_err(adev->dev, "Leaking page tables because BO reservation failed\n");
2614         } else {
2615                 amdgpu_vm_free_levels(adev, &vm->root,
2616                                       adev->vm_manager.root_level);
2617                 amdgpu_bo_unreserve(root);
2618         }
2619         amdgpu_bo_unref(&root);
2620         dma_fence_put(vm->last_update);
2621         for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
2622                 amdgpu_vmid_free_reserved(adev, vm, i);
2623 }
2624
2625 /**
2626  * amdgpu_vm_pasid_fault_credit - Check fault credit for given PASID
2627  *
2628  * @adev: amdgpu_device pointer
2629  * @pasid: PASID to identify the VM
2630  *
2631  * This function is expected to be called in interrupt context. Returns
2632  * true if there was fault credit, false otherwise
2633  */
2634 bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
2635                                   unsigned int pasid)
2636 {
2637         struct amdgpu_vm *vm;
2638
2639         spin_lock(&adev->vm_manager.pasid_lock);
2640         vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
2641         if (!vm) {
2642                 /* VM not found, can't track fault credit */
2643                 spin_unlock(&adev->vm_manager.pasid_lock);
2644                 return true;
2645         }
2646
2647         /* No lock needed. Only accessed by IRQ handler */
2648         if (!vm->fault_credit) {
2649                 /* Too many faults in this VM */
2650                 spin_unlock(&adev->vm_manager.pasid_lock);
2651                 return false;
2652         }
2653
2654         vm->fault_credit--;
2655         spin_unlock(&adev->vm_manager.pasid_lock);
2656         return true;
2657 }
2658
2659 /**
2660  * amdgpu_vm_manager_init - init the VM manager
2661  *
2662  * @adev: amdgpu_device pointer
2663  *
2664  * Initialize the VM manager structures
2665  */
2666 void amdgpu_vm_manager_init(struct amdgpu_device *adev)
2667 {
2668         unsigned i;
2669
2670         amdgpu_vmid_mgr_init(adev);
2671
2672         adev->vm_manager.fence_context =
2673                 dma_fence_context_alloc(AMDGPU_MAX_RINGS);
2674         for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
2675                 adev->vm_manager.seqno[i] = 0;
2676
2677         atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
2678         spin_lock_init(&adev->vm_manager.prt_lock);
2679         atomic_set(&adev->vm_manager.num_prt_users, 0);
2680
2681         /* If not overridden by the user, compute VM tables are updated by the
2682          * CPU only on large BAR systems by default.
2683          */
2684 #ifdef CONFIG_X86_64
2685         if (amdgpu_vm_update_mode == -1) {
2686                 if (amdgpu_vm_is_large_bar(adev))
2687                         adev->vm_manager.vm_update_mode =
2688                                 AMDGPU_VM_USE_CPU_FOR_COMPUTE;
2689                 else
2690                         adev->vm_manager.vm_update_mode = 0;
2691         } else
2692                 adev->vm_manager.vm_update_mode = amdgpu_vm_update_mode;
2693 #else
2694         adev->vm_manager.vm_update_mode = 0;
2695 #endif
2696
2697         idr_init(&adev->vm_manager.pasid_idr);
2698         spin_lock_init(&adev->vm_manager.pasid_lock);
2699 }
2700
2701 /**
2702  * amdgpu_vm_manager_fini - cleanup VM manager
2703  *
2704  * @adev: amdgpu_device pointer
2705  *
2706  * Cleanup the VM manager and free resources.
2707  */
2708 void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
2709 {
2710         WARN_ON(!idr_is_empty(&adev->vm_manager.pasid_idr));
2711         idr_destroy(&adev->vm_manager.pasid_idr);
2712
2713         amdgpu_vmid_mgr_fini(adev);
2714 }
2715
2716 int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
2717 {
2718         union drm_amdgpu_vm *args = data;
2719         struct amdgpu_device *adev = dev->dev_private;
2720         struct amdgpu_fpriv *fpriv = filp->driver_priv;
2721         int r;
2722
2723         switch (args->in.op) {
2724         case AMDGPU_VM_OP_RESERVE_VMID:
2725                 /* currently, we only have the requirement to reserve a VMID from the gfxhub */
2726                 r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB);
2727                 if (r)
2728                         return r;
2729                 break;
2730         case AMDGPU_VM_OP_UNRESERVE_VMID:
2731                 amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB);
2732                 break;
2733         default:
2734                 return -EINVAL;
2735         }
2736
2737         return 0;
2738 }
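/*
 * Illustrative user-space sketch (not part of the driver): exercising the
 * ioctl above through libdrm.  DRM_AMDGPU_VM, union drm_amdgpu_vm and the
 * AMDGPU_VM_OP_* values come from the UAPI header amdgpu_drm.h; the render
 * node path is an example and error handling is minimal.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <xf86drm.h>
#include <amdgpu_drm.h>

int main(void)
{
	union drm_amdgpu_vm args;
	int fd, r;

	fd = open("/dev/dri/renderD128", O_RDWR);	/* example render node */
	if (fd < 0)
		return 1;

	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_VM_OP_RESERVE_VMID;
	r = drmCommandWriteRead(fd, DRM_AMDGPU_VM, &args, sizeof(args));
	printf("reserve vmid: %d\n", r);

	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_VM_OP_UNRESERVE_VMID;
	r = drmCommandWriteRead(fd, DRM_AMDGPU_VM, &args, sizeof(args));
	printf("unreserve vmid: %d\n", r);

	close(fd);
	return 0;
}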