Merge tag 'powerpc-4.17-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc...
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 5afbc5e714d0849b43389d6c0cd8e360d4c72b47..da55a78d7380bdd0da7ce9120bb0f4bf9f460ffe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -32,6 +32,7 @@
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
+#include "amdgpu_amdkfd.h"
 
 /*
  * GPUVM
@@ -75,7 +76,8 @@ struct amdgpu_pte_update_params {
        /* indirect buffer to fill with commands */
        struct amdgpu_ib *ib;
        /* Function which actually does the update */
-       void (*func)(struct amdgpu_pte_update_params *params, uint64_t pe,
+       void (*func)(struct amdgpu_pte_update_params *params,
+                    struct amdgpu_bo *bo, uint64_t pe,
                     uint64_t addr, unsigned count, uint32_t incr,
                     uint64_t flags);
        /* The next two are used during VM update by CPU
@@ -256,6 +258,104 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
        return ready;
 }
 
+/**
+ * amdgpu_vm_clear_bo - initially clear the PDs/PTs
+ *
+ * @adev: amdgpu_device pointer
+ * @bo: BO to clear
+ * @level: level this BO is at
+ *
+ * Root PD needs to be reserved when calling this.
+ */
+static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
+                             struct amdgpu_vm *vm, struct amdgpu_bo *bo,
+                             unsigned level, bool pte_support_ats)
+{
+       struct ttm_operation_ctx ctx = { true, false };
+       struct dma_fence *fence = NULL;
+       unsigned entries, ats_entries;
+       struct amdgpu_ring *ring;
+       struct amdgpu_job *job;
+       uint64_t addr;
+       int r;
+
+       addr = amdgpu_bo_gpu_offset(bo);
+       entries = amdgpu_bo_size(bo) / 8;
+
+       if (pte_support_ats) {
+               if (level == adev->vm_manager.root_level) {
+                       ats_entries = amdgpu_vm_level_shift(adev, level);
+                       ats_entries += AMDGPU_GPU_PAGE_SHIFT;
+                       ats_entries = AMDGPU_VA_HOLE_START >> ats_entries;
+                       ats_entries = min(ats_entries, entries);
+                       entries -= ats_entries;
+               } else {
+                       ats_entries = entries;
+                       entries = 0;
+               }
+       } else {
+               ats_entries = 0;
+       }
+
+       ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
+
+       r = reservation_object_reserve_shared(bo->tbo.resv);
+       if (r)
+               return r;
+
+       r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+       if (r)
+               goto error;
+
+       r = amdgpu_job_alloc_with_ib(adev, 64, &job);
+       if (r)
+               goto error;
+
+       if (ats_entries) {
+               uint64_t ats_value;
+
+               ats_value = AMDGPU_PTE_DEFAULT_ATC;
+               if (level != AMDGPU_VM_PTB)
+                       ats_value |= AMDGPU_PDE_PTE;
+
+               amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
+                                     ats_entries, 0, ats_value);
+               addr += ats_entries * 8;
+       }
+
+       if (entries)
+               amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
+                                     entries, 0, 0);
+
+       amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+
+       WARN_ON(job->ibs[0].length_dw > 64);
+       r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv,
+                            AMDGPU_FENCE_OWNER_UNDEFINED, false);
+       if (r)
+               goto error_free;
+
+       r = amdgpu_job_submit(job, ring, &vm->entity,
+                             AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
+       if (r)
+               goto error_free;
+
+       amdgpu_bo_fence(bo, fence, true);
+       dma_fence_put(fence);
+
+       if (bo->shadow)
+               return amdgpu_vm_clear_bo(adev, vm, bo->shadow,
+                                         level, pte_support_ats);
+
+       return 0;
+
+error_free:
+       amdgpu_job_free(job);
+
+error:
+       return r;
+}
+
 /**
  * amdgpu_vm_alloc_levels - allocate the PD/PT levels
  *
@@ -270,13 +370,12 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
                                  struct amdgpu_vm *vm,
                                  struct amdgpu_vm_pt *parent,
                                  uint64_t saddr, uint64_t eaddr,
-                                 unsigned level)
+                                 unsigned level, bool ats)
 {
        unsigned shift = amdgpu_vm_level_shift(adev, level);
        unsigned pt_idx, from, to;
-       int r;
        u64 flags;
-       uint64_t init_value = 0;
+       int r;
 
        if (!parent->entries) {
                unsigned num_entries = amdgpu_vm_num_entries(adev, level);
@@ -299,21 +398,13 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
        saddr = saddr & ((1 << shift) - 1);
        eaddr = eaddr & ((1 << shift) - 1);
 
-       flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
-                       AMDGPU_GEM_CREATE_VRAM_CLEARED;
+       flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
        if (vm->use_cpu_for_update)
                flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
        else
                flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
                                AMDGPU_GEM_CREATE_SHADOW);
 
-       if (vm->pte_support_ats) {
-               init_value = AMDGPU_PTE_DEFAULT_ATC;
-               if (level != AMDGPU_VM_PTB)
-                       init_value |= AMDGPU_PDE_PTE;
-
-       }
-
        /* walk over the address space and allocate the page tables */
        for (pt_idx = from; pt_idx <= to; ++pt_idx) {
                struct reservation_object *resv = vm->root.base.bo->tbo.resv;
@@ -323,16 +414,23 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
                if (!entry->base.bo) {
                        r = amdgpu_bo_create(adev,
                                             amdgpu_vm_bo_size(adev, level),
-                                            AMDGPU_GPU_PAGE_SIZE, true,
-                                            AMDGPU_GEM_DOMAIN_VRAM,
-                                            flags,
-                                            NULL, resv, init_value, &pt);
+                                            AMDGPU_GPU_PAGE_SIZE,
+                                            AMDGPU_GEM_DOMAIN_VRAM, flags,
+                                            ttm_bo_type_kernel, resv, &pt);
                        if (r)
                                return r;
 
+                       r = amdgpu_vm_clear_bo(adev, vm, pt, level, ats);
+                       if (r) {
+                               amdgpu_bo_unref(&pt->shadow);
+                               amdgpu_bo_unref(&pt);
+                               return r;
+                       }
+
                        if (vm->use_cpu_for_update) {
                                r = amdgpu_bo_kmap(pt, NULL);
                                if (r) {
+                                       amdgpu_bo_unref(&pt->shadow);
                                        amdgpu_bo_unref(&pt);
                                        return r;
                                }
@@ -356,7 +454,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
                        uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
                                ((1 << shift) - 1);
                        r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr,
-                                                  sub_eaddr, level);
+                                                  sub_eaddr, level, ats);
                        if (r)
                                return r;
                }
@@ -379,26 +477,29 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
                        struct amdgpu_vm *vm,
                        uint64_t saddr, uint64_t size)
 {
-       uint64_t last_pfn;
        uint64_t eaddr;
+       bool ats = false;
 
        /* validate the parameters */
        if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)
                return -EINVAL;
 
        eaddr = saddr + size - 1;
-       last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE;
-       if (last_pfn >= adev->vm_manager.max_pfn) {
-               dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
-                       last_pfn, adev->vm_manager.max_pfn);
-               return -EINVAL;
-       }
+
+       if (vm->pte_support_ats)
+               ats = saddr < AMDGPU_VA_HOLE_START;
 
        saddr /= AMDGPU_GPU_PAGE_SIZE;
        eaddr /= AMDGPU_GPU_PAGE_SIZE;
 
+       if (eaddr >= adev->vm_manager.max_pfn) {
+               dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
+                       eaddr, adev->vm_manager.max_pfn);
+               return -EINVAL;
+       }
+
        return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr,
-                                     adev->vm_manager.root_level);
+                                     adev->vm_manager.root_level, ats);
 }
 
 /**
@@ -465,7 +566,7 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
 
 static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev)
 {
-       return (adev->mc.real_vram_size == adev->mc.visible_vram_size);
+       return (adev->gmc.real_vram_size == adev->gmc.visible_vram_size);
 }
 
 /**
@@ -491,14 +592,24 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
                id->oa_base != job->oa_base ||
                id->oa_size != job->oa_size);
        bool vm_flush_needed = job->vm_needs_flush;
+       bool pasid_mapping_needed = id->pasid != job->pasid ||
+               !id->pasid_mapping ||
+               !dma_fence_is_signaled(id->pasid_mapping);
+       struct dma_fence *fence = NULL;
        unsigned patch_offset = 0;
        int r;
 
        if (amdgpu_vmid_had_gpu_reset(adev, id)) {
                gds_switch_needed = true;
                vm_flush_needed = true;
+               pasid_mapping_needed = true;
        }
 
+       gds_switch_needed &= !!ring->funcs->emit_gds_switch;
+       vm_flush_needed &= !!ring->funcs->emit_vm_flush;
+       pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
+               ring->funcs->emit_wreg;
+
        if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
                return 0;
 
@@ -508,23 +619,36 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
        if (need_pipe_sync)
                amdgpu_ring_emit_pipeline_sync(ring);
 
-       if (ring->funcs->emit_vm_flush && vm_flush_needed) {
-               struct dma_fence *fence;
-
+       if (vm_flush_needed) {
                trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
                amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr);
+       }
+
+       if (pasid_mapping_needed)
+               amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
 
+       if (vm_flush_needed || pasid_mapping_needed) {
                r = amdgpu_fence_emit(ring, &fence);
                if (r)
                        return r;
+       }
 
+       if (vm_flush_needed) {
                mutex_lock(&id_mgr->lock);
                dma_fence_put(id->last_flush);
-               id->last_flush = fence;
-               id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);
+               id->last_flush = dma_fence_get(fence);
+               id->current_gpu_reset_count =
+                       atomic_read(&adev->gpu_reset_counter);
                mutex_unlock(&id_mgr->lock);
        }
 
+       if (pasid_mapping_needed) {
+               id->pasid = job->pasid;
+               dma_fence_put(id->pasid_mapping);
+               id->pasid_mapping = dma_fence_get(fence);
+       }
+       dma_fence_put(fence);
+
        if (ring->funcs->emit_gds_switch && gds_switch_needed) {
                id->gds_base = job->gds_base;
                id->gds_size = job->gds_size;
@@ -578,6 +702,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
  * amdgpu_vm_do_set_ptes - helper to call the right asic function
  *
  * @params: see amdgpu_pte_update_params definition
+ * @bo: PD/PT to update
  * @pe: addr of the page entry
  * @addr: dst addr to write into pe
  * @count: number of page entries to update
@@ -588,10 +713,12 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
  * to setup the page table using the DMA.
  */
 static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params,
+                                 struct amdgpu_bo *bo,
                                  uint64_t pe, uint64_t addr,
                                  unsigned count, uint32_t incr,
                                  uint64_t flags)
 {
+       pe += amdgpu_bo_gpu_offset(bo);
        trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
 
        if (count < 3) {
@@ -608,6 +735,7 @@ static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params,
  * amdgpu_vm_do_copy_ptes - copy the PTEs from the GART
  *
  * @params: see amdgpu_pte_update_params definition
+ * @bo: PD/PT to update
  * @pe: addr of the page entry
  * @addr: dst addr to write into pe
  * @count: number of page entries to update
@@ -617,13 +745,14 @@ static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params,
  * Traces the parameters and calls the DMA function to copy the PTEs.
  */
 static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params,
+                                  struct amdgpu_bo *bo,
                                   uint64_t pe, uint64_t addr,
                                   unsigned count, uint32_t incr,
                                   uint64_t flags)
 {
        uint64_t src = (params->src + (addr >> 12) * 8);
 
-
+       pe += amdgpu_bo_gpu_offset(bo);
        trace_amdgpu_vm_copy_ptes(pe, src, count);
 
        amdgpu_vm_copy_pte(params->adev, params->ib, pe, src, count);
@@ -657,6 +786,7 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
  * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU
  *
  * @params: see amdgpu_pte_update_params definition
+ * @bo: PD/PT to update
  * @pe: kmap addr of the page entry
  * @addr: dst addr to write into pe
  * @count: number of page entries to update
@@ -666,6 +796,7 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
  * Write count number of PT/PD entries directly.
  */
 static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
+                                  struct amdgpu_bo *bo,
                                   uint64_t pe, uint64_t addr,
                                   unsigned count, uint32_t incr,
                                   uint64_t flags)
@@ -673,14 +804,16 @@ static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
        unsigned int i;
        uint64_t value;
 
+       pe += (unsigned long)amdgpu_bo_kptr(bo);
+
        trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
 
        for (i = 0; i < count; i++) {
                value = params->pages_addr ?
                        amdgpu_vm_map_gart(params->pages_addr, addr) :
                        addr;
-               amdgpu_gart_set_pte_pde(params->adev, (void *)(uintptr_t)pe,
-                                       i, value, flags);
+               amdgpu_gmc_set_pte_pde(params->adev, (void *)(uintptr_t)pe,
+                                      i, value, flags);
                addr += incr;
        }
 }
@@ -714,8 +847,7 @@ static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params,
                                 struct amdgpu_vm_pt *parent,
                                 struct amdgpu_vm_pt *entry)
 {
-       struct amdgpu_bo *bo = entry->base.bo, *shadow = NULL, *pbo;
-       uint64_t pd_addr, shadow_addr = 0;
+       struct amdgpu_bo *bo = parent->base.bo, *pbo;
        uint64_t pde, pt, flags;
        unsigned level;
 
@@ -723,29 +855,17 @@ static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params,
        if (entry->huge)
                return;
 
-       if (vm->use_cpu_for_update) {
-               pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
-       } else {
-               pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
-               shadow = parent->base.bo->shadow;
-               if (shadow)
-                       shadow_addr = amdgpu_bo_gpu_offset(shadow);
-       }
-
-       for (level = 0, pbo = parent->base.bo->parent; pbo; ++level)
+       for (level = 0, pbo = bo->parent; pbo; ++level)
                pbo = pbo->parent;
 
        level += params->adev->vm_manager.root_level;
-       pt = amdgpu_bo_gpu_offset(bo);
+       pt = amdgpu_bo_gpu_offset(entry->base.bo);
        flags = AMDGPU_PTE_VALID;
-       amdgpu_gart_get_vm_pde(params->adev, level, &pt, &flags);
-       if (shadow) {
-               pde = shadow_addr + (entry - parent->entries) * 8;
-               params->func(params, pde, pt, 1, 0, flags);
-       }
-
-       pde = pd_addr + (entry - parent->entries) * 8;
-       params->func(params, pde, pt, 1, 0, flags);
+       amdgpu_gmc_get_vm_pde(params->adev, level, &pt, &flags);
+       pde = (entry - parent->entries) * 8;
+       if (bo->shadow)
+               params->func(params, bo->shadow, pde, pt, 1, 0, flags);
+       params->func(params, bo, pde, pt, 1, 0, flags);
 }
 
 /*
@@ -856,7 +976,7 @@ restart:
        if (vm->use_cpu_for_update) {
                /* Flush HDP */
                mb();
-               amdgpu_gart_flush_gpu_tlb(adev, 0);
+               amdgpu_asic_flush_hdp(adev, NULL);
        } else if (params.ib->length_dw == 0) {
                amdgpu_job_free(job);
        } else {
@@ -870,11 +990,6 @@ restart:
                amdgpu_ring_pad_ib(ring, params.ib);
                amdgpu_sync_resv(adev, &job->sync, root->tbo.resv,
                                 AMDGPU_FENCE_OWNER_VM, false);
-               if (root->shadow)
-                       amdgpu_sync_resv(adev, &job->sync,
-                                        root->shadow->tbo.resv,
-                                        AMDGPU_FENCE_OWNER_VM, false);
-
                WARN_ON(params.ib->length_dw > ndw);
                r = amdgpu_job_submit(job, ring, &vm->entity,
                                      AMDGPU_FENCE_OWNER_VM, &fence);
@@ -946,7 +1061,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
                                        unsigned nptes, uint64_t dst,
                                        uint64_t flags)
 {
-       uint64_t pd_addr, pde;
+       uint64_t pde;
 
        /* In the case of a mixed PT the PDE must point to it*/
        if (p->adev->asic_type >= CHIP_VEGA10 && !p->src &&
@@ -967,21 +1082,12 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
        }
 
        entry->huge = true;
-       amdgpu_gart_get_vm_pde(p->adev, AMDGPU_VM_PDB0,
-                              &dst, &flags);
+       amdgpu_gmc_get_vm_pde(p->adev, AMDGPU_VM_PDB0, &dst, &flags);
 
-       if (p->func == amdgpu_vm_cpu_set_ptes) {
-               pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
-       } else {
-               if (parent->base.bo->shadow) {
-                       pd_addr = amdgpu_bo_gpu_offset(parent->base.bo->shadow);
-                       pde = pd_addr + (entry - parent->entries) * 8;
-                       p->func(p, pde, dst, 1, 0, flags);
-               }
-               pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
-       }
-       pde = pd_addr + (entry - parent->entries) * 8;
-       p->func(p, pde, dst, 1, 0, flags);
+       pde = (entry - parent->entries) * 8;
+       if (parent->base.bo->shadow)
+               p->func(p, parent->base.bo->shadow, pde, dst, 1, 0, flags);
+       p->func(p, parent->base.bo, pde, dst, 1, 0, flags);
 }
 
 /**
@@ -1007,7 +1113,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
        uint64_t addr, pe_start;
        struct amdgpu_bo *pt;
        unsigned nptes;
-       bool use_cpu_update = (params->func == amdgpu_vm_cpu_set_ptes);
 
        /* walk over the address space and update the page tables */
        for (addr = start; addr < end; addr += nptes,
@@ -1030,20 +1135,11 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
                        continue;
 
                pt = entry->base.bo;
-               if (use_cpu_update) {
-                       pe_start = (unsigned long)amdgpu_bo_kptr(pt);
-               } else {
-                       if (pt->shadow) {
-                               pe_start = amdgpu_bo_gpu_offset(pt->shadow);
-                               pe_start += (addr & mask) * 8;
-                               params->func(params, pe_start, dst, nptes,
-                                            AMDGPU_GPU_PAGE_SIZE, flags);
-                       }
-                       pe_start = amdgpu_bo_gpu_offset(pt);
-               }
-
-               pe_start += (addr & mask) * 8;
-               params->func(params, pe_start, dst, nptes,
+               pe_start = (addr & mask) * 8;
+               if (pt->shadow)
+                       params->func(params, pt->shadow, pe_start, dst, nptes,
+                                    AMDGPU_GPU_PAGE_SIZE, flags);
+               params->func(params, pt, pe_start, dst, nptes,
                             AMDGPU_GPU_PAGE_SIZE, flags);
        }
 
@@ -1204,11 +1300,10 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
        } else {
                /* set page commands needed */
-               ndw += ncmds * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw;
+               ndw += ncmds * 10;
 
                /* extra commands for begin/end fragments */
-               ndw += 2 * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw
-                               * adev->vm_manager.fragment_size;
+               ndw += 2 * 10 * adev->vm_manager.fragment_size;
 
                params.func = amdgpu_vm_do_set_ptes;
        }
@@ -1457,7 +1552,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
        if (vm->use_cpu_for_update) {
                /* Flush HDP */
                mb();
-               amdgpu_gart_flush_gpu_tlb(adev, 0);
+               amdgpu_asic_flush_hdp(adev, NULL);
        }
 
        spin_lock(&vm->status_lock);
@@ -1485,7 +1580,7 @@ static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev)
 
        spin_lock_irqsave(&adev->vm_manager.prt_lock, flags);
        enable = !!atomic_read(&adev->vm_manager.num_prt_users);
-       adev->gart.gart_funcs->set_prt(adev, enable);
+       adev->gmc.gmc_funcs->set_prt(adev, enable);
        spin_unlock_irqrestore(&adev->vm_manager.prt_lock, flags);
 }
 
@@ -1494,7 +1589,7 @@ static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev)
  */
 static void amdgpu_vm_prt_get(struct amdgpu_device *adev)
 {
-       if (!adev->gart.gart_funcs->set_prt)
+       if (!adev->gmc.gmc_funcs->set_prt)
                return;
 
        if (atomic_inc_return(&adev->vm_manager.num_prt_users) == 1)
@@ -1529,7 +1624,7 @@ static void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev,
 {
        struct amdgpu_prt_cb *cb;
 
-       if (!adev->gart.gart_funcs->set_prt)
+       if (!adev->gmc.gmc_funcs->set_prt)
                return;
 
        cb = kmalloc(sizeof(struct amdgpu_prt_cb), GFP_KERNEL);
@@ -1623,16 +1718,16 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
                          struct dma_fence **fence)
 {
        struct amdgpu_bo_va_mapping *mapping;
+       uint64_t init_pte_value = 0;
        struct dma_fence *f = NULL;
        int r;
-       uint64_t init_pte_value = 0;
 
        while (!list_empty(&vm->freed)) {
                mapping = list_first_entry(&vm->freed,
                        struct amdgpu_bo_va_mapping, list);
                list_del(&mapping->list);
 
-               if (vm->pte_support_ats)
+               if (vm->pte_support_ats && mapping->start < AMDGPU_VA_HOLE_START)
                        init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
 
                r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
@@ -2262,11 +2357,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 {
        const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
                AMDGPU_VM_PTE_COUNT(adev) * 8);
-       uint64_t init_pde_value = 0, flags;
        unsigned ring_instance;
        struct amdgpu_ring *ring;
        struct drm_sched_rq *rq;
        unsigned long size;
+       uint64_t flags;
        int r, i;
 
        vm->va = RB_ROOT_CACHED;
@@ -2295,33 +2390,27 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
                vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
                                                AMDGPU_VM_USE_CPU_FOR_COMPUTE);
 
-               if (adev->asic_type == CHIP_RAVEN) {
+               if (adev->asic_type == CHIP_RAVEN)
                        vm->pte_support_ats = true;
-                       init_pde_value = AMDGPU_PTE_DEFAULT_ATC
-                                       | AMDGPU_PDE_PTE;
-
-               }
-       } else
+       } else {
                vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
                                                AMDGPU_VM_USE_CPU_FOR_GFX);
+       }
        DRM_DEBUG_DRIVER("VM update mode is %s\n",
                         vm->use_cpu_for_update ? "CPU" : "SDMA");
        WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
                  "CPU update of VM recommended only for large BAR system\n");
        vm->last_update = NULL;
 
-       flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
-                       AMDGPU_GEM_CREATE_VRAM_CLEARED;
+       flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
        if (vm->use_cpu_for_update)
                flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
        else
-               flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
-                               AMDGPU_GEM_CREATE_SHADOW);
+               flags |= AMDGPU_GEM_CREATE_SHADOW;
 
        size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level);
-       r = amdgpu_bo_create(adev, size, align, true, AMDGPU_GEM_DOMAIN_VRAM,
-                            flags, NULL, NULL, init_pde_value,
-                            &vm->root.base.bo);
+       r = amdgpu_bo_create(adev, size, align, AMDGPU_GEM_DOMAIN_VRAM, flags,
+                            ttm_bo_type_kernel, NULL, &vm->root.base.bo);
        if (r)
                goto error_free_sched_entity;
 
@@ -2329,6 +2418,12 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
        if (r)
                goto error_free_root;
 
+       r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
+                              adev->vm_manager.root_level,
+                              vm->pte_support_ats);
+       if (r)
+               goto error_unreserve;
+
        vm->root.base.vm = vm;
        list_add_tail(&vm->root.base.bo_list, &vm->root.base.bo->va);
        list_add_tail(&vm->root.base.vm_status, &vm->evicted);
@@ -2352,6 +2447,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
        return 0;
 
+error_unreserve:
+       amdgpu_bo_unreserve(vm->root.base.bo);
+
 error_free_root:
        amdgpu_bo_unref(&vm->root.base.bo->shadow);
        amdgpu_bo_unref(&vm->root.base.bo);
@@ -2363,6 +2461,73 @@ error_free_sched_entity:
        return r;
 }
 
+/**
+ * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM
+ *
+ * This only works on GFX VMs that don't have any BOs added and no
+ * page tables allocated yet.
+ *
+ * Changes the following VM parameters:
+ * - use_cpu_for_update
+ * - pte_supports_ats
+ * - pasid (old PASID is released, because compute manages its own PASIDs)
+ *
+ * Reinitializes the page directory to reflect the changed ATS
+ * setting. May leave behind an unused shadow BO for the page
+ * directory when switching from SDMA updates to CPU updates.
+ *
+ * Returns 0 for success, -errno for errors.
+ */
+int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+       bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
+       int r;
+
+       r = amdgpu_bo_reserve(vm->root.base.bo, true);
+       if (r)
+               return r;
+
+       /* Sanity checks */
+       if (!RB_EMPTY_ROOT(&vm->va.rb_root) || vm->root.entries) {
+               r = -EINVAL;
+               goto error;
+       }
+
+       /* Check if PD needs to be reinitialized and do it before
+        * changing any other state, in case it fails.
+        */
+       if (pte_support_ats != vm->pte_support_ats) {
+               r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
+                              adev->vm_manager.root_level,
+                              pte_support_ats);
+               if (r)
+                       goto error;
+       }
+
+       /* Update VM state */
+       vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
+                                   AMDGPU_VM_USE_CPU_FOR_COMPUTE);
+       vm->pte_support_ats = pte_support_ats;
+       DRM_DEBUG_DRIVER("VM update mode is %s\n",
+                        vm->use_cpu_for_update ? "CPU" : "SDMA");
+       WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
+                 "CPU update of VM recommended only for large BAR system\n");
+
+       if (vm->pasid) {
+               unsigned long flags;
+
+               spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
+               idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
+               spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
+
+               vm->pasid = 0;
+       }
+
+error:
+       amdgpu_bo_unreserve(vm->root.base.bo);
+       return r;
+}
+
 /**
  * amdgpu_vm_free_levels - free PD/PT levels
  *
@@ -2405,11 +2570,13 @@ static void amdgpu_vm_free_levels(struct amdgpu_device *adev,
 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 {
        struct amdgpu_bo_va_mapping *mapping, *tmp;
-       bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt;
+       bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt;
        struct amdgpu_bo *root;
        u64 fault;
        int i, r;
 
+       amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
+
        /* Clear pending page faults from IH when the VM is destroyed */
        while (kfifo_get(&vm->faults, &fault))
                amdgpu_ih_clear_fault(adev, fault);