linux.git blobdiff: drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index c8c26f21993ccd7078172a264f21dd276d3895bb..6fc16eecf2dce5fc448afaa1681fbb086259ea35 100644 (file)
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 
-/*
- * PASID manager
- *
- * PASIDs are global address space identifiers that can be shared
- * between the GPU, an IOMMU and the driver. VMs on different devices
- * may use the same PASID if they share the same address
- * space. Therefore PASIDs are allocated using a global IDA. VMs are
- * looked up from the PASID per amdgpu_device.
- */
-static DEFINE_IDA(amdgpu_vm_pasid_ida);
-
-/**
- * amdgpu_vm_alloc_pasid - Allocate a PASID
- * @bits: Maximum width of the PASID in bits, must be at least 1
- *
- * Allocates a PASID of the given width while keeping smaller PASIDs
- * available if possible.
- *
- * Returns a positive integer on success. Returns %-EINVAL if bits==0.
- * Returns %-ENOSPC if no PASID was available. Returns %-ENOMEM on
- * memory allocation failure.
- */
-int amdgpu_vm_alloc_pasid(unsigned int bits)
-{
-       int pasid = -EINVAL;
-
-       for (bits = min(bits, 31U); bits > 0; bits--) {
-               pasid = ida_simple_get(&amdgpu_vm_pasid_ida,
-                                      1U << (bits - 1), 1U << bits,
-                                      GFP_KERNEL);
-               if (pasid != -ENOSPC)
-                       break;
-       }
-
-       return pasid;
-}
-
-/**
- * amdgpu_vm_free_pasid - Free a PASID
- * @pasid: PASID to free
- */
-void amdgpu_vm_free_pasid(unsigned int pasid)
-{
-       ida_simple_remove(&amdgpu_vm_pasid_ida, pasid);
-}
-
 /*
  * GPUVM
  * GPUVM is similar to the legacy gart on older asics, however
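
The PASID allocator removed above tried the widest half-range first and only fell back to narrower ranges on -ENOSPC, so small PASIDs stay available for devices that support fewer PASID bits. A minimal userspace sketch of that width-descending strategy, with a plain bitmap standing in for the kernel IDA (all names and the 16-bit cap are illustrative, not the driver's):

#include <stdbool.h>
#include <stdio.h>

#define MAX_PASID (1u << 16)
static bool ida[MAX_PASID]; /* hypothetical stand-in for amdgpu_vm_pasid_ida */

static int ida_get_range(unsigned lo, unsigned hi)
{
	for (unsigned i = lo; i < hi && i < MAX_PASID; i++)
		if (!ida[i]) { ida[i] = true; return (int)i; }
	return -1; /* models -ENOSPC */
}

static int alloc_pasid(unsigned bits)
{
	int pasid = -1;

	/* widest allowed range first: [2^(bits-1), 2^bits) */
	for (bits = bits < 16 ? bits : 16; bits > 0; bits--) {
		pasid = ida_get_range(1u << (bits - 1), 1u << bits);
		if (pasid >= 0)
			break; /* success; narrower ranges stay untouched */
	}
	return pasid;
}

int main(void)
{
	printf("first 10-bit PASID: %d\n", alloc_pasid(10)); /* 512 */
	printf("next  10-bit PASID: %d\n", alloc_pasid(10)); /* 513 */
	return 0;
}
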
@@ -138,6 +92,35 @@ struct amdgpu_prt_cb {
        struct dma_fence_cb cb;
 };
 
+/**
+ * amdgpu_vm_level_shift - return the addr shift for each level
+ *
+ * @adev: amdgpu_device pointer
+ * @level: VMPT level
+ *
+ * Returns the number of bits the pfn needs to be right shifted for a level.
+ */
+static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev,
+                                     unsigned level)
+{
+       unsigned shift = 0xff;
+
+       switch (level) {
+       case AMDGPU_VM_PDB2:
+       case AMDGPU_VM_PDB1:
+       case AMDGPU_VM_PDB0:
+               shift = 9 * (AMDGPU_VM_PDB0 - level) +
+                       adev->vm_manager.block_size;
+               break;
+       case AMDGPU_VM_PTB:
+               shift = 0;
+               break;
+       default:
+               dev_err(adev->dev, "the level%d isn't supported.\n", level);
+       }
+
+       return shift;
+}
+
 /**
  * amdgpu_vm_num_entries - return the number of entries in a PD/PT
  *
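
The new amdgpu_vm_level_shift() above encodes how many address bits each page-directory level translates: 9 bits per directory level, plus the configurable block size covered by the leaf page tables. A compilable sketch of the arithmetic, assuming GFX9-style level names with PDB2..PTB numbered 0..3 (the driver's actual AMDGPU_VM_PDB2..AMDGPU_VM_PTB enum values may differ):

#include <stdio.h>

enum vm_level { PDB2, PDB1, PDB0, PTB }; /* illustrative numbering */

static unsigned level_shift(unsigned block_size, enum vm_level level)
{
	switch (level) {
	case PDB2:
	case PDB1:
	case PDB0:
		/* each PDB level covers 9 more bits than the one below */
		return 9 * (PDB0 - level) + block_size;
	case PTB:
		return 0;
	}
	return 0xff; /* unsupported level */
}

int main(void)
{
	for (enum vm_level l = PDB2; l <= PTB; l++)
		printf("level %d -> shift %u\n", l, level_shift(9, l));
	/* prints 27, 18, 9, 0 for block_size = 9 */
	return 0;
}
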
@@ -148,17 +131,18 @@ struct amdgpu_prt_cb {
 static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
                                      unsigned level)
 {
-       if (level == 0)
+       unsigned shift = amdgpu_vm_level_shift(adev,
+                                              adev->vm_manager.root_level);
+
+       if (level == adev->vm_manager.root_level)
                /* For the root directory */
-               return adev->vm_manager.max_pfn >>
-                       (adev->vm_manager.block_size *
-                        adev->vm_manager.num_level);
-       else if (level == adev->vm_manager.num_level)
+               return round_up(adev->vm_manager.max_pfn, 1 << shift) >> shift;
+       else if (level != AMDGPU_VM_PTB)
+               /* Everything in between */
+               return 512;
+       else
                /* For the page tables on the leaves */
                return AMDGPU_VM_PTE_COUNT(adev);
-       else
-               /* Everything in between */
-               return 1 << adev->vm_manager.block_size;
 }
 
 /**
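
The reworked amdgpu_vm_num_entries() sizes the root directory by rounding max_pfn up to the root shift's coverage, so an address space that is not a power-of-two multiple of one root entry's span still gets a final, partially used entry. A sketch of that rounding with round_up() modelled inline (4 KiB GPU pages assumed):

#include <stdint.h>
#include <stdio.h>

static uint64_t num_root_entries(uint64_t max_pfn, unsigned shift)
{
	uint64_t span = 1ull << shift;

	/* round_up(max_pfn, span) >> shift */
	return ((max_pfn + span - 1) & ~(span - 1)) >> shift;
}

int main(void)
{
	/* e.g. a 256 GiB space: max_pfn = 2^26 pages of 4 KiB, shift 27 */
	printf("%llu\n", (unsigned long long)num_root_entries(1ull << 26, 27));
	/* 1: the whole space fits in a single root entry */
	printf("%llu\n", (unsigned long long)num_root_entries(3ull << 27, 27));
	/* 3 */
	return 0;
}
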
@@ -288,8 +272,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
                                  uint64_t saddr, uint64_t eaddr,
                                  unsigned level)
 {
-       unsigned shift = (adev->vm_manager.num_level - level) *
-               adev->vm_manager.block_size;
+       unsigned shift = amdgpu_vm_level_shift(adev, level);
        unsigned pt_idx, from, to;
        int r;
        u64 flags;
@@ -312,9 +295,6 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
            to >= amdgpu_vm_num_entries(adev, level))
                return -EINVAL;
 
-       if (to > parent->last_entry_used)
-               parent->last_entry_used = to;
-
        ++level;
        saddr = saddr & ((1 << shift) - 1);
        eaddr = eaddr & ((1 << shift) - 1);
@@ -329,7 +309,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 
        if (vm->pte_support_ats) {
                init_value = AMDGPU_PTE_DEFAULT_ATC;
-               if (level != adev->vm_manager.num_level - 1)
+               if (level != AMDGPU_VM_PTB)
                        init_value |= AMDGPU_PDE_PTE;
 
        }
@@ -369,10 +349,9 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
                        spin_lock(&vm->status_lock);
                        list_add(&entry->base.vm_status, &vm->relocated);
                        spin_unlock(&vm->status_lock);
-                       entry->addr = 0;
                }
 
-               if (level < adev->vm_manager.num_level) {
+               if (level < AMDGPU_VM_PTB) {
                        uint64_t sub_saddr = (pt_idx == from) ? saddr : 0;
                        uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
                                ((1 << shift) - 1);
@@ -418,287 +397,8 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
        saddr /= AMDGPU_GPU_PAGE_SIZE;
        eaddr /= AMDGPU_GPU_PAGE_SIZE;
 
-       return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr, 0);
-}
-
-/**
- * amdgpu_vm_had_gpu_reset - check if reset occurred since last use
- *
- * @adev: amdgpu_device pointer
- * @id: VMID structure
- *
- * Check if GPU reset occurred since last use of the VMID.
- */
-static bool amdgpu_vm_had_gpu_reset(struct amdgpu_device *adev,
-                                   struct amdgpu_vm_id *id)
-{
-       return id->current_gpu_reset_count !=
-               atomic_read(&adev->gpu_reset_counter);
-}
-
-static bool amdgpu_vm_reserved_vmid_ready(struct amdgpu_vm *vm, unsigned vmhub)
-{
-       return !!vm->reserved_vmid[vmhub];
-}
-
-/* idr_mgr->lock must be held */
-static int amdgpu_vm_grab_reserved_vmid_locked(struct amdgpu_vm *vm,
-                                              struct amdgpu_ring *ring,
-                                              struct amdgpu_sync *sync,
-                                              struct dma_fence *fence,
-                                              struct amdgpu_job *job)
-{
-       struct amdgpu_device *adev = ring->adev;
-       unsigned vmhub = ring->funcs->vmhub;
-       uint64_t fence_context = adev->fence_context + ring->idx;
-       struct amdgpu_vm_id *id = vm->reserved_vmid[vmhub];
-       struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
-       struct dma_fence *updates = sync->last_vm_update;
-       int r = 0;
-       struct dma_fence *flushed, *tmp;
-       bool needs_flush = vm->use_cpu_for_update;
-
-       flushed  = id->flushed_updates;
-       if ((amdgpu_vm_had_gpu_reset(adev, id)) ||
-           (atomic64_read(&id->owner) != vm->client_id) ||
-           (job->vm_pd_addr != id->pd_gpu_addr) ||
-           (updates && (!flushed || updates->context != flushed->context ||
-                       dma_fence_is_later(updates, flushed))) ||
-           (!id->last_flush || (id->last_flush->context != fence_context &&
-                                !dma_fence_is_signaled(id->last_flush)))) {
-               needs_flush = true;
-               /* to prevent one context starved by another context */
-               id->pd_gpu_addr = 0;
-               tmp = amdgpu_sync_peek_fence(&id->active, ring);
-               if (tmp) {
-                       r = amdgpu_sync_fence(adev, sync, tmp);
-                       return r;
-               }
-       }
-
-       /* Good we can use this VMID. Remember this submission as
-       * user of the VMID.
-       */
-       r = amdgpu_sync_fence(ring->adev, &id->active, fence);
-       if (r)
-               goto out;
-
-       if (updates && (!flushed || updates->context != flushed->context ||
-                       dma_fence_is_later(updates, flushed))) {
-               dma_fence_put(id->flushed_updates);
-               id->flushed_updates = dma_fence_get(updates);
-       }
-       id->pd_gpu_addr = job->vm_pd_addr;
-       atomic64_set(&id->owner, vm->client_id);
-       job->vm_needs_flush = needs_flush;
-       if (needs_flush) {
-               dma_fence_put(id->last_flush);
-               id->last_flush = NULL;
-       }
-       job->vm_id = id - id_mgr->ids;
-       trace_amdgpu_vm_grab_id(vm, ring, job);
-out:
-       return r;
-}
-
-/**
- * amdgpu_vm_grab_id - allocate the next free VMID
- *
- * @vm: vm to allocate id for
- * @ring: ring we want to submit job to
- * @sync: sync object where we add dependencies
- * @fence: fence protecting ID from reuse
- *
- * Allocate an id for the vm, adding fences to the sync obj as necessary.
- */
-int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
-                     struct amdgpu_sync *sync, struct dma_fence *fence,
-                     struct amdgpu_job *job)
-{
-       struct amdgpu_device *adev = ring->adev;
-       unsigned vmhub = ring->funcs->vmhub;
-       struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
-       uint64_t fence_context = adev->fence_context + ring->idx;
-       struct dma_fence *updates = sync->last_vm_update;
-       struct amdgpu_vm_id *id, *idle;
-       struct dma_fence **fences;
-       unsigned i;
-       int r = 0;
-
-       mutex_lock(&id_mgr->lock);
-       if (amdgpu_vm_reserved_vmid_ready(vm, vmhub)) {
-               r = amdgpu_vm_grab_reserved_vmid_locked(vm, ring, sync, fence, job);
-               mutex_unlock(&id_mgr->lock);
-               return r;
-       }
-       fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL);
-       if (!fences) {
-               mutex_unlock(&id_mgr->lock);
-               return -ENOMEM;
-       }
-       /* Check if we have an idle VMID */
-       i = 0;
-       list_for_each_entry(idle, &id_mgr->ids_lru, list) {
-               fences[i] = amdgpu_sync_peek_fence(&idle->active, ring);
-               if (!fences[i])
-                       break;
-               ++i;
-       }
-
-       /* If we can't find an idle VMID to use, wait till one becomes available */
-       if (&idle->list == &id_mgr->ids_lru) {
-               u64 fence_context = adev->vm_manager.fence_context + ring->idx;
-               unsigned seqno = ++adev->vm_manager.seqno[ring->idx];
-               struct dma_fence_array *array;
-               unsigned j;
-
-               for (j = 0; j < i; ++j)
-                       dma_fence_get(fences[j]);
-
-               array = dma_fence_array_create(i, fences, fence_context,
-                                          seqno, true);
-               if (!array) {
-                       for (j = 0; j < i; ++j)
-                               dma_fence_put(fences[j]);
-                       kfree(fences);
-                       r = -ENOMEM;
-                       goto error;
-               }
-
-
-               r = amdgpu_sync_fence(ring->adev, sync, &array->base);
-               dma_fence_put(&array->base);
-               if (r)
-                       goto error;
-
-               mutex_unlock(&id_mgr->lock);
-               return 0;
-
-       }
-       kfree(fences);
-
-       job->vm_needs_flush = vm->use_cpu_for_update;
-       /* Check if we can use a VMID already assigned to this VM */
-       list_for_each_entry_reverse(id, &id_mgr->ids_lru, list) {
-               struct dma_fence *flushed;
-               bool needs_flush = vm->use_cpu_for_update;
-
-               /* Check all the prerequisites to using this VMID */
-               if (amdgpu_vm_had_gpu_reset(adev, id))
-                       continue;
-
-               if (atomic64_read(&id->owner) != vm->client_id)
-                       continue;
-
-               if (job->vm_pd_addr != id->pd_gpu_addr)
-                       continue;
-
-               if (!id->last_flush ||
-                   (id->last_flush->context != fence_context &&
-                    !dma_fence_is_signaled(id->last_flush)))
-                       needs_flush = true;
-
-               flushed  = id->flushed_updates;
-               if (updates && (!flushed || dma_fence_is_later(updates, flushed)))
-                       needs_flush = true;
-
-               /* Concurrent flushes are only possible starting with Vega10 */
-               if (adev->asic_type < CHIP_VEGA10 && needs_flush)
-                       continue;
-
-               /* Good we can use this VMID. Remember this submission as
-                * user of the VMID.
-                */
-               r = amdgpu_sync_fence(ring->adev, &id->active, fence);
-               if (r)
-                       goto error;
-
-               if (updates && (!flushed || dma_fence_is_later(updates, flushed))) {
-                       dma_fence_put(id->flushed_updates);
-                       id->flushed_updates = dma_fence_get(updates);
-               }
-
-               if (needs_flush)
-                       goto needs_flush;
-               else
-                       goto no_flush_needed;
-
-       };
-
-       /* Still no ID to use? Then use the idle one found earlier */
-       id = idle;
-
-       /* Remember this submission as user of the VMID */
-       r = amdgpu_sync_fence(ring->adev, &id->active, fence);
-       if (r)
-               goto error;
-
-       id->pd_gpu_addr = job->vm_pd_addr;
-       dma_fence_put(id->flushed_updates);
-       id->flushed_updates = dma_fence_get(updates);
-       atomic64_set(&id->owner, vm->client_id);
-
-needs_flush:
-       job->vm_needs_flush = true;
-       dma_fence_put(id->last_flush);
-       id->last_flush = NULL;
-
-no_flush_needed:
-       list_move_tail(&id->list, &id_mgr->ids_lru);
-
-       job->vm_id = id - id_mgr->ids;
-       trace_amdgpu_vm_grab_id(vm, ring, job);
-
-error:
-       mutex_unlock(&id_mgr->lock);
-       return r;
-}
-
-static void amdgpu_vm_free_reserved_vmid(struct amdgpu_device *adev,
-                                         struct amdgpu_vm *vm,
-                                         unsigned vmhub)
-{
-       struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
-
-       mutex_lock(&id_mgr->lock);
-       if (vm->reserved_vmid[vmhub]) {
-               list_add(&vm->reserved_vmid[vmhub]->list,
-                       &id_mgr->ids_lru);
-               vm->reserved_vmid[vmhub] = NULL;
-               atomic_dec(&id_mgr->reserved_vmid_num);
-       }
-       mutex_unlock(&id_mgr->lock);
-}
-
-static int amdgpu_vm_alloc_reserved_vmid(struct amdgpu_device *adev,
-                                        struct amdgpu_vm *vm,
-                                        unsigned vmhub)
-{
-       struct amdgpu_vm_id_manager *id_mgr;
-       struct amdgpu_vm_id *idle;
-       int r = 0;
-
-       id_mgr = &adev->vm_manager.id_mgr[vmhub];
-       mutex_lock(&id_mgr->lock);
-       if (vm->reserved_vmid[vmhub])
-               goto unlock;
-       if (atomic_inc_return(&id_mgr->reserved_vmid_num) >
-           AMDGPU_VM_MAX_RESERVED_VMID) {
-               DRM_ERROR("Over limitation of reserved vmid\n");
-               atomic_dec(&id_mgr->reserved_vmid_num);
-               r = -EINVAL;
-               goto unlock;
-       }
-       /* Select the first entry VMID */
-       idle = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vm_id, list);
-       list_del_init(&idle->list);
-       vm->reserved_vmid[vmhub] = idle;
-       mutex_unlock(&id_mgr->lock);
-
-       return 0;
-unlock:
-       mutex_unlock(&id_mgr->lock);
-       return r;
+       return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr,
+                                     adev->vm_manager.root_level);
 }
 
 /**
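
Most of the VMID management removed above moved into separate amdgpu_vmid_* helpers (referenced later in this diff). The core reuse test is unchanged: an ID can be recycled without a flush only if its owner, page-directory address, and flushed updates still match the job. A toy model of that check, with illustrative structs and a fence sequence number standing in for the dma_fence comparison:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct vmid {
	uint64_t owner;       /* client that last used the ID */
	uint64_t pd_gpu_addr; /* page-directory address it was flushed for */
	uint64_t flushed_seq; /* last VM-update fence seqno flushed */
};

static bool needs_flush(const struct vmid *id, uint64_t client,
			uint64_t pd_addr, uint64_t update_seq)
{
	return id->owner != client ||
	       id->pd_gpu_addr != pd_addr ||
	       update_seq > id->flushed_seq; /* newer updates pending */
}

int main(void)
{
	struct vmid id = { .owner = 1, .pd_gpu_addr = 0x1000, .flushed_seq = 5 };

	printf("%d\n", needs_flush(&id, 1, 0x1000, 5)); /* 0: reuse as-is */
	printf("%d\n", needs_flush(&id, 1, 0x1000, 7)); /* 1: flush needed */
	printf("%d\n", needs_flush(&id, 2, 0x1000, 5)); /* 1: other client */
	return 0;
}
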
@@ -715,7 +415,7 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev)
 
        has_compute_vm_bug = false;
 
-       ip_block = amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
+       ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
        if (ip_block) {
                /* Compute has a VM bug for GFX version < 7.
                   Compute has a VM bug for GFX 8 MEC firmware version < 673.*/
@@ -741,14 +441,14 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
 {
        struct amdgpu_device *adev = ring->adev;
        unsigned vmhub = ring->funcs->vmhub;
-       struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
-       struct amdgpu_vm_id *id;
+       struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+       struct amdgpu_vmid *id;
        bool gds_switch_needed;
        bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug;
 
-       if (job->vm_id == 0)
+       if (job->vmid == 0)
                return false;
-       id = &id_mgr->ids[job->vm_id];
+       id = &id_mgr->ids[job->vmid];
        gds_switch_needed = ring->funcs->emit_gds_switch && (
                id->gds_base != job->gds_base ||
                id->gds_size != job->gds_size ||
@@ -757,7 +457,7 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
                id->oa_base != job->oa_base ||
                id->oa_size != job->oa_size);
 
-       if (amdgpu_vm_had_gpu_reset(adev, id))
+       if (amdgpu_vmid_had_gpu_reset(adev, id))
                return true;
 
        return vm_flush_needed || gds_switch_needed;
@@ -772,7 +472,7 @@ static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev)
  * amdgpu_vm_flush - hardware flush the vm
  *
  * @ring: ring to use for flush
- * @vm_id: vmid number to use
+ * @vmid: vmid number to use
  * @pd_addr: address of the page directory
  *
  * Emit a VM flush when it is necessary.
@@ -781,8 +481,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
 {
        struct amdgpu_device *adev = ring->adev;
        unsigned vmhub = ring->funcs->vmhub;
-       struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
-       struct amdgpu_vm_id *id = &id_mgr->ids[job->vm_id];
+       struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+       struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
        bool gds_switch_needed = ring->funcs->emit_gds_switch && (
                id->gds_base != job->gds_base ||
                id->gds_size != job->gds_size ||
@@ -794,7 +494,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
        unsigned patch_offset = 0;
        int r;
 
-       if (amdgpu_vm_had_gpu_reset(adev, id)) {
+       if (amdgpu_vmid_had_gpu_reset(adev, id)) {
                gds_switch_needed = true;
                vm_flush_needed = true;
        }
@@ -811,8 +511,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
        if (ring->funcs->emit_vm_flush && vm_flush_needed) {
                struct dma_fence *fence;
 
-               trace_amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr);
-               amdgpu_ring_emit_vm_flush(ring, job->vm_id, job->vm_pd_addr);
+               trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
+               amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr);
 
                r = amdgpu_fence_emit(ring, &fence);
                if (r)
@@ -832,7 +532,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
                id->gws_size = job->gws_size;
                id->oa_base = job->oa_base;
                id->oa_size = job->oa_size;
-               amdgpu_ring_emit_gds_switch(ring, job->vm_id, job->gds_base,
+               amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
                                            job->gds_size, job->gws_base,
                                            job->gws_size, job->oa_base,
                                            job->oa_size);
@@ -849,49 +549,6 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
        return 0;
 }
 
-/**
- * amdgpu_vm_reset_id - reset VMID to zero
- *
- * @adev: amdgpu device structure
- * @vm_id: vmid number to use
- *
- * Reset saved GDS, GWS and OA to force switch on next flush.
- */
-void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub,
-                       unsigned vmid)
-{
-       struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
-       struct amdgpu_vm_id *id = &id_mgr->ids[vmid];
-
-       atomic64_set(&id->owner, 0);
-       id->gds_base = 0;
-       id->gds_size = 0;
-       id->gws_base = 0;
-       id->gws_size = 0;
-       id->oa_base = 0;
-       id->oa_size = 0;
-}
-
-/**
- * amdgpu_vm_reset_all_ids - reset VMID to zero
- *
- * @adev: amdgpu device structure
- *
- * Reset VMID to force flush on next use
- */
-void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev)
-{
-       unsigned i, j;
-
-       for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
-               struct amdgpu_vm_id_manager *id_mgr =
-                       &adev->vm_manager.id_mgr[i];
-
-               for (j = 1; j < id_mgr->num_ids; ++j)
-                       amdgpu_vm_reset_id(adev, i, j);
-       }
-}
-
 /**
  * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo
  *
@@ -1043,162 +700,52 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 }
 
 /*
- * amdgpu_vm_update_level - update a single level in the hierarchy
+ * amdgpu_vm_update_pde - update a single level in the hierarchy
  *
- * @adev: amdgpu_device pointer
+ * @params: parameters for the update
  * @vm: requested vm
  * @parent: parent directory
+ * @entry: entry to update
  *
- * Makes sure all entries in @parent are up to date.
- * Returns 0 for success, error for failure.
+ * Makes sure the requested entry in parent is up to date.
  */
-static int amdgpu_vm_update_level(struct amdgpu_device *adev,
-                                 struct amdgpu_vm *vm,
-                                 struct amdgpu_vm_pt *parent)
+static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params,
+                                struct amdgpu_vm *vm,
+                                struct amdgpu_vm_pt *parent,
+                                struct amdgpu_vm_pt *entry)
 {
-       struct amdgpu_bo *shadow;
-       struct amdgpu_ring *ring = NULL;
+       struct amdgpu_bo *bo = entry->base.bo, *shadow = NULL, *pbo;
        uint64_t pd_addr, shadow_addr = 0;
-       uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0;
-       unsigned count = 0, pt_idx, ndw = 0;
-       struct amdgpu_job *job;
-       struct amdgpu_pte_update_params params;
-       struct dma_fence *fence = NULL;
-       uint32_t incr;
-
-       int r;
-
-       if (!parent->entries)
-               return 0;
+       uint64_t pde, pt, flags;
+       unsigned level;
 
-       memset(&params, 0, sizeof(params));
-       params.adev = adev;
-       shadow = parent->base.bo->shadow;
+       /* Don't update huge pages here */
+       if (entry->huge)
+               return;
 
        if (vm->use_cpu_for_update) {
                pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
-               r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
-               if (unlikely(r))
-                       return r;
-
-               params.func = amdgpu_vm_cpu_set_ptes;
        } else {
-               ring = container_of(vm->entity.sched, struct amdgpu_ring,
-                                   sched);
-
-               /* padding, etc. */
-               ndw = 64;
-
-               /* assume the worst case */
-               ndw += parent->last_entry_used * 6;
-
                pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
-
-               if (shadow) {
+               shadow = parent->base.bo->shadow;
+               if (shadow)
                        shadow_addr = amdgpu_bo_gpu_offset(shadow);
-                       ndw *= 2;
-               } else {
-                       shadow_addr = 0;
-               }
-
-               r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
-               if (r)
-                       return r;
-
-               params.ib = &job->ibs[0];
-               params.func = amdgpu_vm_do_set_ptes;
-       }
-
-
-       /* walk over the address space and update the directory */
-       for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
-               struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
-               struct amdgpu_bo *bo = entry->base.bo;
-               uint64_t pde, pt;
-
-               if (bo == NULL)
-                       continue;
-
-               spin_lock(&vm->status_lock);
-               list_del_init(&entry->base.vm_status);
-               spin_unlock(&vm->status_lock);
-
-               pt = amdgpu_bo_gpu_offset(bo);
-               pt = amdgpu_gart_get_vm_pde(adev, pt);
-               /* Don't update huge pages here */
-               if ((parent->entries[pt_idx].addr & AMDGPU_PDE_PTE) ||
-                   parent->entries[pt_idx].addr == (pt | AMDGPU_PTE_VALID))
-                       continue;
-
-               parent->entries[pt_idx].addr = pt | AMDGPU_PTE_VALID;
-
-               pde = pd_addr + pt_idx * 8;
-               incr = amdgpu_bo_size(bo);
-               if (((last_pde + 8 * count) != pde) ||
-                   ((last_pt + incr * count) != pt) ||
-                   (count == AMDGPU_VM_MAX_UPDATE_SIZE)) {
-
-                       if (count) {
-                               if (shadow)
-                                       params.func(&params,
-                                                   last_shadow,
-                                                   last_pt, count,
-                                                   incr,
-                                                   AMDGPU_PTE_VALID);
-
-                               params.func(&params, last_pde,
-                                           last_pt, count, incr,
-                                           AMDGPU_PTE_VALID);
-                       }
-
-                       count = 1;
-                       last_pde = pde;
-                       last_shadow = shadow_addr + pt_idx * 8;
-                       last_pt = pt;
-               } else {
-                       ++count;
-               }
-       }
-
-       if (count) {
-               if (vm->root.base.bo->shadow)
-                       params.func(&params, last_shadow, last_pt,
-                                   count, incr, AMDGPU_PTE_VALID);
-
-               params.func(&params, last_pde, last_pt,
-                           count, incr, AMDGPU_PTE_VALID);
        }
 
-       if (!vm->use_cpu_for_update) {
-               if (params.ib->length_dw == 0) {
-                       amdgpu_job_free(job);
-               } else {
-                       amdgpu_ring_pad_ib(ring, params.ib);
-                       amdgpu_sync_resv(adev, &job->sync,
-                                        parent->base.bo->tbo.resv,
-                                        AMDGPU_FENCE_OWNER_VM, false);
-                       if (shadow)
-                               amdgpu_sync_resv(adev, &job->sync,
-                                                shadow->tbo.resv,
-                                                AMDGPU_FENCE_OWNER_VM, false);
-
-                       WARN_ON(params.ib->length_dw > ndw);
-                       r = amdgpu_job_submit(job, ring, &vm->entity,
-                                       AMDGPU_FENCE_OWNER_VM, &fence);
-                       if (r)
-                               goto error_free;
+       for (level = 0, pbo = parent->base.bo->parent; pbo; ++level)
+               pbo = pbo->parent;
 
-                       amdgpu_bo_fence(parent->base.bo, fence, true);
-                       dma_fence_put(vm->last_update);
-                       vm->last_update = fence;
-               }
+       level += params->adev->vm_manager.root_level;
+       pt = amdgpu_bo_gpu_offset(bo);
+       flags = AMDGPU_PTE_VALID;
+       amdgpu_gart_get_vm_pde(params->adev, level, &pt, &flags);
+       if (shadow) {
+               pde = shadow_addr + (entry - parent->entries) * 8;
+               params->func(params, pde, pt, 1, 0, flags);
        }
 
-       return 0;
-
-error_free:
-       amdgpu_job_free(job);
-       return r;
+       pde = pd_addr + (entry - parent->entries) * 8;
+       params->func(params, pde, pt, 1, 0, flags);
 }
 
 /*
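
amdgpu_vm_update_pde() above now writes exactly one directory entry instead of scanning the whole level: the PDE slot address is the parent's base address plus the entry's index times 8 bytes (one 64-bit PDE per entry). A sketch of that pointer arithmetic with illustrative types:

#include <stdint.h>
#include <stdio.h>

struct vm_pt { int dummy; };

static uint64_t pde_addr(uint64_t pd_addr, const struct vm_pt *entries,
			 const struct vm_pt *entry)
{
	/* mirrors: pde = pd_addr + (entry - parent->entries) * 8 */
	return pd_addr + (uint64_t)(entry - entries) * 8;
}

int main(void)
{
	struct vm_pt entries[512];

	printf("0x%llx\n",
	       (unsigned long long)pde_addr(0x100000, entries, &entries[3]));
	/* 0x100018: slot 3, 8 bytes per PDE */
	return 0;
}
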
@@ -1208,27 +755,29 @@ error_free:
  *
  * Mark all PD level as invalid after an error.
  */
-static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm,
-                                      struct amdgpu_vm_pt *parent)
+static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev,
+                                      struct amdgpu_vm *vm,
+                                      struct amdgpu_vm_pt *parent,
+                                      unsigned level)
 {
-       unsigned pt_idx;
+       unsigned pt_idx, num_entries;
 
        /*
         * Recurse into the subdirectories. This recursion is harmless because
         * we only have a maximum of 5 layers.
         */
-       for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
+       num_entries = amdgpu_vm_num_entries(adev, level);
+       for (pt_idx = 0; pt_idx < num_entries; ++pt_idx) {
                struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
 
                if (!entry->base.bo)
                        continue;
 
-               entry->addr = ~0ULL;
                spin_lock(&vm->status_lock);
                if (list_empty(&entry->base.vm_status))
                        list_add(&entry->base.vm_status, &vm->relocated);
                spin_unlock(&vm->status_lock);
-               amdgpu_vm_invalidate_level(vm, entry);
+               amdgpu_vm_invalidate_level(adev, vm, entry, level + 1);
        }
 }
 
@@ -1244,38 +793,63 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm,
 int amdgpu_vm_update_directories(struct amdgpu_device *adev,
                                 struct amdgpu_vm *vm)
 {
+       struct amdgpu_pte_update_params params;
+       struct amdgpu_job *job;
+       unsigned ndw = 0;
        int r = 0;
 
+       if (list_empty(&vm->relocated))
+               return 0;
+
+restart:
+       memset(&params, 0, sizeof(params));
+       params.adev = adev;
+
+       if (vm->use_cpu_for_update) {
+               r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
+               if (unlikely(r))
+                       return r;
+
+               params.func = amdgpu_vm_cpu_set_ptes;
+       } else {
+               ndw = 512 * 8;
+               r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
+               if (r)
+                       return r;
+
+               params.ib = &job->ibs[0];
+               params.func = amdgpu_vm_do_set_ptes;
+       }
+
        spin_lock(&vm->status_lock);
        while (!list_empty(&vm->relocated)) {
-               struct amdgpu_vm_bo_base *bo_base;
+               struct amdgpu_vm_bo_base *bo_base, *parent;
+               struct amdgpu_vm_pt *pt, *entry;
                struct amdgpu_bo *bo;
 
                bo_base = list_first_entry(&vm->relocated,
                                           struct amdgpu_vm_bo_base,
                                           vm_status);
+               list_del_init(&bo_base->vm_status);
                spin_unlock(&vm->status_lock);
 
                bo = bo_base->bo->parent;
-               if (bo) {
-                       struct amdgpu_vm_bo_base *parent;
-                       struct amdgpu_vm_pt *pt;
-
-                       parent = list_first_entry(&bo->va,
-                                                 struct amdgpu_vm_bo_base,
-                                                 bo_list);
-                       pt = container_of(parent, struct amdgpu_vm_pt, base);
-
-                       r = amdgpu_vm_update_level(adev, vm, pt);
-                       if (r) {
-                               amdgpu_vm_invalidate_level(vm, &vm->root);
-                               return r;
-                       }
-                       spin_lock(&vm->status_lock);
-               } else {
+               if (!bo) {
                        spin_lock(&vm->status_lock);
-                       list_del_init(&bo_base->vm_status);
+                       continue;
                }
+
+               parent = list_first_entry(&bo->va, struct amdgpu_vm_bo_base,
+                                         bo_list);
+               pt = container_of(parent, struct amdgpu_vm_pt, base);
+               entry = container_of(bo_base, struct amdgpu_vm_pt, base);
+
+               amdgpu_vm_update_pde(&params, vm, pt, entry);
+
+               spin_lock(&vm->status_lock);
+               if (!vm->use_cpu_for_update &&
+                   (ndw - params.ib->length_dw) < 32)
+                       break;
        }
        spin_unlock(&vm->status_lock);
 
@@ -1283,8 +857,44 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
                /* Flush HDP */
                mb();
                amdgpu_gart_flush_gpu_tlb(adev, 0);
+       } else if (params.ib->length_dw == 0) {
+               amdgpu_job_free(job);
+       } else {
+               struct amdgpu_bo *root = vm->root.base.bo;
+               struct amdgpu_ring *ring;
+               struct dma_fence *fence;
+
+               ring = container_of(vm->entity.sched, struct amdgpu_ring,
+                                   sched);
+
+               amdgpu_ring_pad_ib(ring, params.ib);
+               amdgpu_sync_resv(adev, &job->sync, root->tbo.resv,
+                                AMDGPU_FENCE_OWNER_VM, false);
+               if (root->shadow)
+                       amdgpu_sync_resv(adev, &job->sync,
+                                        root->shadow->tbo.resv,
+                                        AMDGPU_FENCE_OWNER_VM, false);
+
+               WARN_ON(params.ib->length_dw > ndw);
+               r = amdgpu_job_submit(job, ring, &vm->entity,
+                                     AMDGPU_FENCE_OWNER_VM, &fence);
+               if (r)
+                       goto error;
+
+               amdgpu_bo_fence(root, fence, true);
+               dma_fence_put(vm->last_update);
+               vm->last_update = fence;
        }
 
+       if (!list_empty(&vm->relocated))
+               goto restart;
+
+       return 0;
+
+error:
+       amdgpu_vm_invalidate_level(adev, vm, &vm->root,
+                                  adev->vm_manager.root_level);
+       amdgpu_job_free(job);
        return r;
 }
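
The restart loop added to amdgpu_vm_update_directories() batches PDE writes into one fixed-size indirect buffer at a time: entries are consumed until fewer than 32 spare dwords remain, the IB is submitted, and the loop restarts until the relocated list is drained. A toy model of that budget-and-restart pattern (the queue and the per-entry cost are illustrative; the dword numbers mirror the patch):

#include <stdio.h>

#define NDW     (512 * 8) /* command-buffer budget, in dwords */
#define RESERVE 32        /* stop filling when less than this remains */

static void process(int *pending, int cost_dw)
{
	while (*pending > 0) {
		int used = 0;

		while (*pending > 0 && (NDW - used) >= RESERVE) {
			used += cost_dw; /* emit one PDE update */
			(*pending)--;
		}
		printf("submit IB with %d dwords, %d entries left\n",
		       used, *pending);
	}
}

int main(void)
{
	int pending = 2000; /* relocated page-table entries to update */

	process(&pending, 10);
	return 0;
}
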
 
@@ -1302,18 +912,19 @@ void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
                         struct amdgpu_vm_pt **entry,
                         struct amdgpu_vm_pt **parent)
 {
-       unsigned idx, level = p->adev->vm_manager.num_level;
+       unsigned level = p->adev->vm_manager.root_level;
 
        *parent = NULL;
        *entry = &p->vm->root;
        while ((*entry)->entries) {
-               idx = addr >> (p->adev->vm_manager.block_size * level--);
-               idx %= amdgpu_bo_size((*entry)->base.bo) / 8;
+               unsigned shift = amdgpu_vm_level_shift(p->adev, level++);
+
                *parent = *entry;
-               *entry = &(*entry)->entries[idx];
+               *entry = &(*entry)->entries[addr >> shift];
+               addr &= (1ULL << shift) - 1;
        }
 
-       if (level)
+       if (level != AMDGPU_VM_PTB)
                *entry = NULL;
 }
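
The updated amdgpu_vm_get_entry() walks top-down using the per-level shifts: the index at each level is the remaining address shifted right by that level's shift, and the low bits carry down to the next level. A standalone sketch of the walk, assuming the shifts for block_size = 9 with a PDB2 root:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const unsigned shifts[] = { 27, 18, 9 }; /* PDB2, PDB1, PDB0 */
	uint64_t addr = 0x12345678;              /* GPU pfn to resolve */

	for (unsigned i = 0; i < 3; i++) {
		uint64_t idx = addr >> shifts[i];

		addr &= (1ull << shifts[i]) - 1; /* low bits go down a level */
		printf("level %u: index %llu\n", i, (unsigned long long)idx);
	}
	printf("PTE index within the PTB: %llu\n", (unsigned long long)addr);
	return 0;
}
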
 
@@ -1335,56 +946,42 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
                                        unsigned nptes, uint64_t dst,
                                        uint64_t flags)
 {
-       bool use_cpu_update = (p->func == amdgpu_vm_cpu_set_ptes);
        uint64_t pd_addr, pde;
 
        /* In the case of a mixed PT the PDE must point to it*/
-       if (p->adev->asic_type < CHIP_VEGA10 ||
-           nptes != AMDGPU_VM_PTE_COUNT(p->adev) ||
-           p->src ||
-           !(flags & AMDGPU_PTE_VALID)) {
-
-               dst = amdgpu_bo_gpu_offset(entry->base.bo);
-               dst = amdgpu_gart_get_vm_pde(p->adev, dst);
-               flags = AMDGPU_PTE_VALID;
-       } else {
+       if (p->adev->asic_type >= CHIP_VEGA10 && !p->src &&
+           nptes == AMDGPU_VM_PTE_COUNT(p->adev)) {
                /* Set the huge page flag to stop scanning at this PDE */
                flags |= AMDGPU_PDE_PTE;
        }
 
-       if (entry->addr == (dst | flags))
+       if (!(flags & AMDGPU_PDE_PTE)) {
+               if (entry->huge) {
+                       /* Add the entry to the relocated list to update it. */
+                       entry->huge = false;
+                       spin_lock(&p->vm->status_lock);
+                       list_move(&entry->base.vm_status, &p->vm->relocated);
+                       spin_unlock(&p->vm->status_lock);
+               }
                return;
+       }
 
-       entry->addr = (dst | flags);
-
-       if (use_cpu_update) {
-               /* In case a huge page is replaced with a system
-                * memory mapping, p->pages_addr != NULL and
-                * amdgpu_vm_cpu_set_ptes would try to translate dst
-                * through amdgpu_vm_map_gart. But dst is already a
-                * GPU address (of the page table). Disable
-                * amdgpu_vm_map_gart temporarily.
-                */
-               dma_addr_t *tmp;
-
-               tmp = p->pages_addr;
-               p->pages_addr = NULL;
+       entry->huge = true;
+       amdgpu_gart_get_vm_pde(p->adev, AMDGPU_VM_PDB0,
+                              &dst, &flags);
 
+       if (p->func == amdgpu_vm_cpu_set_ptes) {
                pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
-               pde = pd_addr + (entry - parent->entries) * 8;
-               amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags);
-
-               p->pages_addr = tmp;
        } else {
                if (parent->base.bo->shadow) {
                        pd_addr = amdgpu_bo_gpu_offset(parent->base.bo->shadow);
                        pde = pd_addr + (entry - parent->entries) * 8;
-                       amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
+                       p->func(p, pde, dst, 1, 0, flags);
                }
                pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
-               pde = pd_addr + (entry - parent->entries) * 8;
-               amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
        }
+       pde = pd_addr + (entry - parent->entries) * 8;
+       p->func(p, pde, dst, 1, 0, flags);
 }
 
 /**
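
amdgpu_vm_handle_huge_pages() now tracks huge pages with an explicit entry->huge flag instead of decoding entry->addr. The underlying test is unchanged: a PDE may point straight at a contiguous mapping (PDE_PTE set) only when the update covers every PTE the directory entry spans and the pages are not system memory. A sketch of that decision; the PTE count and the PDE_PTE bit position are assumptions, as on GFX9:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PTE_COUNT 512          /* PTEs per page table, assumed */
#define PDE_PTE   (1ull << 54) /* "this PDE is a PTE" flag, assumed */

static bool make_huge(unsigned nptes, bool system_pages, uint64_t *flags)
{
	if (system_pages || nptes != PTE_COUNT)
		return false; /* mixed PT: the PDE must keep pointing at it */
	*flags |= PDE_PTE;    /* stop the page walker at this level */
	return true;
}

int main(void)
{
	uint64_t flags = 0;

	printf("huge: %d\n", make_huge(512, false, &flags)); /* 1 */
	printf("huge: %d\n", make_huge(100, false, &flags)); /* 0 */
	return 0;
}
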
@@ -1429,7 +1026,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
                amdgpu_vm_handle_huge_pages(params, entry, parent,
                                            nptes, dst, flags);
                /* We don't need to update PTEs for huge pages */
-               if (entry->addr & AMDGPU_PDE_PTE)
+               if (entry->huge)
                        continue;
 
                pt = entry->base.bo;
@@ -1588,14 +1185,14 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
          *
          * The second command is for the shadow pagetables.
         */
-       ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2;
+       if (vm->root.base.bo->shadow)
+               ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2;
+       else
+               ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1);
 
        /* padding, etc. */
        ndw = 64;
 
-       /* one PDE write for each huge page */
-       ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 6;
-
        if (pages_addr) {
                /* copy commands needed */
                ndw += ncmds * adev->vm_manager.vm_pte_funcs->copy_pte_num_dw;
@@ -1639,7 +1236,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
                addr = 0;
        }
 
-       r = amdgpu_sync_fence(adev, &job->sync, exclusive);
+       r = amdgpu_sync_fence(adev, &job->sync, exclusive, false);
        if (r)
                goto error_free;
 
@@ -1670,7 +1267,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
 error_free:
        amdgpu_job_free(job);
-       amdgpu_vm_invalidate_level(vm, &vm->root);
        return r;
 }
 
@@ -2081,18 +1677,31 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
        spin_lock(&vm->status_lock);
        while (!list_empty(&vm->moved)) {
                struct amdgpu_bo_va *bo_va;
+               struct reservation_object *resv;
 
                bo_va = list_first_entry(&vm->moved,
                        struct amdgpu_bo_va, base.vm_status);
                spin_unlock(&vm->status_lock);
 
+               resv = bo_va->base.bo->tbo.resv;
+
                /* Per VM BOs never need to be cleared in the page tables */
-               clear = bo_va->base.bo->tbo.resv != vm->root.base.bo->tbo.resv;
+               if (resv == vm->root.base.bo->tbo.resv)
+                       clear = false;
+               /* Try to reserve the BO to avoid clearing its ptes */
+               else if (!amdgpu_vm_debug && reservation_object_trylock(resv))
+                       clear = false;
+               /* Somebody else is using the BO right now */
+               else
+                       clear = true;
 
                r = amdgpu_vm_bo_update(adev, bo_va, clear);
                if (r)
                        return r;
 
+               if (!clear && resv != vm->root.base.bo->tbo.resv)
+                       reservation_object_unlock(resv);
+
                spin_lock(&vm->status_lock);
        }
        spin_unlock(&vm->status_lock);
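
The hunk above avoids needlessly clearing page tables for moved BOs: if the BO's reservation object can be taken opportunistically, the real mapping is written; only when somebody else holds it does the code fall back to clearing the PTEs. A userspace sketch of the pattern, with pthread_mutex_trylock() standing in for reservation_object_trylock():

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t resv = PTHREAD_MUTEX_INITIALIZER;

static void update_bo(bool shared_resv)
{
	bool clear, locked = false;

	if (shared_resv) {
		clear = false; /* per-VM BO: resv already held by the VM */
	} else if (pthread_mutex_trylock(&resv) == 0) {
		clear = false; /* got the lock, safe to write real PTEs */
		locked = true;
	} else {
		clear = true;  /* someone else holds the BO, clear instead */
	}

	printf("update with clear=%d\n", clear);

	if (locked)
		pthread_mutex_unlock(&resv);
}

int main(void)
{
	update_bo(true);           /* per-VM BO: clear=0 */
	update_bo(false);          /* uncontended: clear=0 */
	pthread_mutex_lock(&resv); /* simulate another user of the BO */
	update_bo(false);          /* contended: clear=1 */
	pthread_mutex_unlock(&resv);
	return 0;
}
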
@@ -2132,8 +1741,26 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
        INIT_LIST_HEAD(&bo_va->valids);
        INIT_LIST_HEAD(&bo_va->invalids);
 
-       if (bo)
-               list_add_tail(&bo_va->base.bo_list, &bo->va);
+       if (!bo)
+               return bo_va;
+
+       list_add_tail(&bo_va->base.bo_list, &bo->va);
+
+       if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
+               return bo_va;
+
+       if (bo->preferred_domains &
+           amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
+               return bo_va;
+
+       /*
+        * We checked all the prerequisites, but it looks like this per-VM BO
+        * is currently evicted. Add the BO to the evicted list to make sure
+        * it is validated on next VM use to avoid faults.
+        */
+       spin_lock(&vm->status_lock);
+       list_move_tail(&bo_va->base.vm_status, &vm->evicted);
+       spin_unlock(&vm->status_lock);
 
        return bo_va;
 }
@@ -2555,48 +2182,70 @@ static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size)
                return ((bits + 3) / 2);
 }
 
-/**
- * amdgpu_vm_set_fragment_size - adjust fragment size in PTE
- *
- * @adev: amdgpu_device pointer
- * @fragment_size_default: the default fragment size if it's set auto
- */
-void amdgpu_vm_set_fragment_size(struct amdgpu_device *adev,
-                                uint32_t fragment_size_default)
-{
-       if (amdgpu_vm_fragment_size == -1)
-               adev->vm_manager.fragment_size = fragment_size_default;
-       else
-               adev->vm_manager.fragment_size = amdgpu_vm_fragment_size;
-}
-
 /**
  * amdgpu_vm_adjust_size - adjust vm size, block size and fragment size
  *
  * @adev: amdgpu_device pointer
  * @vm_size: the default vm size if it's set auto
  */
-void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size,
-                          uint32_t fragment_size_default)
+void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size,
+                          uint32_t fragment_size_default, unsigned max_level,
+                          unsigned max_bits)
 {
-       /* adjust vm size firstly */
-       if (amdgpu_vm_size == -1)
-               adev->vm_manager.vm_size = vm_size;
-       else
-               adev->vm_manager.vm_size = amdgpu_vm_size;
+       uint64_t tmp;
+
+       /* adjust vm size first */
+       if (amdgpu_vm_size != -1) {
+               unsigned max_size = 1 << (max_bits - 30);
 
-       /* block size depends on vm size */
-       if (amdgpu_vm_block_size == -1)
+               vm_size = amdgpu_vm_size;
+               if (vm_size > max_size) {
+                       dev_warn(adev->dev, "VM size (%d) too large, max is %u GB\n",
+                                amdgpu_vm_size, max_size);
+                       vm_size = max_size;
+               }
+       }
+
+       adev->vm_manager.max_pfn = (uint64_t)vm_size << 18;
+
+       tmp = roundup_pow_of_two(adev->vm_manager.max_pfn);
+       if (amdgpu_vm_block_size != -1)
+               tmp >>= amdgpu_vm_block_size - 9;
+       tmp = DIV_ROUND_UP(fls64(tmp) - 1, 9) - 1;
+       adev->vm_manager.num_level = min(max_level, (unsigned)tmp);
+       switch (adev->vm_manager.num_level) {
+       case 3:
+               adev->vm_manager.root_level = AMDGPU_VM_PDB2;
+               break;
+       case 2:
+               adev->vm_manager.root_level = AMDGPU_VM_PDB1;
+               break;
+       case 1:
+               adev->vm_manager.root_level = AMDGPU_VM_PDB0;
+               break;
+       default:
+               dev_err(adev->dev, "VMPT only supports 2~4+1 levels\n");
+       }
+       /* block size depends on vm size and hw setup*/
+       if (amdgpu_vm_block_size != -1)
                adev->vm_manager.block_size =
-                       amdgpu_vm_get_block_size(adev->vm_manager.vm_size);
+                       min((unsigned)amdgpu_vm_block_size, max_bits
+                           - AMDGPU_GPU_PAGE_SHIFT
+                           - 9 * adev->vm_manager.num_level);
+       else if (adev->vm_manager.num_level > 1)
+               adev->vm_manager.block_size = 9;
        else
-               adev->vm_manager.block_size = amdgpu_vm_block_size;
+               adev->vm_manager.block_size = amdgpu_vm_get_block_size(tmp);
 
-       amdgpu_vm_set_fragment_size(adev, fragment_size_default);
+       if (amdgpu_vm_fragment_size == -1)
+               adev->vm_manager.fragment_size = fragment_size_default;
+       else
+               adev->vm_manager.fragment_size = amdgpu_vm_fragment_size;
 
-       DRM_INFO("vm size is %llu GB, block size is %u-bit, fragment size is %u-bit\n",
-               adev->vm_manager.vm_size, adev->vm_manager.block_size,
-               adev->vm_manager.fragment_size);
+       DRM_INFO("vm size is %u GB, %u levels, block size is %u-bit, fragment size is %u-bit\n",
+                vm_size, adev->vm_manager.num_level + 1,
+                adev->vm_manager.block_size,
+                adev->vm_manager.fragment_size);
 }
 
 /**
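
amdgpu_vm_adjust_size() now derives the number of translation levels from the requested VM size: max_pfn is vm_size GB worth of 4 KiB pages, and each directory level above the block size resolves 9 more bits. A compilable sketch of that math; the kernel additionally clamps against max_level and honours the amdgpu_vm_block_size module parameter, both omitted here:

#include <stdint.h>
#include <stdio.h>

static unsigned fls64(uint64_t x) { return x ? 64 - __builtin_clzll(x) : 0; }

static unsigned num_levels(uint32_t vm_size_gb)
{
	uint64_t max_pfn = (uint64_t)vm_size_gb << 18; /* GB -> 4 KiB pages */
	uint64_t tmp = 1ull << fls64(max_pfn - 1);     /* roundup_pow_of_two */

	/* mirrors: DIV_ROUND_UP(fls64(tmp) - 1, 9) - 1 */
	return (fls64(tmp) - 1 + 8) / 9 - 1;
}

int main(void)
{
	printf("    1 GB -> %u directory levels\n", num_levels(1));     /* 1 */
	printf("  256 GB -> %u directory levels\n", num_levels(256));   /* 2 */
	printf("16384 GB -> %u directory levels\n", num_levels(16384)); /* 3 */
	return 0;
}
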
@@ -2615,13 +2264,12 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
                AMDGPU_VM_PTE_COUNT(adev) * 8);
        unsigned ring_instance;
        struct amdgpu_ring *ring;
-       struct amd_sched_rq *rq;
+       struct drm_sched_rq *rq;
        int r, i;
        u64 flags;
        uint64_t init_pde_value = 0;
 
        vm->va = RB_ROOT_CACHED;
-       vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter);
        for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
                vm->reserved_vmid[i] = NULL;
        spin_lock_init(&vm->status_lock);
@@ -2635,9 +2283,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
        ring_instance = atomic_inc_return(&adev->vm_manager.vm_pte_next_ring);
        ring_instance %= adev->vm_manager.vm_pte_num_rings;
        ring = adev->vm_manager.vm_pte_rings[ring_instance];
-       rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL];
-       r = amd_sched_entity_init(&ring->sched, &vm->entity,
-                                 rq, amdgpu_sched_jobs);
+       rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
+       r = drm_sched_entity_init(&ring->sched, &vm->entity,
+                                 rq, amdgpu_sched_jobs, NULL);
        if (r)
                return r;
 
@@ -2670,7 +2318,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
                flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
                                AMDGPU_GEM_CREATE_SHADOW);
 
-       r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true,
+       r = amdgpu_bo_create(adev,
+                            amdgpu_vm_bo_size(adev, adev->vm_manager.root_level),
+                            align, true,
                             AMDGPU_GEM_DOMAIN_VRAM,
                             flags,
                             NULL, NULL, init_pde_value, &vm->root.base.bo);
@@ -2716,7 +2366,7 @@ error_free_root:
        vm->root.base.bo = NULL;
 
 error_free_sched_entity:
-       amd_sched_entity_fini(&ring->sched, &vm->entity);
+       drm_sched_entity_fini(&ring->sched, &vm->entity);
 
        return r;
 }
@@ -2724,26 +2374,31 @@ error_free_sched_entity:
 /**
  * amdgpu_vm_free_levels - free PD/PT levels
  *
- * @level: PD/PT starting level to free
+ * @adev: amdgpu device structure
+ * @parent: PD/PT starting level to free
+ * @level: level of parent structure
  *
  * Free the page directory or page table level and all sub levels.
  */
-static void amdgpu_vm_free_levels(struct amdgpu_vm_pt *level)
+static void amdgpu_vm_free_levels(struct amdgpu_device *adev,
+                                 struct amdgpu_vm_pt *parent,
+                                 unsigned level)
 {
-       unsigned i;
+       unsigned i, num_entries = amdgpu_vm_num_entries(adev, level);
 
-       if (level->base.bo) {
-               list_del(&level->base.bo_list);
-               list_del(&level->base.vm_status);
-               amdgpu_bo_unref(&level->base.bo->shadow);
-               amdgpu_bo_unref(&level->base.bo);
+       if (parent->base.bo) {
+               list_del(&parent->base.bo_list);
+               list_del(&parent->base.vm_status);
+               amdgpu_bo_unref(&parent->base.bo->shadow);
+               amdgpu_bo_unref(&parent->base.bo);
        }
 
-       if (level->entries)
-               for (i = 0; i <= level->last_entry_used; i++)
-                       amdgpu_vm_free_levels(&level->entries[i]);
+       if (parent->entries)
+               for (i = 0; i < num_entries; i++)
+                       amdgpu_vm_free_levels(adev, &parent->entries[i],
+                                             level + 1);
 
-       kvfree(level->entries);
+       kvfree(parent->entries);
 }
 
 /**
@@ -2775,7 +2430,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
                spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
        }
 
-       amd_sched_entity_fini(vm->entity.sched, &vm->entity);
+       drm_sched_entity_fini(vm->entity.sched, &vm->entity);
 
        if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
                dev_err(adev->dev, "still active bo inside vm\n");
@@ -2801,13 +2456,14 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
        if (r) {
                dev_err(adev->dev, "Leaking page tables because BO reservation failed\n");
        } else {
-               amdgpu_vm_free_levels(&vm->root);
+               amdgpu_vm_free_levels(adev, &vm->root,
+                                     adev->vm_manager.root_level);
                amdgpu_bo_unreserve(root);
        }
        amdgpu_bo_unref(&root);
        dma_fence_put(vm->last_update);
        for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
-               amdgpu_vm_free_reserved_vmid(adev, vm, i);
+               amdgpu_vmid_free_reserved(adev, vm, i);
 }
 
 /**
@@ -2826,17 +2482,21 @@ bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
 
        spin_lock(&adev->vm_manager.pasid_lock);
        vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
-       spin_unlock(&adev->vm_manager.pasid_lock);
-       if (!vm)
+       if (!vm) {
                /* VM not found, can't track fault credit */
+               spin_unlock(&adev->vm_manager.pasid_lock);
                return true;
+       }
 
        /* No lock needed. only accessed by IRQ handler */
-       if (!vm->fault_credit)
+       if (!vm->fault_credit) {
                /* Too many faults in this VM */
+               spin_unlock(&adev->vm_manager.pasid_lock);
                return false;
+       }
 
        vm->fault_credit--;
+       spin_unlock(&adev->vm_manager.pasid_lock);
        return true;
 }
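
The fault-credit hunk above extends pasid_lock across the whole lookup-and-decrement, so a concurrent VM teardown cannot free the VM between the idr_find() and the credit update. A sketch of the corrected pattern, with a pthread mutex standing in for the kernel spinlock and illustrative structures:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct vm { unsigned fault_credit; };

static pthread_mutex_t pasid_lock = PTHREAD_MUTEX_INITIALIZER;
static struct vm *pasid_table[16]; /* toy pasid -> vm map */

static bool fault_credit(unsigned pasid)
{
	bool ret;

	pthread_mutex_lock(&pasid_lock);
	struct vm *vm = pasid < 16 ? pasid_table[pasid] : NULL;
	if (!vm)
		ret = true;  /* VM not found, can't track fault credit */
	else if (!vm->fault_credit)
		ret = false; /* too many faults in this VM */
	else {
		vm->fault_credit--;
		ret = true;
	}
	pthread_mutex_unlock(&pasid_lock); /* held across the decrement */
	return ret;
}

int main(void)
{
	struct vm vm = { .fault_credit = 2 };

	pasid_table[1] = &vm;
	printf("%d\n", fault_credit(1)); /* 1 */
	printf("%d\n", fault_credit(1)); /* 1 */
	printf("%d\n", fault_credit(1)); /* 0: credit exhausted */
	return 0;
}
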
 
@@ -2849,23 +2509,9 @@ bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
  */
 void amdgpu_vm_manager_init(struct amdgpu_device *adev)
 {
-       unsigned i, j;
-
-       for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
-               struct amdgpu_vm_id_manager *id_mgr =
-                       &adev->vm_manager.id_mgr[i];
-
-               mutex_init(&id_mgr->lock);
-               INIT_LIST_HEAD(&id_mgr->ids_lru);
-               atomic_set(&id_mgr->reserved_vmid_num, 0);
+       unsigned i;
 
-               /* skip over VMID 0, since it is the system VM */
-               for (j = 1; j < id_mgr->num_ids; ++j) {
-                       amdgpu_vm_reset_id(adev, i, j);
-                       amdgpu_sync_create(&id_mgr->ids[i].active);
-                       list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru);
-               }
-       }
+       amdgpu_vmid_mgr_init(adev);
 
        adev->vm_manager.fence_context =
                dma_fence_context_alloc(AMDGPU_MAX_RINGS);
@@ -2873,7 +2519,6 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
                adev->vm_manager.seqno[i] = 0;
 
        atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
-       atomic64_set(&adev->vm_manager.client_counter, 0);
        spin_lock_init(&adev->vm_manager.prt_lock);
        atomic_set(&adev->vm_manager.num_prt_users, 0);
 
@@ -2906,24 +2551,10 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
  */
 void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
 {
-       unsigned i, j;
-
        WARN_ON(!idr_is_empty(&adev->vm_manager.pasid_idr));
        idr_destroy(&adev->vm_manager.pasid_idr);
 
-       for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
-               struct amdgpu_vm_id_manager *id_mgr =
-                       &adev->vm_manager.id_mgr[i];
-
-               mutex_destroy(&id_mgr->lock);
-               for (j = 0; j < AMDGPU_NUM_VM; ++j) {
-                       struct amdgpu_vm_id *id = &id_mgr->ids[j];
-
-                       amdgpu_sync_free(&id->active);
-                       dma_fence_put(id->flushed_updates);
-                       dma_fence_put(id->last_flush);
-               }
-       }
+       amdgpu_vmid_mgr_fini(adev);
 }
 
 int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
@@ -2936,13 +2567,12 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
        switch (args->in.op) {
        case AMDGPU_VM_OP_RESERVE_VMID:
                /* currently, we only support reserving a vmid from the gfxhub */
-               r = amdgpu_vm_alloc_reserved_vmid(adev, &fpriv->vm,
-                                                 AMDGPU_GFXHUB);
+               r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB);
                if (r)
                        return r;
                break;
        case AMDGPU_VM_OP_UNRESERVE_VMID:
-               amdgpu_vm_free_reserved_vmid(adev, &fpriv->vm, AMDGPU_GFXHUB);
+               amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB);
                break;
        default:
                return -EINVAL;