diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 3c0310576b3bfb515d9dccc2cc3e6fffd935d80e..ec1ec08d40584865b8c378b719f5ecbb5387a794 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -266,6 +266,32 @@ static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo)
        spin_unlock(&vm_bo->vm->status_lock);
 }
 
+/**
+ * amdgpu_vm_bo_reset_state_machine - reset the vm_bo state machine
+ * @vm: the VM whose state machine should be reset
+ *
+ * Move all vm_bo objects in the VM into a state where they will be updated
+ * again during validation.
+ */
+static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm)
+{
+       struct amdgpu_vm_bo_base *vm_bo, *tmp;
+
+       spin_lock(&vm->status_lock);
+       list_splice_init(&vm->done, &vm->invalidated);
+       list_for_each_entry(vm_bo, &vm->invalidated, vm_status)
+               vm_bo->moved = true;
+       list_for_each_entry_safe(vm_bo, tmp, &vm->idle, vm_status) {
+               struct amdgpu_bo *bo = vm_bo->bo;
+
+               if (!bo || bo->tbo.type != ttm_bo_type_kernel)
+                       list_move(&vm_bo->vm_status, &vm_bo->vm->moved);
+               else if (bo->parent)
+                       list_move(&vm_bo->vm_status, &vm_bo->vm->relocated);
+       }
+       spin_unlock(&vm->status_lock);
+}
+
 /**
  * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
  *
@@ -351,6 +377,58 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
        spin_unlock(&adev->mman.bdev.lru_lock);
 }
 
+/* Create scheduler entities for page table updates */
+static int amdgpu_vm_init_entities(struct amdgpu_device *adev,
+                                  struct amdgpu_vm *vm)
+{
+       int r;
+
+       r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL,
+                                 adev->vm_manager.vm_pte_scheds,
+                                 adev->vm_manager.vm_pte_num_scheds, NULL);
+       if (r)
+               goto error;
+
+       return drm_sched_entity_init(&vm->delayed, DRM_SCHED_PRIORITY_NORMAL,
+                                    adev->vm_manager.vm_pte_scheds,
+                                    adev->vm_manager.vm_pte_num_scheds, NULL);
+
+error:
+       drm_sched_entity_destroy(&vm->immediate);
+       return r;
+}
+
+/* Destroy the scheduler entities used for page table updates */
+static void amdgpu_vm_fini_entities(struct amdgpu_vm *vm)
+{
+       drm_sched_entity_destroy(&vm->immediate);
+       drm_sched_entity_destroy(&vm->delayed);
+}
+
+/**
+ * amdgpu_vm_generation - return the page table re-generation counter
+ * @adev: the amdgpu_device
+ * @vm: optional VM to check, might be NULL
+ *
+ * Returns a page table re-generation token which allows callers to check
+ * whether submissions are still valid for this VM. The VM parameter may be
+ * NULL, in which case only the VRAM lost counter is used.
+ */
+uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+       uint64_t result = (u64)atomic_read(&adev->vram_lost_counter) << 32;
+
+       if (!vm)
+               return result;
+
+       result += vm->generation;
+       /* Add one if the page tables will be re-generated on next CS */
+       if (drm_sched_entity_error(&vm->delayed))
+               ++result;
+
+       return result;
+}
+
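For illustration only (not part of this patch): a hypothetical submission path could record the token returned by amdgpu_vm_generation() when a job is prepared and compare it again right before execution; the structure and function names below are invented for this sketch.

struct my_job {
        struct amdgpu_vm        *vm;
        uint64_t                vm_generation;
};

static void my_job_prepare(struct amdgpu_device *adev, struct my_job *job)
{
        /* Snapshot the VRAM-lost counter and page table generation. */
        job->vm_generation = amdgpu_vm_generation(adev, job->vm);
}

static int my_job_run(struct amdgpu_device *adev, struct my_job *job)
{
        /*
         * If VRAM was lost or the page tables were (or will be)
         * re-generated in the meantime, the mappings baked into this
         * job are stale and it must not reach the hardware.
         */
        if (job->vm_generation != amdgpu_vm_generation(adev, job->vm))
                return -ECANCELED;

        return 0;
}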
 /**
  * amdgpu_vm_validate_pt_bos - validate the page table BOs
  *
@@ -373,6 +451,15 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
        struct amdgpu_bo *bo;
        int r;
 
+       if (drm_sched_entity_error(&vm->delayed)) {
+               ++vm->generation;
+               amdgpu_vm_bo_reset_state_machine(vm);
+               amdgpu_vm_fini_entities(vm);
+               r = amdgpu_vm_init_entities(adev, vm);
+               if (r)
+                       return r;
+       }
+
        spin_lock(&vm->status_lock);
        while (!list_empty(&vm->evicted)) {
                bo_base = list_first_entry(&vm->evicted,
@@ -920,42 +1007,51 @@ error_unlock:
        return r;
 }
 
+static void amdgpu_vm_bo_get_memory(struct amdgpu_bo_va *bo_va,
+                                   struct amdgpu_mem_stats *stats)
+{
+       struct amdgpu_vm *vm = bo_va->base.vm;
+       struct amdgpu_bo *bo = bo_va->base.bo;
+
+       if (!bo)
+               return;
+
+       /*
+        * For now ignore BOs which are currently locked and potentially
+        * changing their location.
+        */
+       if (bo->tbo.base.resv != vm->root.bo->tbo.base.resv &&
+           !dma_resv_trylock(bo->tbo.base.resv))
+               return;
+
+       amdgpu_bo_get_memory(bo, stats);
+       if (bo->tbo.base.resv != vm->root.bo->tbo.base.resv)
+               dma_resv_unlock(bo->tbo.base.resv);
+}
+
 void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
                          struct amdgpu_mem_stats *stats)
 {
        struct amdgpu_bo_va *bo_va, *tmp;
 
        spin_lock(&vm->status_lock);
-       list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) {
-               if (!bo_va->base.bo)
-                       continue;
-               amdgpu_bo_get_memory(bo_va->base.bo, stats);
-       }
-       list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.vm_status) {
-               if (!bo_va->base.bo)
-                       continue;
-               amdgpu_bo_get_memory(bo_va->base.bo, stats);
-       }
-       list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status) {
-               if (!bo_va->base.bo)
-                       continue;
-               amdgpu_bo_get_memory(bo_va->base.bo, stats);
-       }
-       list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) {
-               if (!bo_va->base.bo)
-                       continue;
-               amdgpu_bo_get_memory(bo_va->base.bo, stats);
-       }
-       list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) {
-               if (!bo_va->base.bo)
-                       continue;
-               amdgpu_bo_get_memory(bo_va->base.bo, stats);
-       }
-       list_for_each_entry_safe(bo_va, tmp, &vm->done, base.vm_status) {
-               if (!bo_va->base.bo)
-                       continue;
-               amdgpu_bo_get_memory(bo_va->base.bo, stats);
-       }
+       list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status)
+               amdgpu_vm_bo_get_memory(bo_va, stats);
+
+       list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.vm_status)
+               amdgpu_vm_bo_get_memory(bo_va, stats);
+
+       list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status)
+               amdgpu_vm_bo_get_memory(bo_va, stats);
+
+       list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status)
+               amdgpu_vm_bo_get_memory(bo_va, stats);
+
+       list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status)
+               amdgpu_vm_bo_get_memory(bo_va, stats);
+
+       list_for_each_entry_safe(bo_va, tmp, &vm->done, base.vm_status)
+               amdgpu_vm_bo_get_memory(bo_va, stats);
        spin_unlock(&vm->status_lock);
 }
 
@@ -1358,6 +1454,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
        amdgpu_vm_bo_base_init(&bo_va->base, vm, bo);
 
        bo_va->ref_count = 1;
+       bo_va->last_pt_update = dma_fence_get_stub();
        INIT_LIST_HEAD(&bo_va->valids);
        INIT_LIST_HEAD(&bo_va->invalids);
 
@@ -1433,14 +1530,14 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
        uint64_t eaddr;
 
        /* validate the parameters */
-       if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK ||
-           size == 0 || size & ~PAGE_MASK)
+       if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK)
+               return -EINVAL;
+       if (saddr + size <= saddr || offset + size <= offset)
                return -EINVAL;
 
        /* make sure object fit at this offset */
        eaddr = saddr + size - 1;
-       if (saddr >= eaddr ||
-           (bo && offset + size > amdgpu_bo_size(bo)) ||
+       if ((bo && offset + size > amdgpu_bo_size(bo)) ||
            (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
                return -EINVAL;
 
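A short worked example of the reworked validation (illustrative only): with size = 0x2000 and offset = 0xfffffffffffff000, the sum offset + size wraps around to 0x1000, so the old test offset + size > amdgpu_bo_size(bo) could pass even though the range cannot fit in the BO; the new comparison offset + size <= offset catches the wrap directly. The same comparison on saddr also subsumes the old size == 0 check, because saddr + 0 == saddr.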
@@ -1499,14 +1596,14 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
        int r;
 
        /* validate the parameters */
-       if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK ||
-           size == 0 || size & ~PAGE_MASK)
+       if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK)
+               return -EINVAL;
+       if (saddr + size <= saddr || offset + size <= offset)
                return -EINVAL;
 
        /* make sure object fit at this offset */
        eaddr = saddr + size - 1;
-       if (saddr >= eaddr ||
-           (bo && offset + size > amdgpu_bo_size(bo)) ||
+       if ((bo && offset + size > amdgpu_bo_size(bo)) ||
            (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
                return -EINVAL;
 
@@ -1674,18 +1771,30 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
 
        /* Insert partial mapping before the range */
        if (!list_empty(&before->list)) {
+               struct amdgpu_bo *bo = before->bo_va->base.bo;
+
                amdgpu_vm_it_insert(before, &vm->va);
                if (before->flags & AMDGPU_PTE_PRT)
                        amdgpu_vm_prt_get(adev);
+
+               if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
+                   !before->bo_va->base.moved)
+                       amdgpu_vm_bo_moved(&before->bo_va->base);
        } else {
                kfree(before);
        }
 
        /* Insert partial mapping after the range */
        if (!list_empty(&after->list)) {
+               struct amdgpu_bo *bo = after->bo_va->base.bo;
+
                amdgpu_vm_it_insert(after, &vm->va);
                if (after->flags & AMDGPU_PTE_PRT)
                        amdgpu_vm_prt_get(adev);
+
+               if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
+                   !after->bo_va->base.moved)
+                       amdgpu_vm_bo_moved(&after->bo_va->base);
        } else {
                kfree(after);
        }
@@ -2012,13 +2121,14 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
  *
  * @adev: amdgpu_device pointer
  * @vm: requested vm
+ * @xcp_id: GPU partition selection id
  *
  * Init @vm fields.
  *
  * Returns:
  * 0 for success, error for failure.
  */
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id)
 {
        struct amdgpu_bo *root_bo;
        struct amdgpu_bo_vm *root;
@@ -2038,19 +2148,10 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
        INIT_LIST_HEAD(&vm->pt_freed);
        INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work);
 
-       /* create scheduler entities for page table updates */
-       r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL,
-                                 adev->vm_manager.vm_pte_scheds,
-                                 adev->vm_manager.vm_pte_num_scheds, NULL);
+       r = amdgpu_vm_init_entities(adev, vm);
        if (r)
                return r;
 
-       r = drm_sched_entity_init(&vm->delayed, DRM_SCHED_PRIORITY_NORMAL,
-                                 adev->vm_manager.vm_pte_scheds,
-                                 adev->vm_manager.vm_pte_num_scheds, NULL);
-       if (r)
-               goto error_free_immediate;
-
        vm->pte_support_ats = false;
        vm->is_compute_context = false;
 
@@ -2067,15 +2168,17 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
                vm->update_funcs = &amdgpu_vm_cpu_funcs;
        else
                vm->update_funcs = &amdgpu_vm_sdma_funcs;
-       vm->last_update = NULL;
+
+       vm->last_update = dma_fence_get_stub();
        vm->last_unlocked = dma_fence_get_stub();
        vm->last_tlb_flush = dma_fence_get_stub();
+       vm->generation = 0;
 
        mutex_init(&vm->eviction_lock);
        vm->evicting = false;
 
        r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level,
-                               false, &root);
+                               false, &root, xcp_id);
        if (r)
                goto error_free_delayed;
        root_bo = &root->bo;
@@ -2110,10 +2213,7 @@ error_free_root:
 error_free_delayed:
        dma_fence_put(vm->last_tlb_flush);
        dma_fence_put(vm->last_unlocked);
-       drm_sched_entity_destroy(&vm->delayed);
-
-error_free_immediate:
-       drm_sched_entity_destroy(&vm->immediate);
+       amdgpu_vm_fini_entities(vm);
 
        return r;
 }
@@ -2146,16 +2246,16 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
        if (r)
                return r;
 
-       /* Sanity checks */
-       if (!amdgpu_vm_pt_is_root_clean(adev, vm)) {
-               r = -EINVAL;
-               goto unreserve_bo;
-       }
-
        /* Check if PD needs to be reinitialized and do it before
         * changing any other state, in case it fails.
         */
        if (pte_support_ats != vm->pte_support_ats) {
+               /* Sanity checks */
+               if (!amdgpu_vm_pt_is_root_clean(adev, vm)) {
+                       r = -EINVAL;
+                       goto unreserve_bo;
+               }
+
                vm->pte_support_ats = pte_support_ats;
                r = amdgpu_vm_pt_clear(adev, vm, to_amdgpu_bo_vm(vm->root.bo),
                                       false);
@@ -2192,7 +2292,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
                goto unreserve_bo;
 
        dma_fence_put(vm->last_update);
-       vm->last_update = NULL;
+       vm->last_update = dma_fence_get_stub();
        vm->is_compute_context = true;
 
        /* Free the shadow bo for compute VM */
@@ -2266,8 +2366,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
        amdgpu_bo_unref(&root);
        WARN_ON(vm->root.bo);
 
-       drm_sched_entity_destroy(&vm->immediate);
-       drm_sched_entity_destroy(&vm->delayed);
+       amdgpu_vm_fini_entities(vm);
 
        if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
                dev_err(adev->dev, "still active bo inside vm\n");
@@ -2282,8 +2381,14 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
        }
 
        dma_fence_put(vm->last_update);
-       for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
-               amdgpu_vmid_free_reserved(adev, vm, i);
+
+       for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) {
+               if (vm->reserved_vmid[i]) {
+                       amdgpu_vmid_free_reserved(adev, i);
+                       vm->reserved_vmid[i] = false;
+               }
+       }
 }
 
 /**
@@ -2366,18 +2471,25 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
        union drm_amdgpu_vm *args = data;
        struct amdgpu_device *adev = drm_to_adev(dev);
        struct amdgpu_fpriv *fpriv = filp->driver_priv;
-       int r;
+
+       /* No valid flags defined yet */
+       if (args->in.flags)
+               return -EINVAL;
 
        switch (args->in.op) {
        case AMDGPU_VM_OP_RESERVE_VMID:
                /* We only need to reserve a VMID from the gfxhub */
-               r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm,
-                                              AMDGPU_GFXHUB_0);
-               if (r)
-                       return r;
+               if (!fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)]) {
+                       amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(0));
+                       fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)] = true;
+               }
+
                break;
        case AMDGPU_VM_OP_UNRESERVE_VMID:
-               amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB_0);
+               if (fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)]) {
+                       amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(0));
+                       fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)] = false;
+               }
                break;
        default:
                return -EINVAL;
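For context, the user-space side of this interface looks roughly as follows; this is a hedged sketch, not part of the patch, assuming an open DRM render-node file descriptor and that the uapi header is reachable as <amdgpu_drm.h> on the include path.

#include <sys/ioctl.h>
#include <amdgpu_drm.h>  /* union drm_amdgpu_vm, AMDGPU_VM_OP_*, DRM_IOCTL_AMDGPU_VM */

static int set_reserved_vmid(int drm_fd, int reserve)
{
        union drm_amdgpu_vm args = {
                .in.op = reserve ? AMDGPU_VM_OP_RESERVE_VMID
                                 : AMDGPU_VM_OP_UNRESERVE_VMID,
                .in.flags = 0,  /* no flags are defined; must be zero */
        };

        /* Returns -1 with errno set (e.g. EINVAL) on failure. */
        return ioctl(drm_fd, DRM_IOCTL_AMDGPU_VM, &args);
}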
@@ -2432,6 +2544,9 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
  * amdgpu_vm_handle_fault - graceful handling of VM faults.
  * @adev: amdgpu device pointer
  * @pasid: PASID of the VM
+ * @vmid: VMID, only used for GFX 9.4.3.
+ * @node_id: node ID received in the IH cookie; only applicable to
+ *           GFX 9.4.3.
  * @addr: Address of the fault
  * @write_fault: true for a write fault, false for a read fault
  *
@@ -2439,7 +2554,8 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
  * shouldn't be reported any more.
  */
 bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
-                           uint64_t addr, bool write_fault)
+                           u32 vmid, u32 node_id, uint64_t addr,
+                           bool write_fault)
 {
        bool is_compute_context = false;
        struct amdgpu_bo *root;
@@ -2463,8 +2579,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
 
        addr /= AMDGPU_GPU_PAGE_SIZE;
 
-       if (is_compute_context &&
-           !svm_range_restore_pages(adev, pasid, addr, write_fault)) {
+       if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid,
+           node_id, addr, write_fault)) {
                amdgpu_bo_unref(&root);
                return true;
        }