Git Repo - linux.git/blobdiff - drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
Merge tag 'kvm-x86-hyperv-6.8' of https://github.com/kvm-x86/linux into HEAD
[linux.git] / drivers / gpu / drm / amd / amdgpu / amdgpu_mes.c
index f808841310fdf0953b857c45fedbd8a9fe0694ba..9ddbf1494326a0d7e6606f6f25fe06f32333917b 100644 (file)
@@ -22,6 +22,7 @@
  */
 
 #include <linux/firmware.h>
+#include <drm/drm_exec.h>
 
 #include "amdgpu_mes.h"
 #include "amdgpu.h"
@@ -38,120 +39,70 @@ int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev)
                       PAGE_SIZE);
 }
 
-int amdgpu_mes_alloc_process_doorbells(struct amdgpu_device *adev,
-                                     unsigned int *doorbell_index)
-{
-       int r = ida_simple_get(&adev->mes.doorbell_ida, 2,
-                              adev->mes.max_doorbell_slices,
-                              GFP_KERNEL);
-       if (r > 0)
-               *doorbell_index = r;
-
-       return r;
-}
-
-void amdgpu_mes_free_process_doorbells(struct amdgpu_device *adev,
-                                     unsigned int doorbell_index)
-{
-       if (doorbell_index)
-               ida_simple_remove(&adev->mes.doorbell_ida, doorbell_index);
-}
-
-unsigned int amdgpu_mes_get_doorbell_dw_offset_in_bar(
-                                       struct amdgpu_device *adev,
-                                       uint32_t doorbell_index,
-                                       unsigned int doorbell_id)
-{
-       return ((doorbell_index *
-               amdgpu_mes_doorbell_process_slice(adev)) / sizeof(u32) +
-               doorbell_id * 2);
-}
-
-static int amdgpu_mes_queue_doorbell_get(struct amdgpu_device *adev,
+static int amdgpu_mes_kernel_doorbell_get(struct amdgpu_device *adev,
                                         struct amdgpu_mes_process *process,
                                         int ip_type, uint64_t *doorbell_index)
 {
        unsigned int offset, found;
+       struct amdgpu_mes *mes = &adev->mes;
 
-       if (ip_type == AMDGPU_RING_TYPE_SDMA) {
+       if (ip_type == AMDGPU_RING_TYPE_SDMA)
                offset = adev->doorbell_index.sdma_engine[0];
-               found = find_next_zero_bit(process->doorbell_bitmap,
-                                          AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS,
-                                          offset);
-       } else {
-               found = find_first_zero_bit(process->doorbell_bitmap,
-                                           AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS);
-       }
+       else
+               offset = 0;
 
-       if (found >= AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS) {
+       found = find_next_zero_bit(mes->doorbell_bitmap, mes->num_mes_dbs, offset);
+       if (found >= mes->num_mes_dbs) {
                DRM_WARN("No doorbell available\n");
                return -ENOSPC;
        }
 
-       set_bit(found, process->doorbell_bitmap);
-
-       *doorbell_index = amdgpu_mes_get_doorbell_dw_offset_in_bar(adev,
-                               process->doorbell_index, found);
+       set_bit(found, mes->doorbell_bitmap);
 
+       /* Get the absolute doorbell index on BAR */
+       *doorbell_index = mes->db_start_dw_offset + found * 2;
        return 0;
 }
 
-static void amdgpu_mes_queue_doorbell_free(struct amdgpu_device *adev,
+static void amdgpu_mes_kernel_doorbell_free(struct amdgpu_device *adev,
                                           struct amdgpu_mes_process *process,
                                           uint32_t doorbell_index)
 {
-       unsigned int old, doorbell_id;
+       unsigned int old, rel_index;
+       struct amdgpu_mes *mes = &adev->mes;
 
-       doorbell_id = doorbell_index -
-               (process->doorbell_index *
-                amdgpu_mes_doorbell_process_slice(adev)) / sizeof(u32);
-       doorbell_id /= 2;
-
-       old = test_and_clear_bit(doorbell_id, process->doorbell_bitmap);
+       /* Find the relative index of the doorbell in this object */
+       rel_index = (doorbell_index - mes->db_start_dw_offset) / 2;
+       old = test_and_clear_bit(rel_index, mes->doorbell_bitmap);
        WARN_ON(!old);
 }
 
 static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev)
 {
-       size_t doorbell_start_offset;
-       size_t doorbell_aperture_size;
-       size_t doorbell_process_limit;
-       size_t aggregated_doorbell_start;
        int i;
+       struct amdgpu_mes *mes = &adev->mes;
 
-       aggregated_doorbell_start = (adev->doorbell_index.max_assignment + 1) * sizeof(u32);
-       aggregated_doorbell_start =
-               roundup(aggregated_doorbell_start, PAGE_SIZE);
-
-       doorbell_start_offset = aggregated_doorbell_start + PAGE_SIZE;
-       doorbell_start_offset =
-               roundup(doorbell_start_offset,
-                       amdgpu_mes_doorbell_process_slice(adev));
-
-       doorbell_aperture_size = adev->doorbell.size;
-       doorbell_aperture_size =
-                       rounddown(doorbell_aperture_size,
-                                 amdgpu_mes_doorbell_process_slice(adev));
-
-       if (doorbell_aperture_size > doorbell_start_offset)
-               doorbell_process_limit =
-                       (doorbell_aperture_size - doorbell_start_offset) /
-                       amdgpu_mes_doorbell_process_slice(adev);
-       else
-               return -ENOSPC;
-
-       adev->mes.doorbell_id_offset = doorbell_start_offset / sizeof(u32);
-       adev->mes.max_doorbell_slices = doorbell_process_limit;
+       /* Bitmap for dynamic allocation of kernel doorbells */
+       mes->doorbell_bitmap = bitmap_zalloc(PAGE_SIZE / sizeof(u32), GFP_KERNEL);
+       if (!mes->doorbell_bitmap) {
+               DRM_ERROR("Failed to allocate MES doorbell bitmap\n");
+               return -ENOMEM;
+       }
 
-       /* allocate Qword range for aggregated doorbell */
-       for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++)
-               adev->mes.aggregated_doorbells[i] =
-                       aggregated_doorbell_start / sizeof(u32) + i * 2;
+       mes->num_mes_dbs = PAGE_SIZE / AMDGPU_ONE_DOORBELL_SIZE;
+       for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++) {
+               adev->mes.aggregated_doorbells[i] = mes->db_start_dw_offset + i * 2;
+               set_bit(i, mes->doorbell_bitmap);
+       }
 
-       DRM_INFO("max_doorbell_slices=%zu\n", doorbell_process_limit);
        return 0;
 }
 
+static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev)
+{
+       bitmap_free(adev->mes.doorbell_bitmap);
+}
+
 int amdgpu_mes_init(struct amdgpu_device *adev)
 {
        int i, r;
@@ -181,7 +132,8 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
                adev->mes.gfx_hqd_mask[i] = i ? 0 : 0xfffffffe;
 
        for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) {
-               if (adev->ip_versions[SDMA0_HWIP][0] < IP_VERSION(6, 0, 0))
+               if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) <
+                   IP_VERSION(6, 0, 0))
                        adev->mes.sdma_hqd_mask[i] = i ? 0 : 0x3fc;
                /* zero sdma_hqd_mask for non-existent engine */
                else if (adev->sdma.num_instances == 1)
@@ -250,6 +202,7 @@ void amdgpu_mes_fini(struct amdgpu_device *adev)
        amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
        amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
        amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
+       amdgpu_mes_doorbell_free(adev);
 
        idr_destroy(&adev->mes.pasid_idr);
        idr_destroy(&adev->mes.gang_id_idr);
@@ -278,15 +231,6 @@ int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,
                return -ENOMEM;
        }
 
-       process->doorbell_bitmap =
-               kzalloc(DIV_ROUND_UP(AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS,
-                                    BITS_PER_BYTE), GFP_KERNEL);
-       if (!process->doorbell_bitmap) {
-               DRM_ERROR("failed to allocate doorbell bitmap\n");
-               kfree(process);
-               return -ENOMEM;
-       }
-
        /* allocate the process context bo and map it */
        r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_PROC_CTX_SIZE, PAGE_SIZE,
                                    AMDGPU_GEM_DOMAIN_GTT,
@@ -313,15 +257,6 @@ int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,
                goto clean_up_ctx;
        }
 
-       /* allocate the starting doorbell index of the process */
-       r = amdgpu_mes_alloc_process_doorbells(adev, &process->doorbell_index);
-       if (r < 0) {
-               DRM_ERROR("failed to allocate doorbell for process\n");
-               goto clean_up_pasid;
-       }
-
-       DRM_DEBUG("process doorbell index = %d\n", process->doorbell_index);
-
        INIT_LIST_HEAD(&process->gang_list);
        process->vm = vm;
        process->pasid = pasid;
@@ -331,15 +266,12 @@ int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,
        amdgpu_mes_unlock(&adev->mes);
        return 0;
 
-clean_up_pasid:
-       idr_remove(&adev->mes.pasid_idr, pasid);
-       amdgpu_mes_unlock(&adev->mes);
 clean_up_ctx:
+       amdgpu_mes_unlock(&adev->mes);
        amdgpu_bo_free_kernel(&process->proc_ctx_bo,
                              &process->proc_ctx_gpu_addr,
                              &process->proc_ctx_cpu_ptr);
 clean_up_memory:
-       kfree(process->doorbell_bitmap);
        kfree(process);
        return r;
 }
@@ -385,7 +317,6 @@ void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid)
                idr_remove(&adev->mes.gang_id_idr, gang->gang_id);
        }
 
-       amdgpu_mes_free_process_doorbells(adev, process->doorbell_index);
        idr_remove(&adev->mes.pasid_idr, pasid);
        amdgpu_mes_unlock(&adev->mes);
 
@@ -407,7 +338,6 @@ void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid)
        amdgpu_bo_free_kernel(&process->proc_ctx_bo,
                              &process->proc_ctx_gpu_addr,
                              &process->proc_ctx_cpu_ptr);
-       kfree(process->doorbell_bitmap);
        kfree(process);
 }
 
@@ -627,8 +557,20 @@ static void amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev,
        mqd_prop.hqd_queue_priority = p->hqd_queue_priority;
        mqd_prop.hqd_active = false;
 
+       if (p->queue_type == AMDGPU_RING_TYPE_GFX ||
+           p->queue_type == AMDGPU_RING_TYPE_COMPUTE) {
+               mutex_lock(&adev->srbm_mutex);
+               amdgpu_gfx_select_me_pipe_q(adev, p->ring->me, p->ring->pipe, 0, 0, 0);
+       }
+
        mqd_mgr->init_mqd(adev, q->mqd_cpu_ptr, &mqd_prop);
 
+       if (p->queue_type == AMDGPU_RING_TYPE_GFX ||
+           p->queue_type == AMDGPU_RING_TYPE_COMPUTE) {
+               amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, 0);
+               mutex_unlock(&adev->srbm_mutex);
+       }
+
        amdgpu_bo_unreserve(q->mqd_obj);
 }
 
@@ -642,6 +584,8 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
        unsigned long flags;
        int r;
 
+       memset(&queue_input, 0, sizeof(struct mes_add_queue_input));
+
        /* allocate the mes queue buffer */
        queue = kzalloc(sizeof(struct amdgpu_mes_queue), GFP_KERNEL);
        if (!queue) {
@@ -679,7 +623,7 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
        *queue_id = queue->queue_id = r;
 
        /* allocate a doorbell index for the queue */
-       r = amdgpu_mes_queue_doorbell_get(adev, gang->process,
+       r = amdgpu_mes_kernel_doorbell_get(adev, gang->process,
                                          qprops->queue_type,
                                          &qprops->doorbell_off);
        if (r)
@@ -737,7 +681,7 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
        return 0;
 
 clean_up_doorbell:
-       amdgpu_mes_queue_doorbell_free(adev, gang->process,
+       amdgpu_mes_kernel_doorbell_free(adev, gang->process,
                                       qprops->doorbell_off);
 clean_up_queue_id:
        spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
@@ -792,7 +736,7 @@ int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id)
                          queue_id);
 
        list_del(&queue->list);
-       amdgpu_mes_queue_doorbell_free(adev, gang->process,
+       amdgpu_mes_kernel_doorbell_free(adev, gang->process,
                                       queue->doorbell_off);
        amdgpu_mes_unlock(&adev->mes);
 
@@ -1062,9 +1006,13 @@ int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
        switch (queue_type) {
        case AMDGPU_RING_TYPE_GFX:
                ring->funcs = adev->gfx.gfx_ring[0].funcs;
+               ring->me = adev->gfx.gfx_ring[0].me;
+               ring->pipe = adev->gfx.gfx_ring[0].pipe;
                break;
        case AMDGPU_RING_TYPE_COMPUTE:
                ring->funcs = adev->gfx.compute_ring[0].funcs;
+               ring->me = adev->gfx.compute_ring[0].me;
+               ring->pipe = adev->gfx.compute_ring[0].pipe;
                break;
        case AMDGPU_RING_TYPE_SDMA:
                ring->funcs = adev->sdma.instance[0].ring.funcs;
@@ -1168,34 +1116,31 @@ int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,
                                 struct amdgpu_mes_ctx_data *ctx_data)
 {
        struct amdgpu_bo_va *bo_va;
-       struct ww_acquire_ctx ticket;
-       struct list_head list;
-       struct amdgpu_bo_list_entry pd;
-       struct ttm_validate_buffer csa_tv;
        struct amdgpu_sync sync;
+       struct drm_exec exec;
        int r;
 
        amdgpu_sync_create(&sync);
-       INIT_LIST_HEAD(&list);
-       INIT_LIST_HEAD(&csa_tv.head);
-
-       csa_tv.bo = &ctx_data->meta_data_obj->tbo;
-       csa_tv.num_shared = 1;
-
-       list_add(&csa_tv.head, &list);
-       amdgpu_vm_get_pd_bo(vm, &list, &pd);
 
-       r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
-       if (r) {
-               DRM_ERROR("failed to reserve meta data BO: err=%d\n", r);
-               return r;
+       drm_exec_init(&exec, 0);
+       drm_exec_until_all_locked(&exec) {
+               r = drm_exec_lock_obj(&exec,
+                                     &ctx_data->meta_data_obj->tbo.base);
+               drm_exec_retry_on_contention(&exec);
+               if (unlikely(r))
+                       goto error_fini_exec;
+
+               r = amdgpu_vm_lock_pd(vm, &exec, 0);
+               drm_exec_retry_on_contention(&exec);
+               if (unlikely(r))
+                       goto error_fini_exec;
        }
 
        bo_va = amdgpu_vm_bo_add(adev, vm, ctx_data->meta_data_obj);
        if (!bo_va) {
-               ttm_eu_backoff_reservation(&ticket, &list);
                DRM_ERROR("failed to create bo_va for meta data BO\n");
-               return -ENOMEM;
+               r = -ENOMEM;
+               goto error_fini_exec;
        }
 
        r = amdgpu_vm_bo_map(adev, bo_va, ctx_data->meta_data_gpu_addr, 0,
@@ -1205,33 +1150,35 @@ int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,
 
        if (r) {
                DRM_ERROR("failed to do bo_map on meta data, err=%d\n", r);
-               goto error;
+               goto error_del_bo_va;
        }
 
        r = amdgpu_vm_bo_update(adev, bo_va, false);
        if (r) {
                DRM_ERROR("failed to do vm_bo_update on meta data\n");
-               goto error;
+               goto error_del_bo_va;
        }
        amdgpu_sync_fence(&sync, bo_va->last_pt_update);
 
        r = amdgpu_vm_update_pdes(adev, vm, false);
        if (r) {
                DRM_ERROR("failed to update pdes on meta data\n");
-               goto error;
+               goto error_del_bo_va;
        }
        amdgpu_sync_fence(&sync, vm->last_update);
 
        amdgpu_sync_wait(&sync, false);
-       ttm_eu_backoff_reservation(&ticket, &list);
+       drm_exec_fini(&exec);
 
        amdgpu_sync_free(&sync);
        ctx_data->meta_data_va = bo_va;
        return 0;
 
-error:
+error_del_bo_va:
        amdgpu_vm_bo_del(adev, bo_va);
-       ttm_eu_backoff_reservation(&ticket, &list);
+
+error_fini_exec:
+       drm_exec_fini(&exec);
        amdgpu_sync_free(&sync);
        return r;
 }
@@ -1242,34 +1189,30 @@ int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev,
        struct amdgpu_bo_va *bo_va = ctx_data->meta_data_va;
        struct amdgpu_bo *bo = ctx_data->meta_data_obj;
        struct amdgpu_vm *vm = bo_va->base.vm;
-       struct amdgpu_bo_list_entry vm_pd;
-       struct list_head list, duplicates;
-       struct dma_fence *fence = NULL;
-       struct ttm_validate_buffer tv;
-       struct ww_acquire_ctx ticket;
-       long r = 0;
-
-       INIT_LIST_HEAD(&list);
-       INIT_LIST_HEAD(&duplicates);
-
-       tv.bo = &bo->tbo;
-       tv.num_shared = 2;
-       list_add(&tv.head, &list);
-
-       amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
-
-       r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
-       if (r) {
-               dev_err(adev->dev, "leaking bo va because "
-                       "we fail to reserve bo (%ld)\n", r);
-               return r;
+       struct dma_fence *fence;
+       struct drm_exec exec;
+       long r;
+
+       drm_exec_init(&exec, 0);
+       drm_exec_until_all_locked(&exec) {
+               r = drm_exec_lock_obj(&exec,
+                                     &ctx_data->meta_data_obj->tbo.base);
+               drm_exec_retry_on_contention(&exec);
+               if (unlikely(r))
+                       goto out_unlock;
+
+               r = amdgpu_vm_lock_pd(vm, &exec, 0);
+               drm_exec_retry_on_contention(&exec);
+               if (unlikely(r))
+                       goto out_unlock;
        }
 
        amdgpu_vm_bo_del(adev, bo_va);
        if (!amdgpu_vm_ready(vm))
                goto out_unlock;
 
-       r = dma_resv_get_singleton(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP, &fence);
+       r = dma_resv_get_singleton(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP,
+                                  &fence);
        if (r)
                goto out_unlock;
        if (fence) {
@@ -1288,7 +1231,7 @@ int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev,
 out_unlock:
        if (unlikely(r < 0))
                dev_err(adev->dev, "failed to clear page tables (%ld)\n", r);
-       ttm_eu_backoff_reservation(&ticket, &list);
+       drm_exec_fini(&exec);
 
        return r;
 }
@@ -1409,8 +1352,10 @@ int amdgpu_mes_self_test(struct amdgpu_device *adev)
 
        for (i = 0; i < ARRAY_SIZE(queue_types); i++) {
                /* On GFX v10.3, fw hasn't supported to map sdma queue. */
-               if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0) &&
-                   adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0) &&
+               if (amdgpu_ip_version(adev, GC_HWIP, 0) >=
+                           IP_VERSION(10, 3, 0) &&
+                   amdgpu_ip_version(adev, GC_HWIP, 0) <
+                           IP_VERSION(11, 0, 0) &&
                    queue_types[i][0] == AMDGPU_RING_TYPE_SDMA)
                        continue;
 
@@ -1471,7 +1416,7 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe)
 
        amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix,
                                       sizeof(ucode_prefix));
-       if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) {
+       if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin",
                         ucode_prefix,
                         pipe == AMDGPU_MES_SCHED_PIPE ? "_2" : "1");
This page took 0.051834 seconds and 4 git commands to generate.