Merge tag 'amd-drm-next-6.5-2023-06-09' of https://gitlab.freedesktop.org/agd5f/linux...

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 90f5d302d5f3e4b452c836c76099a7ae5f29d26b..a33d4bc34cee746cdee7e70103190fee0aa9f2cd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -28,6 +28,7 @@
 #include "amdgpu_gfx.h"
 #include "amdgpu_rlc.h"
 #include "amdgpu_ras.h"
 #include "amdgpu_gfx.h"
 #include "amdgpu_rlc.h"
 #include "amdgpu_ras.h"
+#include "amdgpu_xcp.h"
 
 /* delay 0.1 second to enable gfx off feature */
 #define GFX_OFF_DELAY_ENABLE         msecs_to_jiffies(100)
@@ -209,12 +210,12 @@ void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
        int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
                                     adev->gfx.mec.num_queue_per_pipe,
                                     adev->gfx.num_compute_rings);
-       int num_xcd = (adev->gfx.num_xcd > 1) ? adev->gfx.num_xcd : 1;
+       int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
 
        if (multipipe_policy) {
                /* policy: make queues evenly cross all pipes on MEC1 only
                 * for multiple xcc, just use the original policy for simplicity */
-               for (j = 0; j < num_xcd; j++) {
+               for (j = 0; j < num_xcc; j++) {
                        for (i = 0; i < max_queues_per_mec; i++) {
                                pipe = i % adev->gfx.mec.num_pipe_per_mec;
                                queue = (i / adev->gfx.mec.num_pipe_per_mec) %
@@ -226,13 +227,13 @@ void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
                }
        } else {
                /* policy: amdgpu owns all queues in the given pipe */
-               for (j = 0; j < num_xcd; j++) {
+               for (j = 0; j < num_xcc; j++) {
                        for (i = 0; i < max_queues_per_mec; ++i)
                                set_bit(i, adev->gfx.mec_bitmap[j].queue_bitmap);
                }
        }
 
-       for (j = 0; j < num_xcd; j++) {
+       for (j = 0; j < num_xcc; j++) {
                dev_dbg(adev->dev, "mec queue bitmap weight=%d\n",
                        bitmap_weight(adev->gfx.mec_bitmap[j].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
        }
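
Note on the rename above: the per-device XCC count is now derived from a mask
of active XCC instances rather than the old num_xcd die counter. A minimal
sketch of the idea, assuming NUM_XCC() reduces to a population count over the
mask (hweight16() or similar):

	/* sketch: derive the instance count from the mask, defaulting to 1 */
	static inline int example_num_xcc(u16 xcc_mask)
	{
		return xcc_mask ? hweight16(xcc_mask) : 1;	/* 0xff -> 8 */
	}
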
@@ -313,14 +314,12 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
        ring->adev = NULL;
        ring->ring_obj = NULL;
        ring->use_doorbell = true;
-       ring->doorbell_index = adev->doorbell_index.kiq;
        ring->xcc_id = xcc_id;
-       ring->vm_hub = AMDGPU_GFXHUB_0;
-       if (xcc_id >= 1)
-               ring->doorbell_index = adev->doorbell_index.xcc1_kiq_start +
-                                       xcc_id - 1;
-       else
-               ring->doorbell_index = adev->doorbell_index.kiq;
+       ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
+       ring->doorbell_index =
+               (adev->doorbell_index.kiq +
+                xcc_id * adev->doorbell_index.xcc_doorbell_range)
+               << 1;
 
        r = amdgpu_gfx_kiq_acquire(adev, ring, xcc_id);
        if (r)
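
The doorbell hunk above drops the xcc1_kiq_start special case in favor of one
expression: every XCC instance gets its own doorbell range, and the final
left-shift by one appears to convert the 64-bit-aligned doorbell slot into the
32-bit index the ring uses. A worked example with hypothetical values:

	u32 kiq_base = 4, range = 20;			/* assumed, for illustration */
	int xcc_id = 2;
	u32 index = (kiq_base + xcc_id * range) << 1;	/* (4 + 40) << 1 = 88 */
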
@@ -382,6 +381,11 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
        int r, i, j;
        struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
        struct amdgpu_ring *ring = &kiq->ring;
+       u32 domain = AMDGPU_GEM_DOMAIN_GTT;
+
+       /* Only enable on gfx10 and 11 for now to avoid changing behavior on older chips */
+       if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 0, 0))
+               domain |= AMDGPU_GEM_DOMAIN_VRAM;
 
        /* create MQD for KIQ */
        if (!adev->enable_mes_kiq && !ring->mqd_obj) {
@@ -413,7 +417,7 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
                        ring = &adev->gfx.gfx_ring[i];
                        if (!ring->mqd_obj) {
                                r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
-                                                           AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
+                                                           domain, &ring->mqd_obj,
                                                            &ring->mqd_gpu_addr, &ring->mqd_ptr);
                                if (r) {
                                        dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
@@ -435,7 +439,7 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
                ring = &adev->gfx.compute_ring[j];
                if (!ring->mqd_obj) {
                        r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
-                                                   AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
+                                                   domain, &ring->mqd_obj,
                                                    &ring->mqd_gpu_addr, &ring->mqd_ptr);
                        if (r) {
                                dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
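
With both amdgpu_bo_create_kernel() call sites now taking the computed domain,
MQD buffers on GC 10.0.0 and newer may be placed in VRAM rather than GTT only.
For reference, IP_VERSION() packs major/minor/revision into a single integer so
the check is a plain compare; a sketch equivalent to the in-tree macro:

	#define EXAMPLE_IP_VERSION(mj, mn, rv)	(((mj) << 16) | ((mn) << 8) | (rv))
	/* IP_VERSION(10, 0, 0) == 0x0a0000, so all GC 10.x and 11.x IPs pass */
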
@@ -480,7 +484,6 @@ void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev, int xcc_id)
 
        ring = &kiq->ring;
        kfree(kiq->mqd_backup);
-       kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
        amdgpu_bo_free_kernel(&ring->mqd_obj,
                              &ring->mqd_gpu_addr,
                              &ring->mqd_ptr);
@@ -497,22 +500,20 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
                return -EINVAL;
 
        spin_lock(&kiq->ring_lock);
-       if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
-               if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
-                                               adev->gfx.num_compute_rings)) {
-                       spin_unlock(&kiq->ring_lock);
-                       return -ENOMEM;
-               }
+       if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
+                                       adev->gfx.num_compute_rings)) {
+               spin_unlock(&kiq->ring_lock);
+               return -ENOMEM;
+       }
 
-               for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-                       j = i + xcc_id * adev->gfx.num_compute_rings;
-                       kiq->pmf->kiq_unmap_queues(kiq_ring,
-                                                  &adev->gfx.compute_ring[i],
-                                                  RESET_QUEUES, 0, 0);
-               }
+       for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+               j = i + xcc_id * adev->gfx.num_compute_rings;
+               kiq->pmf->kiq_unmap_queues(kiq_ring,
+                                          &adev->gfx.compute_ring[j],
+                                          RESET_QUEUES, 0, 0);
        }
 
-       if (adev->gfx.kiq[0].ring.sched.ready && !adev->job_hang)
+       if (kiq_ring->sched.ready && !adev->job_hang)
                r = amdgpu_ring_test_helper(kiq_ring);
        spin_unlock(&kiq->ring_lock);
 
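
The compute_ring[i] -> compute_ring[j] changes in this and the following hunks
are the functional fix: rings for all XCC instances live back-to-back in a
single array, so instance xcc_id owns a contiguous slice. Sketch of the layout
assumption:

	/*
	 * With N compute rings per XCC, compute_ring[] is laid out as
	 * [xcc0: 0..N-1][xcc1: N..2N-1]..., so XCC k's i-th ring is:
	 */
	j = i + xcc_id * num_compute_rings;
	kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[j],
				   RESET_QUEUES, 0, 0);

Indexing with compute_ring[i] unconditionally, as before, always addressed
XCC 0's rings regardless of which KIQ issued the unmap.
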
@@ -540,7 +541,7 @@ int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id)
                for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
                        j = i + xcc_id * adev->gfx.num_gfx_rings;
                        kiq->pmf->kiq_unmap_queues(kiq_ring,
-                                                  &adev->gfx.gfx_ring[i],
+                                                  &adev->gfx.gfx_ring[j],
                                                   PREEMPT_QUEUES, 0, 0);
                }
        }
@@ -592,27 +593,26 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id)
 
        DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,
                                                        kiq_ring->queue);
+       amdgpu_device_flush_hdp(adev, NULL);
+
        spin_lock(&kiq->ring_lock);
-       /* No need to map kcq on the slave */
-       if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
-               r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
-                                               adev->gfx.num_compute_rings +
-                                               kiq->pmf->set_resources_size);
-               if (r) {
-                       DRM_ERROR("Failed to lock KIQ (%d).\n", r);
-                       spin_unlock(&adev->gfx.kiq[0].ring_lock);
-                       return r;
-               }
+       r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
+                                       adev->gfx.num_compute_rings +
+                                       kiq->pmf->set_resources_size);
+       if (r) {
+               DRM_ERROR("Failed to lock KIQ (%d).\n", r);
+               spin_unlock(&kiq->ring_lock);
+               return r;
+       }
 
-               if (adev->enable_mes)
-                       queue_mask = ~0ULL;
+       if (adev->enable_mes)
+               queue_mask = ~0ULL;
 
-               kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
-               for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-                       j = i + xcc_id * adev->gfx.num_compute_rings;
+       kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
+       for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+               j = i + xcc_id * adev->gfx.num_compute_rings;
                        kiq->pmf->kiq_map_queues(kiq_ring,
-                                                &adev->gfx.compute_ring[i]);
-               }
+                                                &adev->gfx.compute_ring[j]);
        }
 
        r = amdgpu_ring_test_helper(kiq_ring);
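
Two related changes in the enable paths above: the master-XCC gating is gone
from the kcq path (each XCC now owns a full KIQ, so every instance maps its own
queues), and an HDP flush is issued first so that MQD contents written by the
CPU are visible to the GPU before the map packets execute:

	/* NULL: no specific ring; flush HDP so the GPU sees CPU-written MQDs */
	amdgpu_device_flush_hdp(adev, NULL);
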
@@ -632,6 +632,8 @@ int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id)
        if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
                return -EINVAL;
 
+       amdgpu_device_flush_hdp(adev, NULL);
+
        spin_lock(&kiq->ring_lock);
        /* No need to map kcq on the slave */
        if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
@@ -639,14 +641,14 @@ int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id)
                                                adev->gfx.num_gfx_rings);
                if (r) {
                        DRM_ERROR("Failed to lock KIQ (%d).\n", r);
-                       spin_unlock(&adev->gfx.kiq[0].ring_lock);
+                       spin_unlock(&kiq->ring_lock);
                        return r;
                }
 
                for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
                        j = i + xcc_id * adev->gfx.num_gfx_rings;
                        kiq->pmf->kiq_map_queues(kiq_ring,
-                                                &adev->gfx.gfx_ring[i]);
+                                                &adev->gfx.gfx_ring[j]);
                }
        }
 
@@ -788,9 +790,11 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *r
                if (r)
                        return r;
 
-               r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
-               if (r)
-                       goto late_fini;
+               if (adev->gfx.cp_ecc_error_irq.funcs) {
+                       r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
+                       if (r)
+                               goto late_fini;
+               }
        } else {
                amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
        }
@@ -884,6 +888,25 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
        return 0;
 }
 
+void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev,
+               void *ras_error_status,
+               void (*func)(struct amdgpu_device *adev, void *ras_error_status,
+                               int xcc_id))
+{
+       int i;
+       int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
+       uint32_t xcc_mask = GENMASK(num_xcc - 1, 0);
+       struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+
+       if (err_data) {
+               err_data->ue_count = 0;
+               err_data->ce_count = 0;
+       }
+
+       for_each_inst(i, xcc_mask)
+               func(adev, ras_error_status, i);
+}
+
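
The new helper fans a per-XCC RAS query out across every instance set in the
mask after zeroing the aggregate error counters; for_each_inst() walks the set
bits, so the loop is roughly equivalent to:

	for (i = 0; i < num_xcc; i++)
		if (xcc_mask & BIT(i))
			func(adev, ras_error_status, i);	/* once per XCC */
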
 uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
 {
        signed long r, cnt = 0;
@@ -1167,3 +1190,119 @@ bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id)
        return !(xcc_id % (adev->gfx.num_xcc_per_xcp ?
                        adev->gfx.num_xcc_per_xcp : 1));
 }
+
+static ssize_t amdgpu_gfx_get_current_compute_partition(struct device *dev,
+                                               struct device_attribute *addr,
+                                               char *buf)
+{
+       struct drm_device *ddev = dev_get_drvdata(dev);
+       struct amdgpu_device *adev = drm_to_adev(ddev);
+       int mode;
+
+       mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
+                                              AMDGPU_XCP_FL_NONE);
+
+       return sysfs_emit(buf, "%s\n", amdgpu_gfx_compute_mode_desc(mode));
+}
+
+static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev,
+                                               struct device_attribute *addr,
+                                               const char *buf, size_t count)
+{
+       struct drm_device *ddev = dev_get_drvdata(dev);
+       struct amdgpu_device *adev = drm_to_adev(ddev);
+       enum amdgpu_gfx_partition mode;
+       int ret = 0, num_xcc;
+
+       num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+       if (num_xcc % 2 != 0)
+               return -EINVAL;
+
+       if (!strncasecmp("SPX", buf, strlen("SPX"))) {
+               mode = AMDGPU_SPX_PARTITION_MODE;
+       } else if (!strncasecmp("DPX", buf, strlen("DPX"))) {
+               /*
+                * DPX mode needs AIDs to be in multiple of 2.
+                * Each AID connects 2 XCCs.
+                */
+               if (num_xcc % 4)
+                       return -EINVAL;
+               mode = AMDGPU_DPX_PARTITION_MODE;
+       } else if (!strncasecmp("TPX", buf, strlen("TPX"))) {
+               if (num_xcc != 6)
+                       return -EINVAL;
+               mode = AMDGPU_TPX_PARTITION_MODE;
+       } else if (!strncasecmp("QPX", buf, strlen("QPX"))) {
+               if (num_xcc != 8)
+                       return -EINVAL;
+               mode = AMDGPU_QPX_PARTITION_MODE;
+       } else if (!strncasecmp("CPX", buf, strlen("CPX"))) {
+               mode = AMDGPU_CPX_PARTITION_MODE;
+       } else {
+               return -EINVAL;
+       }
+
+       ret = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr, mode);
+
+       if (ret)
+               return ret;
+
+       return count;
+}
+
+static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev,
+                                               struct device_attribute *addr,
+                                               char *buf)
+{
+       struct drm_device *ddev = dev_get_drvdata(dev);
+       struct amdgpu_device *adev = drm_to_adev(ddev);
+       char *supported_partition;
+
+       /* TBD */
+       switch (NUM_XCC(adev->gfx.xcc_mask)) {
+       case 8:
+               supported_partition = "SPX, DPX, QPX, CPX";
+               break;
+       case 6:
+               supported_partition = "SPX, TPX, CPX";
+               break;
+       case 4:
+               supported_partition = "SPX, DPX, CPX";
+               break;
+       /* this seems to exist only in the emulation phase */
+       case 2:
+               supported_partition = "SPX, CPX";
+               break;
+       default:
+               supported_partition = "Not supported";
+               break;
+       }
+
+       return sysfs_emit(buf, "%s\n", supported_partition);
+}
+
+static DEVICE_ATTR(current_compute_partition, S_IRUGO | S_IWUSR,
+                  amdgpu_gfx_get_current_compute_partition,
+                  amdgpu_gfx_set_compute_partition);
+
+static DEVICE_ATTR(available_compute_partition, S_IRUGO,
+                  amdgpu_gfx_get_available_compute_partition, NULL);
+
+int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev)
+{
+       int r;
+
+       r = device_create_file(adev->dev, &dev_attr_current_compute_partition);
+       if (r)
+               return r;
+
+       r = device_create_file(adev->dev, &dev_attr_available_compute_partition);
+
+       return r;
+}
+
+void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev)
+{
+       device_remove_file(adev->dev, &dev_attr_current_compute_partition);
+       device_remove_file(adev->dev, &dev_attr_available_compute_partition);
+}
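
Once registered, the two attributes appear in the PCI device's sysfs directory
(typically reachable via /sys/class/drm/cardN/device); writes require root,
per the S_IWUSR mode above. Illustrative session, paths and output assumed:

	$ cat available_compute_partition
	SPX, DPX, QPX, CPX
	$ echo DPX > current_compute_partition
	$ cat current_compute_partition
	DPX
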