diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index fd81b04559d4992d11732eecae35b8c5a0b43612..b9674c57c4365fb5ebdf9644fc4ac0a31b955da8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -29,6 +29,7 @@
 #include "amdgpu_rlc.h"
 #include "amdgpu_ras.h"
 #include "amdgpu_xcp.h"
+#include "amdgpu_xgmi.h"
 
 /* delay 0.1 second to enable gfx off feature */
 #define GFX_OFF_DELAY_ENABLE         msecs_to_jiffies(100)
@@ -110,9 +111,9 @@ bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev,
  * The bitmask of CUs to be disabled in the shader array determined by se and
  * sh is stored in mask[se * max_sh + sh].
  */
-void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_sh)
+void amdgpu_gfx_parse_disable_cu(unsigned int *mask, unsigned int max_se, unsigned int max_sh)
 {
-       unsigned se, sh, cu;
+       unsigned int se, sh, cu;
        const char *p;
 
        memset(mask, 0, sizeof(*mask) * max_se * max_sh);
@@ -124,6 +125,7 @@ void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_s
        for (;;) {
                char *next;
                int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu);
+
                if (ret < 3) {
                        DRM_ERROR("amdgpu: could not parse disable_cu\n");
                        return;
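
The loop above consumes the amdgpu.disable_cu module parameter as comma-separated "se.sh.cu" triples. A minimal userspace sketch of the same parsing scheme, for reference (the function name, bounds check, and dimensions here are hypothetical, not the kernel's):

#include <stdio.h>
#include <string.h>

static void parse_triples(const char *p, unsigned int *mask,
                          unsigned int max_se, unsigned int max_sh)
{
        unsigned int se, sh, cu;

        memset(mask, 0, sizeof(*mask) * max_se * max_sh);
        for (;;) {
                const char *next;

                if (sscanf(p, "%u.%u.%u", &se, &sh, &cu) < 3) {
                        fprintf(stderr, "could not parse disable_cu\n");
                        return;
                }
                /* illustrative bounds check; the driver applies its own */
                if (se < max_se && sh < max_sh && cu < 32)
                        mask[se * max_sh + sh] |= 1u << cu;
                next = strchr(p, ',');
                if (!next)
                        break;
                p = next + 1;
        }
}

int main(void)
{
        unsigned int mask[2 * 2] = { 0 };

        /* e.g. booting with amdgpu.disable_cu=0.0.4,1.1.7 */
        parse_triples("0.0.4,1.1.7", mask, 2, 2);
        printf("mask[0]=0x%x mask[3]=0x%x\n", mask[0], mask[3]);
        return 0;
}
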
@@ -157,7 +159,7 @@ static bool amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device *adev)
                return amdgpu_compute_multipipe == 1;
        }
 
-       if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))
+       if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
                return true;
 
        /* FIXME: spreading the queues across pipes causes perf regressions
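
This hunk (and the later ones like it) replaces direct indexing of adev->ip_versions[][] with the amdgpu_ip_version() helper, which makes the HW-IP block and instance explicit at every call site. Conceptually the helper reduces to the lookup below (a paraphrase; the real definition lives in amdgpu.h and may mask additional revision bits):

/* paraphrased equivalent of amdgpu_ip_version(); not the literal macro */
static inline uint32_t demo_ip_version(const struct amdgpu_device *adev,
                                       int hwip, int inst)
{
        return adev->ip_versions[hwip][inst];
}
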
@@ -349,7 +351,7 @@ void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev, int xcc_id)
 }
 
 int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
-                       unsigned hpd_size, int xcc_id)
+                       unsigned int hpd_size, int xcc_id)
 {
        int r;
        u32 *hpd;
@@ -376,16 +378,18 @@ int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
 
 /* create MQD for each compute/gfx queue */
 int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
-                          unsigned mqd_size, int xcc_id)
+                          unsigned int mqd_size, int xcc_id)
 {
        int r, i, j;
        struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
        struct amdgpu_ring *ring = &kiq->ring;
        u32 domain = AMDGPU_GEM_DOMAIN_GTT;
 
+#if !defined(CONFIG_ARM) && !defined(CONFIG_ARM64)
        /* Only enable on gfx10 and 11 for now to avoid changing behavior on older chips */
-       if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 0, 0))
+       if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0))
                domain |= AMDGPU_GEM_DOMAIN_VRAM;
+#endif
 
        /* create MQD for KIQ */
        if (!adev->enable_mes_kiq && !ring->mqd_obj) {
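
The new #if keeps MQDs out of VRAM on ARM and ARM64 builds, presumably to avoid problematic CPU mappings of the VRAM BAR on those platforms, while other architectures still prefer VRAM from GC 10.x onward. The version comparison works because IP_VERSION() packs major.minor.rev into one ordered integer; an illustrative packing only (field widths vary across kernel versions):

/* demo only: 10.3.2 -> 0x0a0302, so ">= DEMO_IP_VERSION(10, 0, 0)"
 * is a plain unsigned comparison
 */
#define DEMO_IP_VERSION(mj, mn, rv) (((mj) << 16) | ((mn) << 8) | (rv))
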
@@ -407,8 +411,11 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
 
                /* prepare MQD backup */
                kiq->mqd_backup = kmalloc(mqd_size, GFP_KERNEL);
-               if (!kiq->mqd_backup)
-                               dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
+               if (!kiq->mqd_backup) {
+                       dev_warn(adev->dev,
+                                "no memory to create MQD backup for ring %s\n", ring->name);
+                       return -ENOMEM;
+               }
        }
 
        if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
@@ -427,8 +434,10 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
                                ring->mqd_size = mqd_size;
                                /* prepare MQD backup */
                                adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
-                               if (!adev->gfx.me.mqd_backup[i])
+                               if (!adev->gfx.me.mqd_backup[i]) {
                                        dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
+                                       return -ENOMEM;
+                               }
                        }
                }
        }
@@ -449,8 +458,10 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
                        ring->mqd_size = mqd_size;
                        /* prepare MQD backup */
                        adev->gfx.mec.mqd_backup[j] = kmalloc(mqd_size, GFP_KERNEL);
-                       if (!adev->gfx.mec.mqd_backup[j])
+                       if (!adev->gfx.mec.mqd_backup[j]) {
                                dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
+                               return -ENOMEM;
+                       }
                }
        }
 
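The three hunks above promote a previously log-only allocation failure to a hard error: amdgpu_gfx_mqd_sw_init() now returns -ENOMEM instead of continuing with a NULL backup buffer. A hypothetical caller fragment (variable names are placeholders):

        r = amdgpu_gfx_mqd_sw_init(adev, mqd_size, xcc_id);
        if (r)
                return r;   /* propagate -ENOMEM instead of limping on */
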
@@ -493,6 +504,9 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
 {
        struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
        struct amdgpu_ring *kiq_ring = &kiq->ring;
+       struct amdgpu_hive_info *hive;
+       struct amdgpu_ras *ras;
+       int hive_ras_recovery = 0;
        int i, r = 0;
        int j;
 
@@ -513,6 +527,23 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
                                           RESET_QUEUES, 0, 0);
        }
 
+       /*
+        * This is a workaround: only skip the kiq_ring test
+        * during RAS recovery in the suspend stage for gfx9.4.3.
+        */
+       hive = amdgpu_get_xgmi_hive(adev);
+       if (hive) {
+               hive_ras_recovery = atomic_read(&hive->ras_recovery);
+               amdgpu_put_xgmi_hive(hive);
+       }
+
+       ras = amdgpu_ras_get_context(adev);
+       if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) &&
+               ras && (atomic_read(&ras->in_recovery) || hive_ras_recovery)) {
+               spin_unlock(&kiq->ring_lock);
+               return 0;
+       }
+
        if (kiq_ring->sched.ready && !adev->job_hang)
                r = amdgpu_ring_test_helper(kiq_ring);
        spin_unlock(&kiq->ring_lock);
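
amdgpu_get_xgmi_hive() returns a reference-counted hive object (NULL on non-XGMI parts), which is why the reference is dropped immediately after sampling ras_recovery. A hypothetical helper consolidating the whole check above, i.e. "this device or any device in its hive is in RAS recovery":

static bool demo_in_ras_recovery(struct amdgpu_device *adev)
{
        struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
        struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
        bool recovering = false;

        if (hive) {
                recovering = atomic_read(&hive->ras_recovery);
                amdgpu_put_xgmi_hive(hive);
        }
        if (ras && atomic_read(&ras->in_recovery))
                recovering = true;

        return recovering;
}
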
@@ -900,12 +931,12 @@ void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev,
                func(adev, ras_error_status, i);
 }
 
-uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
+uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id)
 {
        signed long r, cnt = 0;
        unsigned long flags;
        uint32_t seq, reg_val_offs = 0, value = 0;
-       struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+       struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
        struct amdgpu_ring *ring = &kiq->ring;
 
        if (amdgpu_device_skip_hw_access(adev))
@@ -968,12 +999,12 @@ failed_kiq_read:
        return ~0;
 }
 
-void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
+void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id)
 {
        signed long r, cnt = 0;
        unsigned long flags;
        uint32_t seq;
-       struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+       struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
        struct amdgpu_ring *ring = &kiq->ring;
 
        BUG_ON(!ring->funcs->emit_wreg);
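
With the added parameter, KIQ register access can target a specific XCC on multi-XCC parts such as GC 9.4.3; single-instance callers simply pass 0. A hypothetical read-modify-write through instance 0 ("reg" is a placeholder offset):

        uint32_t v = amdgpu_kiq_rreg(adev, reg, 0);

        amdgpu_kiq_wreg(adev, reg, v | BIT(0), 0);
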
@@ -1274,11 +1305,11 @@ static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev,
        return sysfs_emit(buf, "%s\n", supported_partition);
 }
 
-static DEVICE_ATTR(current_compute_partition, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(current_compute_partition, 0644,
                   amdgpu_gfx_get_current_compute_partition,
                   amdgpu_gfx_set_compute_partition);
 
-static DEVICE_ATTR(available_compute_partition, S_IRUGO,
+static DEVICE_ATTR(available_compute_partition, 0444,
                   amdgpu_gfx_get_available_compute_partition, NULL);
 
 int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev)
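
The mode macros become literal octal, the form checkpatch.pl prefers for sysfs attributes; the substitution is value-preserving, as a hypothetical compile-time check shows:

        /* S_IRUGO expands to S_IRUSR | S_IRGRP | S_IROTH in <linux/stat.h> */
        BUILD_BUG_ON((S_IRUGO | S_IWUSR) != 0644);
        BUILD_BUG_ON(S_IRUGO != 0444);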