Merge tag 'for-5.12-rc3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave...

[linux.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c

index 37639214cbbbd5edeca3e97a93360c4e4ab32020..84d2eaa381013781ec62d71a2fa63c48a5a63830 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -29,6 +29,7 @@
  
  #include "amdgpu.h"
  #include "amdgpu_gfx.h"
+#include "amdgpu_ring.h"
  #include "vi.h"
  #include "vi_structs.h"
  #include "vid.h"
@@ -1923,8 +1924,7 @@ static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
                 + ring->pipe;
  
-       hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->pipe,
-                                                           ring->queue) ?
+       hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
                         AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_RING_PRIO_DEFAULT;
         /* type-2 packets are deprecated on MEC, use type-3 instead */
         r = amdgpu_ring_init(adev, ring, 1024,
@@ -4442,8 +4442,7 @@ static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *m
         struct amdgpu_device *adev = ring->adev;
  
         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
-               if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->pipe,
-                                                             ring->queue)) {
+               if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
                         mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
                         mqd->cp_hqd_queue_priority =
                                 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
@@ -6847,6 +6846,66 @@ static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
         amdgpu_ring_write(ring, 0x0000000A);    /* poll interval */
  }
  
+
+/* The default values of mmSPI_WCL_PIPE_PERCENT_CS[0-7] are all the same */
+#define mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT      0x0000007f
+/* Emit a write to the mmSPI_WCL_PIPE_PERCENT_CS<pipe> register (pipe 0-3):
+ * 0x1 when enable is true, mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT otherwise.
+ */
+static void gfx_v8_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
+                                       uint32_t pipe, bool enable)
+{
+       uint32_t val;
+       uint32_t wcl_cs_reg;
+
+       val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT;
+       switch (pipe) {
+       case 0:
+               wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS0;
+               break;
+       case 1:
+               wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS1;
+               break;
+       case 2:
+               wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS2;
+               break;
+       case 3:
+               wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS3;
+               break;
+       default:
+               DRM_DEBUG("invalid pipe %u\n", pipe);
+               return; /* unknown pipe: nothing is emitted on the ring */
+       }
+       amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
+}
+
+#define mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT     0x07ffffff
+static void gfx_v8_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
+{
+       struct amdgpu_device *adev = ring->adev;
+       uint32_t gfx_percent = mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
+       int pipe;
+
+       /* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register that
+        * limits the number of gfx waves. Setting the low 5 bits (0x1f)
+        * makes sure gfx only gets around 25% of the gpu resources.
+        */
+       if (enable)
+               gfx_percent = 0x1f;
+       amdgpu_ring_emit_wreg(ring, mmSPI_WCL_PIPE_PERCENT_GFX, gfx_percent);
+
+       /* Restrict waves for normal/low priority compute queues as well
+        * (every CS pipe except this ring's own) to get the best QoS for
+        * high priority compute jobs.
+        * amdgpu controls only the 1st ME (CS pipes 0-3).
+        */
+       for (pipe = 0; pipe < adev->gfx.mec.num_pipe_per_mec; pipe++) {
+               if (pipe == ring->pipe)
+                       continue;
+               gfx_v8_0_emit_wave_limit_cs(ring, pipe, enable);
+       }
+}
+
  static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
         .name = "gfx_v8_0",
         .early_init = gfx_v8_0_early_init,
@@ -6930,7 +6989,9 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
                 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
                 7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
-               7, /* gfx_v8_0_emit_mem_sync_compute */
+               7 + /* gfx_v8_0_emit_mem_sync_compute */
+               5 + /* gfx_v8_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
+               15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
         .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
         .emit_fence = gfx_v8_0_ring_emit_fence_compute,
@@ -6944,6 +7005,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
         .pad_ib = amdgpu_ring_generic_pad_ib,
         .emit_wreg = gfx_v8_0_ring_emit_wreg,
         .emit_mem_sync = gfx_v8_0_emit_mem_sync_compute,
+       .emit_wave_limit = gfx_v8_0_emit_wave_limit,
  };
  
  static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {