Merge tag 'trace-v5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace

[J-linux.git] / drivers / gpu / drm / amd / amdgpu / gfx_v10_0.c
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c

index 9426e252d8aa6c67618b8244d3791e3e37ce0e28..65a4126135b0e23c540166d4652b3b1fa144791d 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -3485,6 +3485,7 @@ static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev);
  static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev);
  static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev);
  static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev);
+static void gfx_v10_0_set_mqd_funcs(struct amdgpu_device *adev);
  static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
                                  struct amdgpu_cu_info *cu_info);
  static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev);
@@ -3502,6 +3503,9 @@ static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
  static u32 gfx_v10_3_get_disabled_sa(struct amdgpu_device *adev);
  static void gfx_v10_3_program_pbb_mode(struct amdgpu_device *adev);
  static void gfx_v10_3_set_power_brake_sequence(struct amdgpu_device *adev);
+static void gfx_v10_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
+                                          uint16_t pasid, uint32_t flush_type,
+                                          bool all_hub, uint8_t dst_sel);
  
  static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
  {
@@ -3519,10 +3523,23 @@ static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue
  static void gfx10_kiq_map_queues(struct amdgpu_ring *kiq_ring,
                                  struct amdgpu_ring *ring)
  {
-       struct amdgpu_device *adev = kiq_ring->adev;
         uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
-       uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
-       uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+       uint64_t wptr_addr = ring->wptr_gpu_addr;
+       uint32_t eng_sel = 0;
+
+       switch (ring->funcs->type) {
+       case AMDGPU_RING_TYPE_COMPUTE:
+               eng_sel = 0;
+               break;
+       case AMDGPU_RING_TYPE_GFX:
+               eng_sel = 4;
+               break;
+       case AMDGPU_RING_TYPE_MES:
+               eng_sel = 5;
+               break;
+       default:
+               WARN_ON(1);
+       }
  
         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
         /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
@@ -3548,8 +3565,14 @@ static void gfx10_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
                                    enum amdgpu_unmap_queues_action action,
                                    u64 gpu_addr, u64 seq)
  {
+       struct amdgpu_device *adev = kiq_ring->adev;
         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
  
+       if (adev->enable_mes && !adev->gfx.kiq.ring.sched.ready) {
+               amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
+               return;
+       }
+
         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
         amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
                           PACKET3_UNMAP_QUEUES_ACTION(action) |
@@ -3595,12 +3618,7 @@ static void gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
                                 uint16_t pasid, uint32_t flush_type,
                                 bool all_hub)
  {
-       amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
-       amdgpu_ring_write(kiq_ring,
-                       PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
-                       PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
-                       PACKET3_INVALIDATE_TLBS_PASID(pasid) |
-                       PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+       gfx_v10_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
  }
  
  static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
@@ -3726,13 +3744,6 @@ static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev)
         gfx_v10_0_init_spm_golden_registers(adev);
  }
  
-static void gfx_v10_0_scratch_init(struct amdgpu_device *adev)
-{
-       adev->gfx.scratch.num_reg = 8;
-       adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
-       adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
-}
-
  static void gfx_v10_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
                                        bool wc, uint32_t reg, uint32_t val)
  {
@@ -3769,34 +3780,26 @@ static void gfx_v10_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
  static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring)
  {
         struct amdgpu_device *adev = ring->adev;
-       uint32_t scratch;
         uint32_t tmp = 0;
         unsigned i;
         int r;
  
-       r = amdgpu_gfx_scratch_get(adev, &scratch);
-       if (r) {
-               DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
-               return r;
-       }
-
-       WREG32(scratch, 0xCAFEDEAD);
-
+       WREG32_SOC15(GC, 0, mmSCRATCH_REG0, 0xCAFEDEAD);
         r = amdgpu_ring_alloc(ring, 3);
         if (r) {
                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
                           ring->idx, r);
-               amdgpu_gfx_scratch_free(adev, scratch);
                 return r;
         }
  
         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
-       amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
+       amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0) -
+                         PACKET3_SET_UCONFIG_REG_START);
         amdgpu_ring_write(ring, 0xDEADBEEF);
         amdgpu_ring_commit(ring);
  
         for (i = 0; i < adev->usec_timeout; i++) {
-               tmp = RREG32(scratch);
+               tmp = RREG32_SOC15(GC, 0, mmSCRATCH_REG0);
                 if (tmp == 0xDEADBEEF)
                         break;
                 if (amdgpu_emu_mode == 1)
@@ -3808,8 +3811,6 @@ static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring)
         if (i >= adev->usec_timeout)
                 r = -ETIMEDOUT;
  
-       amdgpu_gfx_scratch_free(adev, scratch);
-
         return r;
  }
  
@@ -3820,20 +3821,39 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
         struct dma_fence *f = NULL;
         unsigned index;
         uint64_t gpu_addr;
-       uint32_t tmp;
+       volatile uint32_t *cpu_ptr;
         long r;
  
-       r = amdgpu_device_wb_get(adev, &index);
-       if (r)
-               return r;
-
-       gpu_addr = adev->wb.gpu_addr + (index * 4);
-       adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
         memset(&ib, 0, sizeof(ib));
-       r = amdgpu_ib_get(adev, NULL, 16,
-                                       AMDGPU_IB_POOL_DIRECT, &ib);
-       if (r)
-               goto err1;
+
+       if (ring->is_mes_queue) {
+               uint32_t padding, offset;
+
+               offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
+               padding = amdgpu_mes_ctx_get_offs(ring,
+                                                 AMDGPU_MES_CTX_PADDING_OFFS);
+
+               ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+               ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+
+               gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding);
+               cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding);
+               *cpu_ptr = cpu_to_le32(0xCAFEDEAD);
+       } else {
+               r = amdgpu_device_wb_get(adev, &index);
+               if (r)
+                       return r;
+
+               gpu_addr = adev->wb.gpu_addr + (index * 4);
+               adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
+               cpu_ptr = &adev->wb.wb[index];
+
+               r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
+               if (r) {
+                       DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+                       goto err1;
+               }
+       }
  
         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
@@ -3854,16 +3874,17 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
                 goto err2;
         }
  
-       tmp = adev->wb.wb[index];
-       if (tmp == 0xDEADBEEF)
+       if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
                 r = 0;
         else
                 r = -EINVAL;
  err2:
-       amdgpu_ib_free(adev, &ib, NULL);
+       if (!ring->is_mes_queue)
+               amdgpu_ib_free(adev, &ib, NULL);
         dma_fence_put(f);
  err1:
-       amdgpu_device_wb_free(adev, index);
+       if (!ring->is_mes_queue)
+               amdgpu_device_wb_free(adev, index);
         return r;
  }
  
@@ -4688,7 +4709,6 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev)
  static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
                                    int me, int pipe, int queue)
  {
-       int r;
         struct amdgpu_ring *ring;
         unsigned int irq_type;
  
@@ -4708,17 +4728,13 @@ static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
         sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
  
         irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
-       r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+       return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
                              AMDGPU_RING_PRIO_DEFAULT, NULL);
-       if (r)
-               return r;
-       return 0;
  }
  
  static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
                                        int mec, int pipe, int queue)
  {
-       int r;
         unsigned irq_type;
         struct amdgpu_ring *ring;
         unsigned int hw_prio;
@@ -4741,14 +4757,10 @@ static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
                 + ring->pipe;
         hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
-                       AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
+                       AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
         /* type-2 packets are deprecated on MEC, use type-3 instead */
-       r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+       return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
                              hw_prio, NULL);
-       if (r)
-               return r;
-
-       return 0;
  }
  
  static int gfx_v10_0_sw_init(void *handle)
@@ -4823,8 +4835,6 @@ static int gfx_v10_0_sw_init(void *handle)
  
         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
  
-       gfx_v10_0_scratch_init(adev);
-
         r = gfx_v10_0_me_init(adev);
         if (r)
                 return r;
@@ -4880,16 +4890,18 @@ static int gfx_v10_0_sw_init(void *handle)
                 }
         }
  
-       r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE);
-       if (r) {
-               DRM_ERROR("Failed to init KIQ BOs!\n");
-               return r;
-       }
+       if (!adev->enable_mes_kiq) {
+               r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE);
+               if (r) {
+                       DRM_ERROR("Failed to init KIQ BOs!\n");
+                       return r;
+               }
  
-       kiq = &adev->gfx.kiq;
-       r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
-       if (r)
-               return r;
+               kiq = &adev->gfx.kiq;
+               r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
+               if (r)
+                       return r;
+       }
  
         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd));
         if (r)
@@ -4941,8 +4953,11 @@ static int gfx_v10_0_sw_fini(void *handle)
                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
  
         amdgpu_gfx_mqd_sw_fini(adev);
-       amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
-       amdgpu_gfx_kiq_fini(adev);
+
+       if (!adev->enable_mes_kiq) {
+               amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
+               amdgpu_gfx_kiq_fini(adev);
+       }
  
         gfx_v10_0_pfp_fini(adev);
         gfx_v10_0_ce_fini(adev);
@@ -6345,12 +6360,12 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
  
         /* set the wb address wether it's enabled or not */
-       rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+       rptr_addr = ring->rptr_gpu_addr;
         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
                      CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
  
-       wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+       wptr_gpu_addr = ring->wptr_gpu_addr;
         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO,
                      lower_32_bits(wptr_gpu_addr));
         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI,
@@ -6383,11 +6398,11 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
                 WREG32_SOC15(GC, 0, mmCP_RB1_WPTR, lower_32_bits(ring->wptr));
                 WREG32_SOC15(GC, 0, mmCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
                 /* Set the wb address wether it's enabled or not */
-               rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+               rptr_addr = ring->rptr_gpu_addr;
                 WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
                 WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
                              CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
-               wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+               wptr_gpu_addr = ring->wptr_gpu_addr;
                 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO,
                              lower_32_bits(wptr_gpu_addr));
                 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI,
@@ -6566,10 +6581,10 @@ static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring)
         }
  }
  
-static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring)
+static int gfx_v10_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
+                                 struct amdgpu_mqd_prop *prop)
  {
-       struct amdgpu_device *adev = ring->adev;
-       struct v10_gfx_mqd *mqd = ring->mqd_ptr;
+       struct v10_gfx_mqd *mqd = m;
         uint64_t hqd_gpu_addr, wb_gpu_addr;
         uint32_t tmp;
         uint32_t rb_bufsz;
@@ -6579,8 +6594,8 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring)
         mqd->cp_gfx_hqd_wptr_hi = 0;
  
         /* set the pointer to the MQD */
-       mqd->cp_mqd_base_addr = ring->mqd_gpu_addr & 0xfffffffc;
-       mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
+       mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc;
+       mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
  
         /* set up mqd control */
         tmp = RREG32_SOC15(GC, 0, mmCP_GFX_MQD_CONTROL);
@@ -6606,23 +6621,23 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring)
         mqd->cp_gfx_hqd_quantum = tmp;
  
         /* set up gfx hqd base. this is similar as CP_RB_BASE */
-       hqd_gpu_addr = ring->gpu_addr >> 8;
+       hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
         mqd->cp_gfx_hqd_base = hqd_gpu_addr;
         mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
  
         /* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */
-       wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+       wb_gpu_addr = prop->rptr_gpu_addr;
         mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
         mqd->cp_gfx_hqd_rptr_addr_hi =
                 upper_32_bits(wb_gpu_addr) & 0xffff;
  
         /* set up rb_wptr_poll addr */
-       wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+       wb_gpu_addr = prop->wptr_gpu_addr;
         mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
         mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
  
         /* set up the gfx_hqd_control, similar as CP_RB0_CNTL */
-       rb_bufsz = order_base_2(ring->ring_size / 4) - 1;
+       rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
         tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_CNTL);
         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
@@ -6633,9 +6648,9 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring)
  
         /* set up cp_doorbell_control */
         tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
-       if (ring->use_doorbell) {
+       if (prop->use_doorbell) {
                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
-                                   DOORBELL_OFFSET, ring->doorbell_index);
+                                   DOORBELL_OFFSET, prop->doorbell_index);
                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
                                     DOORBELL_EN, 1);
         } else
@@ -6643,13 +6658,7 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring)
                                     DOORBELL_EN, 0);
         mqd->cp_rb_doorbell_control = tmp;
  
-       /*if there are 2 gfx rings, set the lower doorbell range of the first ring,
-        *otherwise the range of the second ring will override the first ring */
-       if (ring->doorbell_index == adev->doorbell_index.gfx_ring0 << 1)
-               gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
-
         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
-       ring->wptr = 0;
         mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR);
  
         /* active the queue */
@@ -6717,7 +6726,16 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring)
                 memset((void *)mqd, 0, sizeof(*mqd));
                 mutex_lock(&adev->srbm_mutex);
                 nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
-               gfx_v10_0_gfx_mqd_init(ring);
+               amdgpu_ring_init_mqd(ring);
+
+               /*
+                * if there are 2 gfx rings, set the lower doorbell
+                * range of the first ring, otherwise the range of
+                * the second ring will override the first ring
+                */
+               if (ring->doorbell_index == adev->doorbell_index.gfx_ring0 << 1)
+                       gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
+
  #ifdef BRING_UP_DEBUG
                 gfx_v10_0_gfx_queue_init_register(ring);
  #endif
@@ -6731,7 +6749,7 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring)
                         memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
                 /* reset the ring */
                 ring->wptr = 0;
-               adev->wb.wb[ring->wptr_offs] = 0;
+               *ring->wptr_cpu_addr = 0;
                 amdgpu_ring_clear_ring(ring);
  #ifdef BRING_UP_DEBUG
                 mutex_lock(&adev->srbm_mutex);
@@ -6810,23 +6828,10 @@ done:
         return r;
  }
  
-static void gfx_v10_0_compute_mqd_set_priority(struct amdgpu_ring *ring, struct v10_compute_mqd *mqd)
+static int gfx_v10_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
+                                     struct amdgpu_mqd_prop *prop)
  {
-       struct amdgpu_device *adev = ring->adev;
-
-       if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
-               if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
-                       mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
-                       mqd->cp_hqd_queue_priority =
-                               AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
-               }
-       }
-}
-
-static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
-{
-       struct amdgpu_device *adev = ring->adev;
-       struct v10_compute_mqd *mqd = ring->mqd_ptr;
+       struct v10_compute_mqd *mqd = m;
         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
         uint32_t tmp;
  
@@ -6838,7 +6843,7 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
         mqd->compute_misc_reserved = 0x00000003;
  
-       eop_base_addr = ring->eop_gpu_addr >> 8;
+       eop_base_addr = prop->eop_gpu_addr >> 8;
         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
  
@@ -6852,9 +6857,9 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
         /* enable doorbell? */
         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
  
-       if (ring->use_doorbell) {
+       if (prop->use_doorbell) {
                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
-                                   DOORBELL_OFFSET, ring->doorbell_index);
+                                   DOORBELL_OFFSET, prop->doorbell_index);
                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                     DOORBELL_EN, 1);
                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
@@ -6869,15 +6874,14 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
         mqd->cp_hqd_pq_doorbell_control = tmp;
  
         /* disable the queue if it's active */
-       ring->wptr = 0;
         mqd->cp_hqd_dequeue_request = 0;
         mqd->cp_hqd_pq_rptr = 0;
         mqd->cp_hqd_pq_wptr_lo = 0;
         mqd->cp_hqd_pq_wptr_hi = 0;
  
         /* set the pointer to the MQD */
-       mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
-       mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
+       mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc;
+       mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
  
         /* set MQD vmid to 0 */
         tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
@@ -6885,14 +6889,14 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
         mqd->cp_mqd_control = tmp;
  
         /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
-       hqd_gpu_addr = ring->gpu_addr >> 8;
+       hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
  
         /* set up the HQD, this is similar to CP_RB0_CNTL */
         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
-                           (order_base_2(ring->ring_size / 4) - 1));
+                           (order_base_2(prop->queue_size / 4) - 1));
         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
                             ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
  #ifdef __BIG_ENDIAN
@@ -6905,22 +6909,22 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
         mqd->cp_hqd_pq_control = tmp;
  
         /* set the wb address whether it's enabled or not */
-       wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+       wb_gpu_addr = prop->rptr_gpu_addr;
         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
         mqd->cp_hqd_pq_rptr_report_addr_hi =
                 upper_32_bits(wb_gpu_addr) & 0xffff;
  
         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
-       wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+       wb_gpu_addr = prop->wptr_gpu_addr;
         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
  
         tmp = 0;
         /* enable the doorbell if requested */
-       if (ring->use_doorbell) {
+       if (prop->use_doorbell) {
                 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
-                               DOORBELL_OFFSET, ring->doorbell_index);
+                               DOORBELL_OFFSET, prop->doorbell_index);
  
                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                                     DOORBELL_EN, 1);
@@ -6933,7 +6937,6 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
         mqd->cp_hqd_pq_doorbell_control = tmp;
  
         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
-       ring->wptr = 0;
         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
  
         /* set the vmid for the queue */
@@ -6949,13 +6952,10 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
         mqd->cp_hqd_ib_control = tmp;
  
         /* set static priority for a compute queue/ring */
-       gfx_v10_0_compute_mqd_set_priority(ring, mqd);
+       mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority;
+       mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;
  
-       /* map_queues packet doesn't need activate the queue,
-        * so only kiq need set this field.
-        */
-       if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
-               mqd->cp_hqd_active = 1;
+       mqd->cp_hqd_active = prop->hqd_active;
  
         return 0;
  }
@@ -7096,7 +7096,7 @@ static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring)
                 memset((void *)mqd, 0, sizeof(*mqd));
                 mutex_lock(&adev->srbm_mutex);
                 nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
-               gfx_v10_0_compute_mqd_init(ring);
+               amdgpu_ring_init_mqd(ring);
                 gfx_v10_0_kiq_init_register(ring);
                 nv_grbm_select(adev, 0, 0, 0, 0);
                 mutex_unlock(&adev->srbm_mutex);
@@ -7118,7 +7118,7 @@ static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring)
                 memset((void *)mqd, 0, sizeof(*mqd));
                 mutex_lock(&adev->srbm_mutex);
                 nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
-               gfx_v10_0_compute_mqd_init(ring);
+               amdgpu_ring_init_mqd(ring);
                 nv_grbm_select(adev, 0, 0, 0, 0);
                 mutex_unlock(&adev->srbm_mutex);
  
@@ -7131,7 +7131,7 @@ static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring)
  
                 /* reset ring buffer */
                 ring->wptr = 0;
-               atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
+               atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
                 amdgpu_ring_clear_ring(ring);
         } else {
                 amdgpu_ring_clear_ring(ring);
@@ -7211,7 +7211,10 @@ static int gfx_v10_0_cp_resume(struct amdgpu_device *adev)
                         return r;
         }
  
-       r = gfx_v10_0_kiq_resume(adev);
+       if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
+               r = amdgpu_mes_kiq_hw_init(adev);
+       else
+               r = gfx_v10_0_kiq_resume(adev);
         if (r)
                 return r;
  
@@ -7801,6 +7804,7 @@ static int gfx_v10_0_early_init(void *handle)
         gfx_v10_0_set_irq_funcs(adev);
         gfx_v10_0_set_gds_init(adev);
         gfx_v10_0_set_rlc_funcs(adev);
+       gfx_v10_0_set_mqd_funcs(adev);
  
         /* init rlcg reg access ctrl */
         gfx_v10_0_init_rlcg_reg_access_ctrl(adev);
@@ -8451,7 +8455,7 @@ static int gfx_v10_0_set_clockgating_state(void *handle,
         return 0;
  }
  
-static void gfx_v10_0_get_clockgating_state(void *handle, u32 *flags)
+static void gfx_v10_0_get_clockgating_state(void *handle, u64 *flags)
  {
         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
         int data;
@@ -8497,7 +8501,8 @@ static void gfx_v10_0_get_clockgating_state(void *handle, u32 *flags)
  
  static u64 gfx_v10_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
  {
-       return ring->adev->wb.wb[ring->rptr_offs]; /* gfx10 is 32bit rptr*/
+       /* gfx10 is 32bit rptr*/
+       return *(uint32_t *)ring->rptr_cpu_addr;
  }
  
  static u64 gfx_v10_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
@@ -8507,7 +8512,7 @@ static u64 gfx_v10_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
  
         /* XXX check if swapping is necessary on BE */
         if (ring->use_doorbell) {
-               wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
+               wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
         } else {
                 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
                 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
@@ -8522,7 +8527,7 @@ static void gfx_v10_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
  
         if (ring->use_doorbell) {
                 /* XXX check if swapping is necessary on BE */
-               atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
+               atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
                 WDOORBELL64(ring->doorbell_index, ring->wptr);
         } else {
                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
@@ -8532,7 +8537,8 @@ static void gfx_v10_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
  
  static u64 gfx_v10_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
  {
-       return ring->adev->wb.wb[ring->rptr_offs]; /* gfx10 hardware is 32bit rptr */
+       /* gfx10 hardware is 32bit rptr */
+       return *(uint32_t *)ring->rptr_cpu_addr;
  }
  
  static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
@@ -8541,7 +8547,7 @@ static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
  
         /* XXX check if swapping is necessary on BE */
         if (ring->use_doorbell)
-               wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
+               wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
         else
                 BUG();
         return wptr;
@@ -8553,7 +8559,7 @@ static void gfx_v10_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
  
         /* XXX check if swapping is necessary on BE */
         if (ring->use_doorbell) {
-               atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
+               atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
                 WDOORBELL64(ring->doorbell_index, ring->wptr);
         } else {
                 BUG(); /* only DOORBELL method supported on gfx10 now */
@@ -8615,6 +8621,10 @@ static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
                                     (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false);
         }
  
+       if (ring->is_mes_queue)
+               /* inherit vmid from mqd */
+               control |= 0x400000;
+
         amdgpu_ring_write(ring, header);
         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
         amdgpu_ring_write(ring,
@@ -8634,6 +8644,10 @@ static void gfx_v10_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
  
+       if (ring->is_mes_queue)
+               /* inherit vmid from mqd */
+               control |= 0x40000000;
+
         /* Currently, there is a high possibility to get wave ID mismatch
          * between ME and GDS, leading to a hw deadlock, because ME generates
          * different wave IDs than the GDS expects. This situation happens
@@ -8691,7 +8705,8 @@ static void gfx_v10_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
         amdgpu_ring_write(ring, upper_32_bits(addr));
         amdgpu_ring_write(ring, lower_32_bits(seq));
         amdgpu_ring_write(ring, upper_32_bits(seq));
-       amdgpu_ring_write(ring, 0);
+       amdgpu_ring_write(ring, ring->is_mes_queue ?
+                        (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0);
  }
  
  static void gfx_v10_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
@@ -8704,10 +8719,25 @@ static void gfx_v10_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
                                upper_32_bits(addr), seq, 0xffffffff, 4);
  }
  
+static void gfx_v10_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
+                                  uint16_t pasid, uint32_t flush_type,
+                                  bool all_hub, uint8_t dst_sel)
+{ /* Queue an INVALIDATE_TLBS packet on @ring using two ring writes. */
+       amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); /* packet header */
+       amdgpu_ring_write(ring, /* second write: the four arguments OR-ed into one value */
+                         PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
+                         PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+                         PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+                         PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+}
+
  static void gfx_v10_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
                                          unsigned vmid, uint64_t pd_addr)
  {
-       amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+       if (ring->is_mes_queue)
+               gfx_v10_0_ring_invalidate_tlbs(ring, 0, 0, false, 0);
+       else
+               amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
  
         /* compute doesn't have PFP */
         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
@@ -8862,26 +8892,36 @@ static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
  {
         struct amdgpu_device *adev = ring->adev;
         struct v10_ce_ib_state ce_payload = {0};
-       uint64_t csa_addr;
+       uint64_t offset, ce_payload_gpu_addr;
+       void *ce_payload_cpu_addr;
         int cnt;
  
         cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
-       csa_addr = amdgpu_csa_vaddr(ring->adev);
+
+       if (ring->is_mes_queue) {
+               offset = offsetof(struct amdgpu_mes_ctx_meta_data,
+                                 gfx[0].gfx_meta_data) +
+                       offsetof(struct v10_gfx_meta_data, ce_payload);
+               ce_payload_gpu_addr =
+                       amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+               ce_payload_cpu_addr =
+                       amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+       } else {
+               offset = offsetof(struct v10_gfx_meta_data, ce_payload);
+               ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+               ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+       }
  
         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
                                  WRITE_DATA_DST_SEL(8) |
                                  WR_CONFIRM) |
                                  WRITE_DATA_CACHE_POLICY(0));
-       amdgpu_ring_write(ring, lower_32_bits(csa_addr +
-                             offsetof(struct v10_gfx_meta_data, ce_payload)));
-       amdgpu_ring_write(ring, upper_32_bits(csa_addr +
-                             offsetof(struct v10_gfx_meta_data, ce_payload)));
+       amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
+       amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
  
         if (resume)
-               amdgpu_ring_write_multiple(ring, adev->virt.csa_cpu_addr +
-                                          offsetof(struct v10_gfx_meta_data,
-                                                   ce_payload),
+               amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
                                            sizeof(ce_payload) >> 2);
         else
                 amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
@@ -8892,12 +8932,33 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
  {
         struct amdgpu_device *adev = ring->adev;
         struct v10_de_ib_state de_payload = {0};
-       uint64_t csa_addr, gds_addr;
+       uint64_t offset, gds_addr, de_payload_gpu_addr;
+       void *de_payload_cpu_addr;
         int cnt;
  
-       csa_addr = amdgpu_csa_vaddr(ring->adev);
-       gds_addr = ALIGN(csa_addr + AMDGPU_CSA_SIZE - adev->gds.gds_size,
-                        PAGE_SIZE);
+       if (ring->is_mes_queue) {
+               offset = offsetof(struct amdgpu_mes_ctx_meta_data,
+                                 gfx[0].gfx_meta_data) +
+                       offsetof(struct v10_gfx_meta_data, de_payload);
+               de_payload_gpu_addr =
+                       amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+               de_payload_cpu_addr =
+                       amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+
+               offset = offsetof(struct amdgpu_mes_ctx_meta_data,
+                                 gfx[0].gds_backup) +
+                       offsetof(struct v10_gfx_meta_data, de_payload);
+               gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+       } else {
+               offset = offsetof(struct v10_gfx_meta_data, de_payload);
+               de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+               de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+
+               gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
+                                AMDGPU_CSA_SIZE - adev->gds.gds_size,
+                                PAGE_SIZE);
+       }
+
         de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
         de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
  
@@ -8907,15 +8968,11 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
                                  WRITE_DATA_DST_SEL(8) |
                                  WR_CONFIRM) |
                                  WRITE_DATA_CACHE_POLICY(0));
-       amdgpu_ring_write(ring, lower_32_bits(csa_addr +
-                             offsetof(struct v10_gfx_meta_data, de_payload)));
-       amdgpu_ring_write(ring, upper_32_bits(csa_addr +
-                             offsetof(struct v10_gfx_meta_data, de_payload)));
+       amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
+       amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
  
         if (resume)
-               amdgpu_ring_write_multiple(ring, adev->virt.csa_cpu_addr +
-                                          offsetof(struct v10_gfx_meta_data,
-                                                   de_payload),
+               amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
                                            sizeof(de_payload) >> 2);
         else
                 amdgpu_ring_write_multiple(ring, (void *)&de_payload,
@@ -9152,31 +9209,51 @@ static int gfx_v10_0_eop_irq(struct amdgpu_device *adev,
         int i;
         u8 me_id, pipe_id, queue_id;
         struct amdgpu_ring *ring;
+       uint32_t mes_queue_id = entry->src_data[0];
  
         DRM_DEBUG("IH: CP EOP\n");
-       me_id = (entry->ring_id & 0x0c) >> 2;
-       pipe_id = (entry->ring_id & 0x03) >> 0;
-       queue_id = (entry->ring_id & 0x70) >> 4;
  
-       switch (me_id) {
-       case 0:
-               if (pipe_id == 0)
-                       amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
-               else
-                       amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
-               break;
-       case 1:
-       case 2:
-               for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-                       ring = &adev->gfx.compute_ring[i];
-                       /* Per-queue interrupt is supported for MEC starting from VI.
-                         * The interrupt can only be enabled/disabled per pipe instead of per queue.
-                         */
-                       if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
-                               amdgpu_fence_process(ring);
+       if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
+               struct amdgpu_mes_queue *queue;
+
+               mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
+
+               spin_lock(&adev->mes.queue_id_lock);
+               queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
+               if (queue) {
+                       DRM_DEBUG("process mes queue id = %d\n", mes_queue_id);
+                       amdgpu_fence_process(queue->ring);
+               }
+               spin_unlock(&adev->mes.queue_id_lock);
+       } else {
+               me_id = (entry->ring_id & 0x0c) >> 2;
+               pipe_id = (entry->ring_id & 0x03) >> 0;
+               queue_id = (entry->ring_id & 0x70) >> 4;
+
+               switch (me_id) {
+               case 0:
+                       if (pipe_id == 0)
+                               amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
+                       else
+                               amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
+                       break;
+               case 1:
+               case 2:
+                       for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+                               ring = &adev->gfx.compute_ring[i];
+                               /* Per-queue interrupt is supported for MEC starting from VI.
+                                * The interrupt can only be enabled/disabled per pipe instead
+                                * of per queue.
+                                */
+                               if ((ring->me == me_id) &&
+                                   (ring->pipe == pipe_id) &&
+                                   (ring->queue == queue_id))
+                                       amdgpu_fence_process(ring);
+                       }
+                       break;
                 }
-               break;
         }
+
         return 0;
  }
  
@@ -9581,6 +9658,20 @@ static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev)
         adev->gds.oa_size = 16;
  }
  
+static void gfx_v10_0_set_mqd_funcs(struct amdgpu_device *adev)
+{ /* Fill the adev->mqds[] entries for the GFX and COMPUTE IP types. */
+       /* GFX engine: MQD size is sizeof(struct v10_gfx_mqd), init hook is gfx_v10_0_gfx_mqd_init */
+       adev->mqds[AMDGPU_HW_IP_GFX].mqd_size =
+               sizeof(struct v10_gfx_mqd);
+       adev->mqds[AMDGPU_HW_IP_GFX].init_mqd =
+               gfx_v10_0_gfx_mqd_init;
+       /* COMPUTE engine: MQD size is sizeof(struct v10_compute_mqd), init hook is gfx_v10_0_compute_mqd_init */
+       adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
+               sizeof(struct v10_compute_mqd);
+       adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
+               gfx_v10_0_compute_mqd_init;
+}
+
  static void gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
                                                           u32 bitmap)
  {