Git Repo - linux.git/blobdiff - drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
drm/amdgpu: add [en/dis]able_kgq() functions
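
This revision of amdgpu_gfx.c threads an XCC index through the KIQ paths (adev->gfx.kiq[], adev->gfx.mec_bitmap[]) and adds kernel graphics queue (KGQ) counterparts to the existing KCQ map/unmap helpers. A minimal sketch of how an IP-level CP bring-up/teardown path might drive the new per-XCC helpers; the function names and the num_xcd guard are illustrative assumptions, not taken from this diff:

	/* Hypothetical callers, one enable/disable pass per XCC. */
	static int gfx_vX_Y_cp_resume(struct amdgpu_device *adev)
	{
		/* some parts may report num_xcd == 0; treat that as a single XCC */
		int num_xcd = (adev->gfx.num_xcd > 1) ? adev->gfx.num_xcd : 1;
		int xcc_id, r;

		for (xcc_id = 0; xcc_id < num_xcd; xcc_id++) {
			r = amdgpu_gfx_enable_kcq(adev, xcc_id);  /* kernel compute queues */
			if (r)
				return r;
			r = amdgpu_gfx_enable_kgq(adev, xcc_id);  /* kernel graphics queues */
			if (r)
				return r;
		}
		return 0;
	}

	static void gfx_vX_Y_cp_fini(struct amdgpu_device *adev)
	{
		int num_xcd = (adev->gfx.num_xcd > 1) ? adev->gfx.num_xcd : 1;
		int xcc_id;

		for (xcc_id = 0; xcc_id < num_xcd; xcc_id++) {
			amdgpu_gfx_disable_kgq(adev, xcc_id);
			amdgpu_gfx_disable_kcq(adev, xcc_id);
		}
	}

Inside the helpers, amdgpu_gfx_is_master_xcc() skips the map/unmap packets on slave XCCs, so a loop like the one above only submits queue management work on the master XCC of each partition.
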
index 35ed46b9249c13ccc0a3b13758f76a70fa08da90..90f5d302d5f3e4b452c836c76099a7ae5f29d26b 100644
@@ -63,10 +63,10 @@ void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
 }
 
 bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
-                                    int mec, int pipe, int queue)
+                                    int xcc_id, int mec, int pipe, int queue)
 {
        return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue),
-                       adev->gfx.mec.queue_bitmap);
+                       adev->gfx.mec_bitmap[xcc_id].queue_bitmap);
 }
 
 int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev,
@@ -204,29 +204,38 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
 
 void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
 {
-       int i, queue, pipe;
+       int i, j, queue, pipe;
        bool multipipe_policy = amdgpu_gfx_is_compute_multipipe_capable(adev);
        int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
                                     adev->gfx.mec.num_queue_per_pipe,
                                     adev->gfx.num_compute_rings);
+       int num_xcd = (adev->gfx.num_xcd > 1) ? adev->gfx.num_xcd : 1;
 
        if (multipipe_policy) {
-               /* policy: make queues evenly cross all pipes on MEC1 only */
-               for (i = 0; i < max_queues_per_mec; i++) {
-                       pipe = i % adev->gfx.mec.num_pipe_per_mec;
-                       queue = (i / adev->gfx.mec.num_pipe_per_mec) %
-                               adev->gfx.mec.num_queue_per_pipe;
-
-                       set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
-                                       adev->gfx.mec.queue_bitmap);
+               /* policy: make queues evenly cross all pipes on MEC1 only
+                * for multiple xcc, just use the original policy for simplicity */
+               for (j = 0; j < num_xcd; j++) {
+                       for (i = 0; i < max_queues_per_mec; i++) {
+                               pipe = i % adev->gfx.mec.num_pipe_per_mec;
+                               queue = (i / adev->gfx.mec.num_pipe_per_mec) %
+                                        adev->gfx.mec.num_queue_per_pipe;
+
+                               set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
+                                       adev->gfx.mec_bitmap[j].queue_bitmap);
+                       }
                }
        } else {
                /* policy: amdgpu owns all queues in the given pipe */
-               for (i = 0; i < max_queues_per_mec; ++i)
-                       set_bit(i, adev->gfx.mec.queue_bitmap);
+               for (j = 0; j < num_xcd; j++) {
+                       for (i = 0; i < max_queues_per_mec; ++i)
+                               set_bit(i, adev->gfx.mec_bitmap[j].queue_bitmap);
+               }
        }
 
-       dev_dbg(adev->dev, "mec queue bitmap weight=%d\n", bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
+       for (j = 0; j < num_xcd; j++) {
+               dev_dbg(adev->dev, "mec queue bitmap weight=%d\n",
+                       bitmap_weight(adev->gfx.mec_bitmap[j].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
+       }
 }
 
 void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
@@ -258,7 +267,7 @@ void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
 }
 
 static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
-                                 struct amdgpu_ring *ring)
+                                 struct amdgpu_ring *ring, int xcc_id)
 {
        int queue_bit;
        int mec, pipe, queue;
@@ -268,7 +277,7 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
                    * adev->gfx.mec.num_queue_per_pipe;
 
        while (--queue_bit >= 0) {
-               if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap))
+               if (test_bit(queue_bit, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
                        continue;
 
                amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
@@ -294,9 +303,9 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
 
 int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
                             struct amdgpu_ring *ring,
-                            struct amdgpu_irq_src *irq)
+                            struct amdgpu_irq_src *irq, int xcc_id)
 {
-       struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+       struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
        int r = 0;
 
        spin_lock_init(&kiq->ring_lock);
@@ -305,14 +314,21 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
        ring->ring_obj = NULL;
        ring->use_doorbell = true;
        ring->doorbell_index = adev->doorbell_index.kiq;
-
-       r = amdgpu_gfx_kiq_acquire(adev, ring);
+       ring->xcc_id = xcc_id;
+       ring->vm_hub = AMDGPU_GFXHUB_0;
+       if (xcc_id >= 1)
+               ring->doorbell_index = adev->doorbell_index.xcc1_kiq_start +
+                                       xcc_id - 1;
+       else
+               ring->doorbell_index = adev->doorbell_index.kiq;
+
+       r = amdgpu_gfx_kiq_acquire(adev, ring, xcc_id);
        if (r)
                return r;
 
        ring->eop_gpu_addr = kiq->eop_gpu_addr;
        ring->no_scheduler = true;
-       sprintf(ring->name, "kiq_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+       sprintf(ring->name, "kiq_%d.%d.%d.%d", xcc_id, ring->me, ring->pipe, ring->queue);
        r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0,
                             AMDGPU_RING_PRIO_DEFAULT, NULL);
        if (r)
@@ -326,19 +342,19 @@ void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
        amdgpu_ring_fini(ring);
 }
 
-void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev)
+void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev, int xcc_id)
 {
-       struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+       struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
 
        amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
 }
 
 int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
-                       unsigned hpd_size)
+                       unsigned hpd_size, int xcc_id)
 {
        int r;
        u32 *hpd;
-       struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+       struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
 
        r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE,
                                    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
@@ -361,13 +377,13 @@ int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
 
 /* create MQD for each compute/gfx queue */
 int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
-                          unsigned mqd_size)
+                          unsigned mqd_size, int xcc_id)
 {
-       struct amdgpu_ring *ring = NULL;
-       int r, i;
+       int r, i, j;
+       struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+       struct amdgpu_ring *ring = &kiq->ring;
 
        /* create MQD for KIQ */
-       ring = &adev->gfx.kiq.ring;
        if (!adev->enable_mes_kiq && !ring->mqd_obj) {
                /* originaly the KIQ MQD is put in GTT domain, but for SRIOV VRAM domain is a must
                 * otherwise hypervisor trigger SAVE_VF fail after driver unloaded which mean MQD
@@ -386,8 +402,8 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
                }
 
                /* prepare MQD backup */
-               adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(mqd_size, GFP_KERNEL);
-               if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS])
+               kiq->mqd_backup = kmalloc(mqd_size, GFP_KERNEL);
+               if (!kiq->mqd_backup)
                                dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
        }
 
@@ -404,6 +420,7 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
                                        return r;
                                }
 
+                               ring->mqd_size = mqd_size;
                                /* prepare MQD backup */
                                adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
                                if (!adev->gfx.me.mqd_backup[i])
@@ -414,7 +431,8 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
 
        /* create MQD for each KCQ */
        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-               ring = &adev->gfx.compute_ring[i];
+               j = i + xcc_id * adev->gfx.num_compute_rings;
+               ring = &adev->gfx.compute_ring[j];
                if (!ring->mqd_obj) {
                        r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
                                                    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
@@ -424,9 +442,10 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
                                return r;
                        }
 
+                       ring->mqd_size = mqd_size;
                        /* prepare MQD backup */
-                       adev->gfx.mec.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
-                       if (!adev->gfx.mec.mqd_backup[i])
+                       adev->gfx.mec.mqd_backup[j] = kmalloc(mqd_size, GFP_KERNEL);
+                       if (!adev->gfx.mec.mqd_backup[j])
                                dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
                }
        }
@@ -434,10 +453,11 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
        return 0;
 }
 
-void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev)
+void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev, int xcc_id)
 {
        struct amdgpu_ring *ring = NULL;
-       int i;
+       int i, j;
+       struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
 
        if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
                for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
@@ -450,43 +470,84 @@ void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev)
        }
 
        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-               ring = &adev->gfx.compute_ring[i];
-               kfree(adev->gfx.mec.mqd_backup[i]);
+               j = i + xcc_id * adev->gfx.num_compute_rings;
+               ring = &adev->gfx.compute_ring[j];
+               kfree(adev->gfx.mec.mqd_backup[j]);
                amdgpu_bo_free_kernel(&ring->mqd_obj,
                                      &ring->mqd_gpu_addr,
                                      &ring->mqd_ptr);
        }
 
-       ring = &adev->gfx.kiq.ring;
+       ring = &kiq->ring;
+       kfree(kiq->mqd_backup);
        kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
        amdgpu_bo_free_kernel(&ring->mqd_obj,
                              &ring->mqd_gpu_addr,
                              &ring->mqd_ptr);
 }
 
-int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev)
+int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
 {
-       struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+       struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
        struct amdgpu_ring *kiq_ring = &kiq->ring;
        int i, r = 0;
+       int j;
 
        if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
                return -EINVAL;
 
-       spin_lock(&adev->gfx.kiq.ring_lock);
-       if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
-                                       adev->gfx.num_compute_rings)) {
-               spin_unlock(&adev->gfx.kiq.ring_lock);
-               return -ENOMEM;
+       spin_lock(&kiq->ring_lock);
+       if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
+               if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
+                                               adev->gfx.num_compute_rings)) {
+                       spin_unlock(&kiq->ring_lock);
+                       return -ENOMEM;
+               }
+
+               for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+                       j = i + xcc_id * adev->gfx.num_compute_rings;
+                       kiq->pmf->kiq_unmap_queues(kiq_ring,
+                                                  &adev->gfx.compute_ring[i],
+                                                  RESET_QUEUES, 0, 0);
+               }
        }
 
-       for (i = 0; i < adev->gfx.num_compute_rings; i++)
-               kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i],
-                                          RESET_QUEUES, 0, 0);
+       if (adev->gfx.kiq[0].ring.sched.ready && !adev->job_hang)
+               r = amdgpu_ring_test_helper(kiq_ring);
+       spin_unlock(&kiq->ring_lock);
 
-       if (adev->gfx.kiq.ring.sched.ready && !adev->job_hang)
+       return r;
+}
+
+int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id)
+{
+       struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+       struct amdgpu_ring *kiq_ring = &kiq->ring;
+       int i, r = 0;
+       int j;
+
+       if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+               return -EINVAL;
+
+       spin_lock(&kiq->ring_lock);
+       if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
+               if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
+                                               adev->gfx.num_gfx_rings)) {
+                       spin_unlock(&kiq->ring_lock);
+                       return -ENOMEM;
+               }
+
+               for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+                       j = i + xcc_id * adev->gfx.num_gfx_rings;
+                       kiq->pmf->kiq_unmap_queues(kiq_ring,
+                                                  &adev->gfx.gfx_ring[i],
+                                                  PREEMPT_QUEUES, 0, 0);
+               }
+       }
+
+       if (adev->gfx.kiq[0].ring.sched.ready && !adev->job_hang)
                r = amdgpu_ring_test_helper(kiq_ring);
-       spin_unlock(&adev->gfx.kiq.ring_lock);
+       spin_unlock(&kiq->ring_lock);
 
        return r;
 }
@@ -504,18 +565,18 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
        return set_resource_bit;
 }
 
-int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev)
+int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id)
 {
-       struct amdgpu_kiq *kiq = &adev->gfx.kiq;
-       struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+       struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+       struct amdgpu_ring *kiq_ring = &kiq->ring;
        uint64_t queue_mask = 0;
-       int r, i;
+       int r, i, j;
 
        if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources)
                return -EINVAL;
 
        for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
-               if (!test_bit(i, adev->gfx.mec.queue_bitmap))
+               if (!test_bit(i, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
                        continue;
 
                /* This situation may be hit in the future if a new HW
@@ -531,25 +592,66 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev)
 
        DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,
                                                        kiq_ring->queue);
-       spin_lock(&adev->gfx.kiq.ring_lock);
-       r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
-                                       adev->gfx.num_compute_rings +
-                                       kiq->pmf->set_resources_size);
-       if (r) {
-               DRM_ERROR("Failed to lock KIQ (%d).\n", r);
-               spin_unlock(&adev->gfx.kiq.ring_lock);
-               return r;
+       spin_lock(&kiq->ring_lock);
+       /* No need to map kcq on the slave */
+       if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
+               r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
+                                               adev->gfx.num_compute_rings +
+                                               kiq->pmf->set_resources_size);
+               if (r) {
+                       DRM_ERROR("Failed to lock KIQ (%d).\n", r);
+                       spin_unlock(&adev->gfx.kiq[0].ring_lock);
+                       return r;
+               }
+
+               if (adev->enable_mes)
+                       queue_mask = ~0ULL;
+
+               kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
+               for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+                       j = i + xcc_id * adev->gfx.num_compute_rings;
+                       kiq->pmf->kiq_map_queues(kiq_ring,
+                                                &adev->gfx.compute_ring[i]);
+               }
        }
 
-       if (adev->enable_mes)
-               queue_mask = ~0ULL;
+       r = amdgpu_ring_test_helper(kiq_ring);
+       spin_unlock(&kiq->ring_lock);
+       if (r)
+               DRM_ERROR("KCQ enable failed\n");
+
+       return r;
+}
+
+int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id)
+{
+       struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+       struct amdgpu_ring *kiq_ring = &kiq->ring;
+       int r, i, j;
+
+       if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
+               return -EINVAL;
+
+       spin_lock(&kiq->ring_lock);
+       /* No need to map kcq on the slave */
+       if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
+               r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
+                                               adev->gfx.num_gfx_rings);
+               if (r) {
+                       DRM_ERROR("Failed to lock KIQ (%d).\n", r);
+                       spin_unlock(&adev->gfx.kiq[0].ring_lock);
+                       return r;
+               }
 
-       kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
-       for (i = 0; i < adev->gfx.num_compute_rings; i++)
-               kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.compute_ring[i]);
+               for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+                       j = i + xcc_id * adev->gfx.num_gfx_rings;
+                       kiq->pmf->kiq_map_queues(kiq_ring,
+                                                &adev->gfx.gfx_ring[i]);
+               }
+       }
 
        r = amdgpu_ring_test_helper(kiq_ring);
-       spin_unlock(&adev->gfx.kiq.ring_lock);
+       spin_unlock(&kiq->ring_lock);
        if (r)
                DRM_ERROR("KCQ enable failed\n");
 
@@ -725,7 +827,7 @@ int amdgpu_gfx_ras_sw_init(struct amdgpu_device *adev)
 
        /* If not define special ras_late_init function, use gfx default ras_late_init */
        if (!ras->ras_block.ras_late_init)
-               ras->ras_block.ras_late_init = amdgpu_ras_block_late_init;
+               ras->ras_block.ras_late_init = amdgpu_gfx_ras_late_init;
 
        /* If not defined special ras_cb function, use default ras_cb */
        if (!ras->ras_block.ras_cb)
@@ -787,7 +889,7 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
        signed long r, cnt = 0;
        unsigned long flags;
        uint32_t seq, reg_val_offs = 0, value = 0;
-       struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+       struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
        struct amdgpu_ring *ring = &kiq->ring;
 
        if (amdgpu_device_skip_hw_access(adev))
@@ -855,7 +957,7 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
        signed long r, cnt = 0;
        unsigned long flags;
        uint32_t seq;
-       struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+       struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
        struct amdgpu_ring *ring = &kiq->ring;
 
        BUG_ON(!ring->funcs->emit_wreg);
@@ -1059,3 +1161,9 @@ void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev,
                adev->firmware.fw_size += ALIGN(fw_size, PAGE_SIZE);
        }
 }
+
+bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id)
+{
+       return !(xcc_id % (adev->gfx.num_xcc_per_xcp ?
+                       adev->gfx.num_xcc_per_xcp : 1));
+}