Git Repo - linux.git/blobdiff - drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
drm/amdgpu: add [en/dis]able_kgq() functions
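
This revision of amdgpu_gfx.c threads an XCC index through the KIQ paths (adev->gfx.kiq[], adev->gfx.mec_bitmap[]) and adds kernel graphics queue (KGQ) counterparts to the existing KCQ map/unmap helpers. A minimal sketch of how an IP-level CP bring-up/teardown path might drive the new per-XCC helpers; the function names and the num_xcd guard are illustrative assumptions, not taken from this diff:

	/* Hypothetical callers, one enable/disable pass per XCC. */
	static int gfx_vX_Y_cp_resume(struct amdgpu_device *adev)
	{
		/* some parts may report num_xcd == 0; treat that as a single XCC */
		int num_xcd = (adev->gfx.num_xcd > 1) ? adev->gfx.num_xcd : 1;
		int xcc_id, r;

		for (xcc_id = 0; xcc_id < num_xcd; xcc_id++) {
			r = amdgpu_gfx_enable_kcq(adev, xcc_id);  /* kernel compute queues */
			if (r)
				return r;
			r = amdgpu_gfx_enable_kgq(adev, xcc_id);  /* kernel graphics queues */
			if (r)
				return r;
		}
		return 0;
	}

	static void gfx_vX_Y_cp_fini(struct amdgpu_device *adev)
	{
		int num_xcd = (adev->gfx.num_xcd > 1) ? adev->gfx.num_xcd : 1;
		int xcc_id;

		for (xcc_id = 0; xcc_id < num_xcd; xcc_id++) {
			amdgpu_gfx_disable_kgq(adev, xcc_id);
			amdgpu_gfx_disable_kcq(adev, xcc_id);
		}
	}

Inside the helpers, amdgpu_gfx_is_master_xcc() skips the map/unmap packets on slave XCCs, so a loop like the one above only submits queue management work on the master XCC of each partition.
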
index 35ed46b9249c13ccc0a3b13758f76a70fa08da90..90f5d302d5f3e4b452c836c76099a7ae5f29d26b 100644
@@ -63,10 +63,10 @@ void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
 }
 
 bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
-                                    int mec, int pipe, int queue)
+                                    int xcc_id, int mec, int pipe, int queue)
 {
        return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue),
-                       adev->gfx.mec.queue_bitmap);
+                       adev->gfx.mec_bitmap[xcc_id].queue_bitmap);
 }
 
 int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev,
@@ -204,29 +204,38 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
 
 void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
 {
-       int i, queue, pipe;
+       int i, j, queue, pipe;
        bool multipipe_policy = amdgpu_gfx_is_compute_multipipe_capable(adev);
        int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
                                     adev->gfx.mec.num_queue_per_pipe,
                                     adev->gfx.num_compute_rings);
+       int num_xcd = (adev->gfx.num_xcd > 1) ? adev->gfx.num_xcd : 1;
 
        if (multipipe_policy) {
-               /* policy: make queues evenly cross all pipes on MEC1 only */
-               for (i = 0; i < max_queues_per_mec; i++) {
-                       pipe = i % adev->gfx.mec.num_pipe_per_mec;
-                       queue = (i / adev->gfx.mec.num_pipe_per_mec) %
-                               adev->gfx.mec.num_queue_per_pipe;
-
-                       set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
-                                       adev->gfx.mec.queue_bitmap);
+               /* policy: make queues evenly cross all pipes on MEC1 only
+                * for multiple xcc, just use the original policy for simplicity */
+               for (j = 0; j < num_xcd; j++) {
+                       for (i = 0; i < max_queues_per_mec; i++) {
+                               pipe = i % adev->gfx.mec.num_pipe_per_mec;
+                               queue = (i / adev->gfx.mec.num_pipe_per_mec) %
+                                        adev->gfx.mec.num_queue_per_pipe;
+
+                               set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
+                                       adev->gfx.mec_bitmap[j].queue_bitmap);
+                       }
                }
        } else {
                /* policy: amdgpu owns all queues in the given pipe */
-               for (i = 0; i < max_queues_per_mec; ++i)
-                       set_bit(i, adev->gfx.mec.queue_bitmap);
+               for (j = 0; j < num_xcd; j++) {
+                       for (i = 0; i < max_queues_per_mec; ++i)
+                               set_bit(i, adev->gfx.mec_bitmap[j].queue_bitmap);
+               }
        }
 
-       dev_dbg(adev->dev, "mec queue bitmap weight=%d\n", bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
+       for (j = 0; j < num_xcd; j++) {
+               dev_dbg(adev->dev, "mec queue bitmap weight=%d\n",
+                       bitmap_weight(adev->gfx.mec_bitmap[j].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
+       }
 }
 
 void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
@@ -258,7 +267,7 @@ void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
 }
 
 static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
-                                 struct amdgpu_ring *ring)
+                                 struct amdgpu_ring *ring, int xcc_id)
 {
        int queue_bit;
        int mec, pipe, queue;
@@ -268,7 +277,7 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
                    * adev->gfx.mec.num_queue_per_pipe;
 
        while (--queue_bit >= 0) {
-               if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap))
+               if (test_bit(queue_bit, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
                        continue;
 
                amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
@@ -294,9 +303,9 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
 
 int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
                             struct amdgpu_ring *ring,
-                            struct amdgpu_irq_src *irq)
+                            struct amdgpu_irq_src *irq, int xcc_id)
 {
-       struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+       struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
        int r = 0;
 
        spin_lock_init(&kiq->ring_lock);
@@ -305,14 +314,21 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
        ring->ring_obj = NULL;
        ring->use_doorbell = true;
        ring->doorbell_index = adev->doorbell_index.kiq;
-
-       r = amdgpu_gfx_kiq_acquire(adev, ring);
+       ring->xcc_id = xcc_id;
+       ring->vm_hub = AMDGPU_GFXHUB_0;
+       if (xcc_id >= 1)
+               ring->doorbell_index = adev->doorbell_index.xcc1_kiq_start +
+                                       xcc_id - 1;
+       else
+               ring->doorbell_index = adev->doorbell_index.kiq;
+
+       r = amdgpu_gfx_kiq_acquire(adev, ring, xcc_id);
        if (r)
                return r;
 
        ring->eop_gpu_addr = kiq->eop_gpu_addr;
        ring->no_scheduler = true;
-       sprintf(ring->name, "kiq_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+       sprintf(ring->name, "kiq_%d.%d.%d.%d", xcc_id, ring->me, ring->pipe, ring->queue);
        r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0,
                             AMDGPU_RING_PRIO_DEFAULT, NULL);
        if (r)
@@ -326,19 +342,19 @@ void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
        amdgpu_ring_fini(ring);
 }
 
-void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev)
+void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev, int xcc_id)
 {
-       struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+       struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
 
        amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
 }
 
 int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
-                       unsigned hpd_size)
+                       unsigned hpd_size, int xcc_id)
 {
        int r;
        u32 *hpd;
-       struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+       struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
 
        r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE,
                                    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
@@ -361,13 +377,13 @@ int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
 
 /* create MQD for each compute/gfx queue */
 int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
-                          unsigned mqd_size)
+                          unsigned mqd_size, int xcc_id)
 {
-       struct amdgpu_ring *ring = NULL;
-       int r, i;
+       int r, i, j;
+       struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+       struct amdgpu_ring *ring = &kiq->ring;
 
        /* create MQD for KIQ */
-       ring = &adev->gfx.kiq.ring;
        if (!adev->enable_mes_kiq && !ring->mqd_obj) {
                /* originaly the KIQ MQD is put in GTT domain, but for SRIOV VRAM domain is a must
                 * otherwise hypervisor trigger SAVE_VF fail after driver unloaded which mean MQD
@@ -386,8 +402,8 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
                }
 
                /* prepare MQD backup */
-               adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(mqd_size, GFP_KERNEL);
-               if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS])
+               kiq->mqd_backup = kmalloc(mqd_size, GFP_KERNEL);
+               if (!kiq->mqd_backup)
                                dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
        }
 
@@ -404,6 +420,7 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
                                        return r;
                                }
 
+                               ring->mqd_size = mqd_size;
                                /* prepare MQD backup */
                                adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
                                if (!adev->gfx.me.mqd_backup[i])
@@ -414,7 +431,8 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
 
        /* create MQD for each KCQ */
        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-               ring = &adev->gfx.compute_ring[i];
+               j = i + xcc_id * adev->gfx.num_compute_rings;
+               ring = &adev->gfx.compute_ring[j];
                if (!ring->mqd_obj) {
                        r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
                                                    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
@@ -424,9 +442,10 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
                                return r;
                        }
 
+                       ring->mqd_size = mqd_size;
                        /* prepare MQD backup */
-                       adev->gfx.mec.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
-                       if (!adev->gfx.mec.mqd_backup[i])
+                       adev->gfx.mec.mqd_backup[j] = kmalloc(mqd_size, GFP_KERNEL);
+                       if (!adev->gfx.mec.mqd_backup[j])
                                dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
                }
        }
@@ -434,10 +453,11 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
        return 0;
 }
 
-void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev)
+void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev, int xcc_id)
 {
        struct amdgpu_ring *ring = NULL;
-       int i;
+       int i, j;
+       struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
 
        if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
                for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
@@ -450,43 +470,84 @@ void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev)
        }
 
        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-               ring = &adev->gfx.compute_ring[i];
-               kfree(adev->gfx.mec.mqd_backup[i]);
+               j = i + xcc_id * adev->gfx.num_compute_rings;
+               ring = &adev->gfx.compute_ring[j];
+               kfree(adev->gfx.mec.mqd_backup[j]);
                amdgpu_bo_free_kernel(&ring->mqd_obj,
                                      &ring->mqd_gpu_addr,
                                      &ring->mqd_ptr);
        }
 
-       ring = &adev->gfx.kiq.ring;
+       ring = &kiq->ring;
+       kfree(kiq->mqd_backup);
        kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
        amdgpu_bo_free_kernel(&ring->mqd_obj,
                              &ring->mqd_gpu_addr,
                              &ring->mqd_ptr);
 }
 
-int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev)
+int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
 {
-       struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+       struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
        struct amdgpu_ring *kiq_ring = &kiq->ring;
        int i, r = 0;
+       int j;
 
        if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
                return -EINVAL;
 
-       spin_lock(&adev->gfx.kiq.ring_lock);
-       if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
-                                       adev->gfx.num_compute_rings)) {
-               spin_unlock(&adev->gfx.kiq.ring_lock);
-               return -ENOMEM;
+       spin_lock(&kiq->ring_lock);
+       if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
+               if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
+                                               adev->gfx.num_compute_rings)) {
+                       spin_unlock(&kiq->ring_lock);
+                       return -ENOMEM;
+               }
+
+               for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+                       j = i + xcc_id * adev->gfx.num_compute_rings;
+                       kiq->pmf->kiq_unmap_queues(kiq_ring,
+                                                  &adev->gfx.compute_ring[i],
+                                                  RESET_QUEUES, 0, 0);
+               }
        }
 
-       for (i = 0; i < adev->gfx.num_compute_rings; i++)
-               kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i],
-                                          RESET_QUEUES, 0, 0);
+       if (adev->gfx.kiq[0].ring.sched.ready && !adev->job_hang)
+               r = amdgpu_ring_test_helper(kiq_ring);
+       spin_unlock(&kiq->ring_lock);
 
-       if (adev->gfx.kiq.ring.sched.ready && !adev->job_hang)
+       return r;
+}
+
+int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id)
+{
+       struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+       struct amdgpu_ring *kiq_ring = &kiq->ring;
+       int i, r = 0;
+       int j;
+
+       if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+               return -EINVAL;
+
+       spin_lock(&kiq->ring_lock);
+       if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
+               if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
+                                               adev->gfx.num_gfx_rings)) {
+                       spin_unlock(&kiq->ring_lock);
+                       return -ENOMEM;
+               }
+
+               for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+                       j = i + xcc_id * adev->gfx.num_gfx_rings;
+                       kiq->pmf->kiq_unmap_queues(kiq_ring,
+                                                  &adev->gfx.gfx_ring[i],
+                                                  PREEMPT_QUEUES, 0, 0);
+               }
+       }
+
+       if (adev->gfx.kiq[0].ring.sched.ready && !adev->job_hang)
                r = amdgpu_ring_test_helper(kiq_ring);
-       spin_unlock(&adev->gfx.kiq.ring_lock);
+       spin_unlock(&kiq->ring_lock);
 
        return r;
 }
@@ -504,18 +565,18 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
        return set_resource_bit;
 }
 
-int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev)
+int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id)
 {
-       struct amdgpu_kiq *kiq = &adev->gfx.kiq;
-       struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+       struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+       struct amdgpu_ring *kiq_ring = &kiq->ring;
        uint64_t queue_mask = 0;
-       int r, i;
+       int r, i, j;
 
        if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources)
                return -EINVAL;
 
        for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
-               if (!test_bit(i, adev->gfx.mec.queue_bitmap))
+               if (!test_bit(i, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
                        continue;
 
                /* This situation may be hit in the future if a new HW
@@ -531,25 +592,66 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev)
 
        DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,
                                                        kiq_ring->queue);
-       spin_lock(&adev->gfx.kiq.ring_lock);
-       r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
-                                       adev->gfx.num_compute_rings +
-                                       kiq->pmf->set_resources_size);
-       if (r) {
-               DRM_ERROR("Failed to lock KIQ (%d).\n", r);
-               spin_unlock(&adev->gfx.kiq.ring_lock);
-               return r;
+       spin_lock(&kiq->ring_lock);
+       /* No need to map kcq on the slave */
+       if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
+               r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
+                                               adev->gfx.num_compute_rings +
+                                               kiq->pmf->set_resources_size);
+               if (r) {
+                       DRM_ERROR("Failed to lock KIQ (%d).\n", r);
+                       spin_unlock(&adev->gfx.kiq[0].ring_lock);
+                       return r;
+               }
+
+               if (adev->enable_mes)
+                       queue_mask = ~0ULL;
+
+               kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
+               for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+                       j = i + xcc_id * adev->gfx.num_compute_rings;
+                       kiq->pmf->kiq_map_queues(kiq_ring,
+                                                &adev->gfx.compute_ring[i]);
+               }
        }
 
-       if (adev->enable_mes)
-               queue_mask = ~0ULL;
+       r = amdgpu_ring_test_helper(kiq_ring);
+       spin_unlock(&kiq->ring_lock);
+       if (r)
+               DRM_ERROR("KCQ enable failed\n");
+
+       return r;
+}
+
+int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id)
+{
+       struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+       struct amdgpu_ring *kiq_ring = &kiq->ring;
+       int r, i, j;
+
+       if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
+               return -EINVAL;
+
+       spin_lock(&kiq->ring_lock);
+       /* No need to map kcq on the slave */
+       if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
+               r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
+                                               adev->gfx.num_gfx_rings);
+               if (r) {
+                       DRM_ERROR("Failed to lock KIQ (%d).\n", r);
+                       spin_unlock(&adev->gfx.kiq[0].ring_lock);
+                       return r;
+               }
 
-       kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
-       for (i = 0; i < adev->gfx.num_compute_rings; i++)
-               kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.compute_ring[i]);
+               for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+                       j = i + xcc_id * adev->gfx.num_gfx_rings;
+                       kiq->pmf->kiq_map_queues(kiq_ring,
+                                                &adev->gfx.gfx_ring[i]);
+               }
+       }
 
        r = amdgpu_ring_test_helper(kiq_ring);
-       spin_unlock(&adev->gfx.kiq.ring_lock);
+       spin_unlock(&kiq->ring_lock);
        if (r)
                DRM_ERROR("KCQ enable failed\n");
 
@@ -725,7 +827,7 @@ int amdgpu_gfx_ras_sw_init(struct amdgpu_device *adev)
 
        /* If not define special ras_late_init function, use gfx default ras_late_init */
        if (!ras->ras_block.ras_late_init)
-               ras->ras_block.ras_late_init = amdgpu_ras_block_late_init;
+               ras->ras_block.ras_late_init = amdgpu_gfx_ras_late_init;
 
        /* If not defined special ras_cb function, use default ras_cb */
        if (!ras->ras_block.ras_cb)
@@ -787,7 +889,7 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
        signed long r, cnt = 0;
        unsigned long flags;
        uint32_t seq, reg_val_offs = 0, value = 0;
-       struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+       struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
        struct amdgpu_ring *ring = &kiq->ring;
 
        if (amdgpu_device_skip_hw_access(adev))
@@ -855,7 +957,7 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
        signed long r, cnt = 0;
        unsigned long flags;
        uint32_t seq;
-       struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+       struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
        struct amdgpu_ring *ring = &kiq->ring;
 
        BUG_ON(!ring->funcs->emit_wreg);
@@ -1059,3 +1161,9 @@ void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev,
                adev->firmware.fw_size += ALIGN(fw_size, PAGE_SIZE);
        }
 }
+
+bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id)
+{
+       return !(xcc_id % (adev->gfx.num_xcc_per_xcp ?
+                       adev->gfx.num_xcc_per_xcp : 1));
+}