X-Git-Url: https://repo.jachan.dev/linux.git/blobdiff_plain/1156e1a60f024ce29fbb88dd330c2be81c4303ea..901bdf5ea1a836400ee69aa32b04e9c209271ec7:/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 90f5d302d5f3..a33d4bc34cee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -28,6 +28,7 @@
 #include "amdgpu_gfx.h"
 #include "amdgpu_rlc.h"
 #include "amdgpu_ras.h"
+#include "amdgpu_xcp.h"
 
 /* delay 0.1 second to enable gfx off feature */
 #define GFX_OFF_DELAY_ENABLE		msecs_to_jiffies(100)
@@ -209,12 +210,12 @@ void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
 	int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
 				     adev->gfx.mec.num_queue_per_pipe,
 				     adev->gfx.num_compute_rings);
-	int num_xcd = (adev->gfx.num_xcd > 1) ? adev->gfx.num_xcd : 1;
+	int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
 
 	if (multipipe_policy) {
 		/* policy: make queues evenly cross all pipes on MEC1 only
 		 * for multiple xcc, just use the original policy for simplicity */
-		for (j = 0; j < num_xcd; j++) {
+		for (j = 0; j < num_xcc; j++) {
 			for (i = 0; i < max_queues_per_mec; i++) {
 				pipe = i % adev->gfx.mec.num_pipe_per_mec;
 				queue = (i / adev->gfx.mec.num_pipe_per_mec) %
@@ -226,13 +227,13 @@ void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
 		}
 	} else {
 		/* policy: amdgpu owns all queues in the given pipe */
-		for (j = 0; j < num_xcd; j++) {
+		for (j = 0; j < num_xcc; j++) {
 			for (i = 0; i < max_queues_per_mec; ++i)
 				set_bit(i, adev->gfx.mec_bitmap[j].queue_bitmap);
 		}
 	}
 
-	for (j = 0; j < num_xcd; j++) {
+	for (j = 0; j < num_xcc; j++) {
 		dev_dbg(adev->dev, "mec queue bitmap weight=%d\n",
 			bitmap_weight(adev->gfx.mec_bitmap[j].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
 	}
@@ -313,14 +314,12 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
 	ring->adev = NULL;
 	ring->ring_obj = NULL;
 	ring->use_doorbell = true;
-	ring->doorbell_index = adev->doorbell_index.kiq;
 	ring->xcc_id = xcc_id;
-	ring->vm_hub = AMDGPU_GFXHUB_0;
-	if (xcc_id >= 1)
-		ring->doorbell_index = adev->doorbell_index.xcc1_kiq_start +
-					xcc_id - 1;
-	else
-		ring->doorbell_index = adev->doorbell_index.kiq;
+	ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
+	ring->doorbell_index =
+		(adev->doorbell_index.kiq +
+		 xcc_id * adev->doorbell_index.xcc_doorbell_range)
+		<< 1;
 
 	r = amdgpu_gfx_kiq_acquire(adev, ring, xcc_id);
 	if (r)
@@ -382,6 +381,11 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
 	int r, i, j;
 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
 	struct amdgpu_ring *ring = &kiq->ring;
+	u32 domain = AMDGPU_GEM_DOMAIN_GTT;
+
+	/* Only enable on gfx10 and 11 for now to avoid changing behavior on older chips */
+	if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 0, 0))
+		domain |= AMDGPU_GEM_DOMAIN_VRAM;
 
 	/* create MQD for KIQ */
 	if (!adev->enable_mes_kiq && !ring->mqd_obj) {
@@ -413,7 +417,7 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
 			ring = &adev->gfx.gfx_ring[i];
 			if (!ring->mqd_obj) {
 				r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
-							    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
+							    domain, &ring->mqd_obj,
 							    &ring->mqd_gpu_addr, &ring->mqd_ptr);
 				if (r) {
 					dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
@@ -435,7 +439,7 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
 		ring = &adev->gfx.compute_ring[j];
 		if (!ring->mqd_obj) {
 			r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
-						    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
+						    domain, &ring->mqd_obj,
 						    &ring->mqd_gpu_addr, &ring->mqd_ptr);
 			if (r) {
 				dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
@@ -480,7 +484,6 @@ void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev, int xcc_id)
 
 	ring = &kiq->ring;
 	kfree(kiq->mqd_backup);
-	kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
 	amdgpu_bo_free_kernel(&ring->mqd_obj,
 			      &ring->mqd_gpu_addr,
 			      &ring->mqd_ptr);
@@ -497,22 +500,20 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
 		return -EINVAL;
 
 	spin_lock(&kiq->ring_lock);
-	if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
-		if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
-				      adev->gfx.num_compute_rings)) {
-			spin_unlock(&kiq->ring_lock);
-			return -ENOMEM;
-		}
+	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
+			      adev->gfx.num_compute_rings)) {
+		spin_unlock(&kiq->ring_lock);
+		return -ENOMEM;
+	}
 
-		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-			j = i + xcc_id * adev->gfx.num_compute_rings;
-			kiq->pmf->kiq_unmap_queues(kiq_ring,
-						   &adev->gfx.compute_ring[i],
-						   RESET_QUEUES, 0, 0);
-		}
+	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+		j = i + xcc_id * adev->gfx.num_compute_rings;
+		kiq->pmf->kiq_unmap_queues(kiq_ring,
+					   &adev->gfx.compute_ring[j],
+					   RESET_QUEUES, 0, 0);
 	}
 
-	if (adev->gfx.kiq[0].ring.sched.ready && !adev->job_hang)
+	if (kiq_ring->sched.ready && !adev->job_hang)
 		r = amdgpu_ring_test_helper(kiq_ring);
 	spin_unlock(&kiq->ring_lock);
 
@@ -540,7 +541,7 @@ int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id)
 		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
 			j = i + xcc_id * adev->gfx.num_gfx_rings;
 			kiq->pmf->kiq_unmap_queues(kiq_ring,
-						   &adev->gfx.gfx_ring[i],
+						   &adev->gfx.gfx_ring[j],
 						   PREEMPT_QUEUES, 0, 0);
 		}
 	}
@@ -592,27 +593,26 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id)
 	DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,
 		 kiq_ring->queue);
 
+	amdgpu_device_flush_hdp(adev, NULL);
+
 	spin_lock(&kiq->ring_lock);
-	/* No need to map kcq on the slave */
-	if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
-		r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
-				      adev->gfx.num_compute_rings +
-				      kiq->pmf->set_resources_size);
-		if (r) {
-			DRM_ERROR("Failed to lock KIQ (%d).\n", r);
-			spin_unlock(&adev->gfx.kiq[0].ring_lock);
-			return r;
-		}
+	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
+			      adev->gfx.num_compute_rings +
+			      kiq->pmf->set_resources_size);
+	if (r) {
+		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
+		spin_unlock(&kiq->ring_lock);
+		return r;
+	}
 
-		if (adev->enable_mes)
-			queue_mask = ~0ULL;
+	if (adev->enable_mes)
+		queue_mask = ~0ULL;
 
-		kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
-		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-			j = i + xcc_id * adev->gfx.num_compute_rings;
+	kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
+	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+		j = i + xcc_id * adev->gfx.num_compute_rings;
 		kiq->pmf->kiq_map_queues(kiq_ring,
-					 &adev->gfx.compute_ring[i]);
-	}
+					 &adev->gfx.compute_ring[j]);
 	}
 
 	r = amdgpu_ring_test_helper(kiq_ring);
@@ -632,6 +632,8 @@ int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id)
 	if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
 		return -EINVAL;
 
+	amdgpu_device_flush_hdp(adev, NULL);
+
 	spin_lock(&kiq->ring_lock);
 	/* No need to map kcq on the slave */
 	if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
@@ -639,14 +641,14 @@
 					adev->gfx.num_gfx_rings);
 		if (r) {
 			DRM_ERROR("Failed to lock KIQ (%d).\n", r);
-			spin_unlock(&adev->gfx.kiq[0].ring_lock);
+			spin_unlock(&kiq->ring_lock);
 			return r;
 		}
 
 		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
 			j = i + xcc_id * adev->gfx.num_gfx_rings;
 			kiq->pmf->kiq_map_queues(kiq_ring,
-						 &adev->gfx.gfx_ring[i]);
+						 &adev->gfx.gfx_ring[j]);
 		}
 	}
 
@@ -788,9 +790,11 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *r
 		if (r)
 			return r;
 
-		r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
-		if (r)
-			goto late_fini;
+		if (adev->gfx.cp_ecc_error_irq.funcs) {
+			r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
+			if (r)
+				goto late_fini;
+		}
 	} else {
 		amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
 	}
@@ -884,6 +888,25 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
 	return 0;
 }
 
+void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev,
+		void *ras_error_status,
+		void (*func)(struct amdgpu_device *adev, void *ras_error_status,
+				int xcc_id))
+{
+	int i;
+	int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
+	uint32_t xcc_mask = GENMASK(num_xcc - 1, 0);
+	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+
+	if (err_data) {
+		err_data->ue_count = 0;
+		err_data->ce_count = 0;
+	}
+
+	for_each_inst(i, xcc_mask)
+		func(adev, ras_error_status, i);
+}
+
 uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
 {
 	signed long r, cnt = 0;
@@ -1167,3 +1190,119 @@ bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id)
 	return !(xcc_id % (adev->gfx.num_xcc_per_xcp ?
 			adev->gfx.num_xcc_per_xcp : 1));
 }
+
+static ssize_t amdgpu_gfx_get_current_compute_partition(struct device *dev,
+						struct device_attribute *addr,
+						char *buf)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = drm_to_adev(ddev);
+	int mode;
+
+	mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
+					       AMDGPU_XCP_FL_NONE);
+
+	return sysfs_emit(buf, "%s\n", amdgpu_gfx_compute_mode_desc(mode));
+}
+
+static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev,
+						struct device_attribute *addr,
+						const char *buf, size_t count)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = drm_to_adev(ddev);
+	enum amdgpu_gfx_partition mode;
+	int ret = 0, num_xcc;
+
+	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+	if (num_xcc % 2 != 0)
+		return -EINVAL;
+
+	if (!strncasecmp("SPX", buf, strlen("SPX"))) {
+		mode = AMDGPU_SPX_PARTITION_MODE;
+	} else if (!strncasecmp("DPX", buf, strlen("DPX"))) {
+		/*
+		 * DPX mode needs AIDs to be in multiple of 2.
+		 * Each AID connects 2 XCCs.
+		 */
+		if (num_xcc%4)
+			return -EINVAL;
+		mode = AMDGPU_DPX_PARTITION_MODE;
+	} else if (!strncasecmp("TPX", buf, strlen("TPX"))) {
+		if (num_xcc != 6)
+			return -EINVAL;
+		mode = AMDGPU_TPX_PARTITION_MODE;
+	} else if (!strncasecmp("QPX", buf, strlen("QPX"))) {
+		if (num_xcc != 8)
+			return -EINVAL;
+		mode = AMDGPU_QPX_PARTITION_MODE;
+	} else if (!strncasecmp("CPX", buf, strlen("CPX"))) {
+		mode = AMDGPU_CPX_PARTITION_MODE;
+	} else {
+		return -EINVAL;
+	}
+
+	ret = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr, mode);
+
+	if (ret)
+		return ret;
+
+	return count;
+}
+
+static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev,
+						struct device_attribute *addr,
+						char *buf)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = drm_to_adev(ddev);
+	char *supported_partition;
+
+	/* TBD */
+	switch (NUM_XCC(adev->gfx.xcc_mask)) {
+	case 8:
+		supported_partition = "SPX, DPX, QPX, CPX";
+		break;
+	case 6:
+		supported_partition = "SPX, TPX, CPX";
+		break;
+	case 4:
+		supported_partition = "SPX, DPX, CPX";
+		break;
+	/* this seems only existing in emulation phase */
+	case 2:
+		supported_partition = "SPX, CPX";
+		break;
+	default:
+		supported_partition = "Not supported";
+		break;
+	}
+
+	return sysfs_emit(buf, "%s\n", supported_partition);
+}
+
+static DEVICE_ATTR(current_compute_partition, S_IRUGO | S_IWUSR,
+		   amdgpu_gfx_get_current_compute_partition,
+		   amdgpu_gfx_set_compute_partition);
+
+static DEVICE_ATTR(available_compute_partition, S_IRUGO,
+		   amdgpu_gfx_get_available_compute_partition, NULL);
+
+int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev)
+{
+	int r;
+
+	r = device_create_file(adev->dev, &dev_attr_current_compute_partition);
+	if (r)
+		return r;
+
+	r = device_create_file(adev->dev, &dev_attr_available_compute_partition);
+
+	return r;
+}
+
+void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev)
+{
+	device_remove_file(adev->dev, &dev_attr_current_compute_partition);
+	device_remove_file(adev->dev, &dev_attr_available_compute_partition);
+}
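Note (not part of the patch): the hunks above repeatedly rely on the driver's xcc_mask idiom, using NUM_XCC() to count XCC instances, GENMASK(num_xcc - 1, 0) to build a contiguous instance mask, and for_each_inst() to walk it. The stand-alone C sketch below mimics that pattern in user space for illustration only; the num_xcc() helper, the popcount-based counting, and the example mask value are stand-ins and not amdgpu code.

/*
 * Stand-alone sketch: user-space stand-ins for the kernel's
 * NUM_XCC()/GENMASK()/for_each_inst() idiom seen in the patch above.
 */
#include <stdint.h>
#include <stdio.h>

/* Stand-in for NUM_XCC(): number of XCC instances = set bits in the mask. */
static int num_xcc(uint32_t xcc_mask)
{
	return __builtin_popcount(xcc_mask);
}

int main(void)
{
	uint32_t xcc_mask = 0x3f;	/* example: six XCC instances present */
	int n = xcc_mask ? num_xcc(xcc_mask) : 1;
	/* Rough equivalent of GENMASK(n - 1, 0): a mask covering n instances. */
	uint32_t mask = (n >= 32) ? 0xffffffffu : ((1u << n) - 1);
	int i;

	/* Rough equivalent of for_each_inst(): visit each instance whose bit is set. */
	for (i = 0; i < 32; i++)
		if (mask & (1u << i))
			printf("per-XCC work would run on instance %d\n", i);

	return 0;
}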