]> Git Repo - linux.git/commitdiff
drm/amdgpu: handle extra UE register entries for gfx v9_4_3
author Tao Zhou <[email protected]>
Tue, 31 Oct 2023 02:39:51 +0000 (10:39 +0800)
committer Alex Deucher <[email protected]>
Tue, 7 Nov 2023 17:03:31 +0000 (12:03 -0500)
The UE register list is larger than the CE list.

Reported-by: [email protected]
Signed-off-by: Tao Zhou <[email protected]>
Reviewed-by: Stanley.Yang <[email protected]>
Signed-off-by: Alex Deucher <[email protected]>
drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c

index 41bbabd9ad4db5c9324055d53b5765b92a8cafdf..046ae95b366ae2f5475ee7d36002587bd8df5364 100644 (file)
@@ -3799,6 +3799,27 @@ static void gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev,
                }
        }
 
+       /* handle extra register entries of UE */
+       for (; i < ARRAY_SIZE(gfx_v9_4_3_ue_reg_list); i++) {
+               for (j = 0; j < gfx_v9_4_3_ue_reg_list[i].se_num; j++) {
+                       for (k = 0; k < gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst; k++) {
+                               /* no need to select if instance number is 1 */
+                               if (gfx_v9_4_3_ue_reg_list[i].se_num > 1 ||
+                                       gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst > 1)
+                                       gfx_v9_4_3_xcc_select_se_sh(adev, j, 0, k, xcc_id);
+
+                               amdgpu_ras_inst_query_ras_error_count(adev,
+                                       &(gfx_v9_4_3_ue_reg_list[i].reg_entry),
+                                       1,
+                                       gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ue_reg_list[i].mem_id_type].mem_id_ent,
+                                       gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ue_reg_list[i].mem_id_type].size,
+                                       GET_INST(GC, xcc_id),
+                                       AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+                                       &ue_count);
+                       }
+               }
+       }
+
        gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
                        xcc_id);
        mutex_unlock(&adev->grbm_idx_mutex);
@@ -3838,6 +3859,23 @@ static void gfx_v9_4_3_inst_reset_ras_err_count(struct amdgpu_device *adev,
                }
        }
 
+       /* handle extra register entries of UE */
+       for (; i < ARRAY_SIZE(gfx_v9_4_3_ue_reg_list); i++) {
+               for (j = 0; j < gfx_v9_4_3_ue_reg_list[i].se_num; j++) {
+                       for (k = 0; k < gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst; k++) {
+                               /* no need to select if instance number is 1 */
+                               if (gfx_v9_4_3_ue_reg_list[i].se_num > 1 ||
+                                       gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst > 1)
+                                       gfx_v9_4_3_xcc_select_se_sh(adev, j, 0, k, xcc_id);
+
+                               amdgpu_ras_inst_reset_ras_error_count(adev,
+                                       &(gfx_v9_4_3_ue_reg_list[i].reg_entry),
+                                       1,
+                                       GET_INST(GC, xcc_id));
+                       }
+               }
+       }
+
        gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
                        xcc_id);
        mutex_unlock(&adev->grbm_idx_mutex);
This page took 0.065191 seconds and 4 git commands to generate.