Merge tag 'amd-drm-next-6.5-2023-06-09' of https://gitlab.freedesktop.org/agd5f/linux...
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
index a8faf66b68780d97eedc211dc4b22265d00ceec4..5e8b493f86995457158ae1318aea2442211ec8e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
@@ -29,6 +29,7 @@
 
 #include "soc15_common.h"
 #include "soc15.h"
+#include "amdgpu_ras.h"
 
 #define regVM_L2_CNTL3_DEFAULT 0x80100007
 #define regVM_L2_CNTL4_DEFAULT 0x000000c1
@@ -270,7 +271,7 @@ static void mmhub_v1_8_init_cache_regs(struct amdgpu_device *adev)
                                            VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
                }
                WREG32_SOC15(MMHUB, i, regVM_L2_CNTL4, tmp);
-       }
+       }
 }
 
 static void mmhub_v1_8_enable_system_domain(struct amdgpu_device *adev)
@@ -327,7 +328,7 @@ static void mmhub_v1_8_disable_identity_aperture(struct amdgpu_device *adev)
 static void mmhub_v1_8_setup_vmid_config(struct amdgpu_device *adev)
 {
        struct amdgpu_vmhub *hub;
-       unsigned num_level, block_size;
+       unsigned int num_level, block_size;
        uint32_t tmp, inst_mask;
        int i, j;
 
@@ -579,3 +580,277 @@ const struct amdgpu_mmhub_funcs mmhub_v1_8_funcs = {
        .set_clockgating = mmhub_v1_8_set_clockgating,
        .get_clockgating = mmhub_v1_8_get_clockgating,
 };
+
+static const struct amdgpu_ras_err_status_reg_entry mmhub_v1_8_ce_reg_list[] = {
+       {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA0_CE_ERR_STATUS_LO, regMMEA0_CE_ERR_STATUS_HI),
+       1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA0"},
+       {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA1_CE_ERR_STATUS_LO, regMMEA1_CE_ERR_STATUS_HI),
+       1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA1"},
+       {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA2_CE_ERR_STATUS_LO, regMMEA2_CE_ERR_STATUS_HI),
+       1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA2"},
+       {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA3_CE_ERR_STATUS_LO, regMMEA3_CE_ERR_STATUS_HI),
+       1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA3"},
+       {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA4_CE_ERR_STATUS_LO, regMMEA4_CE_ERR_STATUS_HI),
+       1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA4"},
+       {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMM_CANE_CE_ERR_STATUS_LO, regMM_CANE_CE_ERR_STATUS_HI),
+       1, 0, "MM_CANE"},
+};
+
+static const struct amdgpu_ras_err_status_reg_entry mmhub_v1_8_ue_reg_list[] = {
+       {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA0_UE_ERR_STATUS_LO, regMMEA0_UE_ERR_STATUS_HI),
+       1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA0"},
+       {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA1_UE_ERR_STATUS_LO, regMMEA1_UE_ERR_STATUS_HI),
+       1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA1"},
+       {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA2_UE_ERR_STATUS_LO, regMMEA2_UE_ERR_STATUS_HI),
+       1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA2"},
+       {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA3_UE_ERR_STATUS_LO, regMMEA3_UE_ERR_STATUS_HI),
+       1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA3"},
+       {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA4_UE_ERR_STATUS_LO, regMMEA4_UE_ERR_STATUS_HI),
+       1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA4"},
+       {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMM_CANE_UE_ERR_STATUS_LO, regMM_CANE_UE_ERR_STATUS_HI),
+       1, 0, "MM_CANE"},
+};
+
+static const struct amdgpu_ras_memory_id_entry mmhub_v1_8_ras_memory_list[] = {
+       {AMDGPU_MMHUB_WGMI_PAGEMEM, "MMEA_WGMI_PAGEMEM"},
+       {AMDGPU_MMHUB_RGMI_PAGEMEM, "MMEA_RGMI_PAGEMEM"},
+       {AMDGPU_MMHUB_WDRAM_PAGEMEM, "MMEA_WDRAM_PAGEMEM"},
+       {AMDGPU_MMHUB_RDRAM_PAGEMEM, "MMEA_RDRAM_PAGEMEM"},
+       {AMDGPU_MMHUB_WIO_CMDMEM, "MMEA_WIO_CMDMEM"},
+       {AMDGPU_MMHUB_RIO_CMDMEM, "MMEA_RIO_CMDMEM"},
+       {AMDGPU_MMHUB_WGMI_CMDMEM, "MMEA_WGMI_CMDMEM"},
+       {AMDGPU_MMHUB_RGMI_CMDMEM, "MMEA_RGMI_CMDMEM"},
+       {AMDGPU_MMHUB_WDRAM_CMDMEM, "MMEA_WDRAM_CMDMEM"},
+       {AMDGPU_MMHUB_RDRAM_CMDMEM, "MMEA_RDRAM_CMDMEM"},
+       {AMDGPU_MMHUB_MAM_DMEM0, "MMEA_MAM_DMEM0"},
+       {AMDGPU_MMHUB_MAM_DMEM1, "MMEA_MAM_DMEM1"},
+       {AMDGPU_MMHUB_MAM_DMEM2, "MMEA_MAM_DMEM2"},
+       {AMDGPU_MMHUB_MAM_DMEM3, "MMEA_MAM_DMEM3"},
+       {AMDGPU_MMHUB_WRET_TAGMEM, "MMEA_WRET_TAGMEM"},
+       {AMDGPU_MMHUB_RRET_TAGMEM, "MMEA_RRET_TAGMEM"},
+       {AMDGPU_MMHUB_WIO_DATAMEM, "MMEA_WIO_DATAMEM"},
+       {AMDGPU_MMHUB_WGMI_DATAMEM, "MMEA_WGMI_DATAMEM"},
+       {AMDGPU_MMHUB_WDRAM_DATAMEM, "MMEA_WDRAM_DATAMEM"},
+};
+
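+/*
+ * Collect correctable and uncorrectable error counts for one MMHUB instance
+ * using the common RAS register-list helpers.
+ */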
+static void mmhub_v1_8_inst_query_ras_error_count(struct amdgpu_device *adev,
+                                                 uint32_t mmhub_inst,
+                                                 void *ras_err_status)
+{
+       struct ras_err_data *err_data = (struct ras_err_data *)ras_err_status;
+
+       amdgpu_ras_inst_query_ras_error_count(adev,
+                                       mmhub_v1_8_ce_reg_list,
+                                       ARRAY_SIZE(mmhub_v1_8_ce_reg_list),
+                                       mmhub_v1_8_ras_memory_list,
+                                       ARRAY_SIZE(mmhub_v1_8_ras_memory_list),
+                                       mmhub_inst,
+                                       AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE,
+                                       &err_data->ce_count);
+       amdgpu_ras_inst_query_ras_error_count(adev,
+                                       mmhub_v1_8_ue_reg_list,
+                                       ARRAY_SIZE(mmhub_v1_8_ue_reg_list),
+                                       mmhub_v1_8_ras_memory_list,
+                                       ARRAY_SIZE(mmhub_v1_8_ras_memory_list),
+                                       mmhub_inst,
+                                       AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+                                       &err_data->ue_count);
+}
+
+static void mmhub_v1_8_query_ras_error_count(struct amdgpu_device *adev,
+                                            void *ras_err_status)
+{
+       uint32_t inst_mask;
+       uint32_t i;
+
+       if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) {
+               dev_warn(adev->dev, "MMHUB RAS is not supported\n");
+               return;
+       }
+
+       inst_mask = adev->aid_mask;
+       for_each_inst(i, inst_mask)
+               mmhub_v1_8_inst_query_ras_error_count(adev, i, ras_err_status);
+}
+
+static void mmhub_v1_8_inst_reset_ras_error_count(struct amdgpu_device *adev,
+                                                 uint32_t mmhub_inst)
+{
+       amdgpu_ras_inst_reset_ras_error_count(adev,
+                                       mmhub_v1_8_ce_reg_list,
+                                       ARRAY_SIZE(mmhub_v1_8_ce_reg_list),
+                                       mmhub_inst);
+       amdgpu_ras_inst_reset_ras_error_count(adev,
+                                       mmhub_v1_8_ue_reg_list,
+                                       ARRAY_SIZE(mmhub_v1_8_ue_reg_list),
+                                       mmhub_inst);
+}
+
+static void mmhub_v1_8_reset_ras_error_count(struct amdgpu_device *adev)
+{
+       uint32_t inst_mask;
+       uint32_t i;
+
+       if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) {
+               dev_warn(adev->dev, "MMHUB RAS is not supported\n");
+               return;
+       }
+
+       inst_mask = adev->aid_mask;
+       for_each_inst(i, inst_mask)
+               mmhub_v1_8_inst_reset_ras_error_count(adev, i);
+}
+
+static const u32 mmhub_v1_8_mmea_err_status_reg[] __maybe_unused = {
+       regMMEA0_ERR_STATUS,
+       regMMEA1_ERR_STATUS,
+       regMMEA2_ERR_STATUS,
+       regMMEA3_ERR_STATUS,
+       regMMEA4_ERR_STATUS,
+};
+
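+/*
+ * Warn on SDP read/write response or data parity errors reported in the
+ * per-MMEA and MM_CANE error status registers of one MMHUB instance.
+ */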
+static void mmhub_v1_8_inst_query_ras_err_status(struct amdgpu_device *adev,
+                                                uint32_t mmhub_inst)
+{
+       uint32_t reg_value;
+       uint32_t mmea_err_status_addr_dist;
+       uint32_t i;
+
+       /* query mmea ras err status */
+       mmea_err_status_addr_dist = regMMEA1_ERR_STATUS - regMMEA0_ERR_STATUS;
+       for (i = 0; i < ARRAY_SIZE(mmhub_v1_8_mmea_err_status_reg); i++) {
+               reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
+                                               regMMEA0_ERR_STATUS,
+                                               i * mmea_err_status_addr_dist);
+               if (REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_STATUS) ||
+                   REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_WRRSP_STATUS) ||
+                   REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) {
+                       dev_warn(adev->dev,
+                                "Detected MMEA%d err in MMHUB%d, status: 0x%x\n",
+                                i, mmhub_inst, reg_value);
+               }
+       }
+
+       /* query mm_cane ras err status */
+       reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ERR_STATUS);
+       if (REG_GET_FIELD(reg_value, MM_CANE_ERR_STATUS, SDPM_RDRSP_STATUS) ||
+           REG_GET_FIELD(reg_value, MM_CANE_ERR_STATUS, SDPM_WRRSP_STATUS) ||
+           REG_GET_FIELD(reg_value, MM_CANE_ERR_STATUS, SDPM_RDRSP_DATAPARITY_ERROR)) {
+               dev_warn(adev->dev,
+                        "Detected MM CANE err in MMHUB%d, status: 0x%x\n",
+                        mmhub_inst, reg_value);
+       }
+}
+
+static void mmhub_v1_8_query_ras_error_status(struct amdgpu_device *adev)
+{
+       uint32_t inst_mask;
+       uint32_t i;
+
+       if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) {
+               dev_warn(adev->dev, "MMHUB RAS is not supported\n");
+               return;
+       }
+
+       inst_mask = adev->aid_mask;
+       for_each_inst(i, inst_mask)
+               mmhub_v1_8_inst_query_ras_err_status(adev, i);
+}
+
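+/*
+ * Clear the MMEA and MM_CANE error status registers of one MMHUB instance;
+ * the response-path clock branch is forced on while the status is cleared.
+ */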
+static void mmhub_v1_8_inst_reset_ras_err_status(struct amdgpu_device *adev,
+                                                uint32_t mmhub_inst)
+{
+       uint32_t mmea_cgtt_clk_cntl_addr_dist;
+       uint32_t mmea_err_status_addr_dist;
+       uint32_t reg_value;
+       uint32_t i;
+
+       /* reset mmea ras err status */
+       mmea_cgtt_clk_cntl_addr_dist = regMMEA1_CGTT_CLK_CTRL - regMMEA0_CGTT_CLK_CTRL;
+       mmea_err_status_addr_dist = regMMEA1_ERR_STATUS - regMMEA0_ERR_STATUS;
+       for (i = 0; i < ARRAY_SIZE(mmhub_v1_8_mmea_err_status_reg); i++) {
+               /* force clk branch on for response path
+                * set MMEA0_CGTT_CLK_CTRL.SOFT_OVERRIDE_RETURN = 1
+                */
+               reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
+                                               regMMEA0_CGTT_CLK_CTRL,
+                                               i * mmea_cgtt_clk_cntl_addr_dist);
+               reg_value = REG_SET_FIELD(reg_value, MMEA0_CGTT_CLK_CTRL,
+                                         SOFT_OVERRIDE_RETURN, 1);
+               WREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
+                                   regMMEA0_CGTT_CLK_CTRL,
+                                   i * mmea_cgtt_clk_cntl_addr_dist,
+                                   reg_value);
+
+               /* set MMEA0_ERR_STATUS.CLEAR_ERROR_STATUS = 1 */
+               reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
+                                               regMMEA0_ERR_STATUS,
+                                               i * mmea_err_status_addr_dist);
+               reg_value = REG_SET_FIELD(reg_value, MMEA0_ERR_STATUS,
+                                         CLEAR_ERROR_STATUS, 1);
+               WREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
+                                   regMMEA0_ERR_STATUS,
+                                   i * mmea_err_status_addr_dist,
+                                   reg_value);
+
+               /* set MMEA0_CGTT_CLK_CTRL.SOFT_OVERRIDE_RETURN = 0 */
+               reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
+                                               regMMEA0_CGTT_CLK_CTRL,
+                                               i * mmea_cgtt_clk_cntl_addr_dist);
+               reg_value = REG_SET_FIELD(reg_value, MMEA0_CGTT_CLK_CTRL,
+                                         SOFT_OVERRIDE_RETURN, 0);
+               WREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
+                                   regMMEA0_CGTT_CLK_CTRL,
+                                   i * mmea_cgtt_clk_cntl_addr_dist,
+                                   reg_value);
+       }
+
+       /* reset mm_cane ras err status
+        * force clk branch on for response path
+        * set MM_CANE_ICG_CTRL.SOFT_OVERRIDE_ATRET = 1
+        */
+       reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL);
+       reg_value = REG_SET_FIELD(reg_value, MM_CANE_ICG_CTRL,
+                                 SOFT_OVERRIDE_ATRET, 1);
+       WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL, reg_value);
+
+       /* set MM_CANE_ERR_STATUS.CLEAR_ERROR_STATUS = 1 */
+       reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ERR_STATUS);
+       reg_value = REG_SET_FIELD(reg_value, MM_CANE_ERR_STATUS,
+                                 CLEAR_ERROR_STATUS, 1);
+       WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ERR_STATUS, reg_value);
+
+       /* set MM_CANE_ICG_CTRL.SOFT_OVERRIDE_ATRET = 0 */
+       reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL);
+       reg_value = REG_SET_FIELD(reg_value, MM_CANE_ICG_CTRL,
+                                 SOFT_OVERRIDE_ATRET, 0);
+       WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL, reg_value);
+}
+
+static void mmhub_v1_8_reset_ras_error_status(struct amdgpu_device *adev)
+{
+       uint32_t inst_mask;
+       uint32_t i;
+
+       if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) {
+               dev_warn(adev->dev, "MMHUB RAS is not supported\n");
+               return;
+       }
+
+       inst_mask = adev->aid_mask;
+       for_each_inst(i, inst_mask)
+               mmhub_v1_8_inst_reset_ras_err_status(adev, i);
+}
+
+static const struct amdgpu_ras_block_hw_ops mmhub_v1_8_ras_hw_ops = {
+       .query_ras_error_count = mmhub_v1_8_query_ras_error_count,
+       .reset_ras_error_count = mmhub_v1_8_reset_ras_error_count,
+       .query_ras_error_status = mmhub_v1_8_query_ras_error_status,
+       .reset_ras_error_status = mmhub_v1_8_reset_ras_error_status,
+};
+
+struct amdgpu_mmhub_ras mmhub_v1_8_ras = {
+       .ras_block = {
+               .hw_ops = &mmhub_v1_8_ras_hw_ops,
+       },
+};