#include "soc15_common.h"
#include "soc15.h"
+#include "amdgpu_ras.h"
#define regVM_L2_CNTL3_DEFAULT 0x80100007
#define regVM_L2_CNTL4_DEFAULT 0x000000c1
VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
}
WREG32_SOC15(MMHUB, i, regVM_L2_CNTL4, tmp);
- }
+ }
}
static void mmhub_v1_8_enable_system_domain(struct amdgpu_device *adev)
static void mmhub_v1_8_setup_vmid_config(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub;
- unsigned num_level, block_size;
+ unsigned int num_level, block_size;
uint32_t tmp, inst_mask;
int i, j;
.set_clockgating = mmhub_v1_8_set_clockgating,
.get_clockgating = mmhub_v1_8_get_clockgating,
};
+
+static const struct amdgpu_ras_err_status_reg_entry mmhub_v1_8_ce_reg_list[] = {
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA0_CE_ERR_STATUS_LO, regMMEA0_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA0"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA1_CE_ERR_STATUS_LO, regMMEA1_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA1"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA2_CE_ERR_STATUS_LO, regMMEA2_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA2"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA3_CE_ERR_STATUS_LO, regMMEA3_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA3"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA4_CE_ERR_STATUS_LO, regMMEA4_CE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA4"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMM_CANE_CE_ERR_STATUS_LO, regMM_CANE_CE_ERR_STATUS_HI),
+ 1, 0, "MM_CANE"},
+};
+
+static const struct amdgpu_ras_err_status_reg_entry mmhub_v1_8_ue_reg_list[] = {
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA0_UE_ERR_STATUS_LO, regMMEA0_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA0"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA1_UE_ERR_STATUS_LO, regMMEA1_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA1"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA2_UE_ERR_STATUS_LO, regMMEA2_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA2"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA3_UE_ERR_STATUS_LO, regMMEA3_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA3"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA4_UE_ERR_STATUS_LO, regMMEA4_UE_ERR_STATUS_HI),
+ 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA4"},
+ {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMM_CANE_UE_ERR_STATUS_LO, regMM_CANE_UE_ERR_STATUS_HI),
+ 1, 0, "MM_CANE"},
+};
+
+static const struct amdgpu_ras_memory_id_entry mmhub_v1_8_ras_memory_list[] = {
+ {AMDGPU_MMHUB_WGMI_PAGEMEM, "MMEA_WGMI_PAGEMEM"},
+ {AMDGPU_MMHUB_RGMI_PAGEMEM, "MMEA_RGMI_PAGEMEM"},
+ {AMDGPU_MMHUB_WDRAM_PAGEMEM, "MMEA_WDRAM_PAGEMEM"},
+ {AMDGPU_MMHUB_RDRAM_PAGEMEM, "MMEA_RDRAM_PAGEMEM"},
+ {AMDGPU_MMHUB_WIO_CMDMEM, "MMEA_WIO_CMDMEM"},
+ {AMDGPU_MMHUB_RIO_CMDMEM, "MMEA_RIO_CMDMEM"},
+ {AMDGPU_MMHUB_WGMI_CMDMEM, "MMEA_WGMI_CMDMEM"},
+ {AMDGPU_MMHUB_RGMI_CMDMEM, "MMEA_RGMI_CMDMEM"},
+ {AMDGPU_MMHUB_WDRAM_CMDMEM, "MMEA_WDRAM_CMDMEM"},
+ {AMDGPU_MMHUB_RDRAM_CMDMEM, "MMEA_RDRAM_CMDMEM"},
+ {AMDGPU_MMHUB_MAM_DMEM0, "MMEA_MAM_DMEM0"},
+ {AMDGPU_MMHUB_MAM_DMEM1, "MMEA_MAM_DMEM1"},
+ {AMDGPU_MMHUB_MAM_DMEM2, "MMEA_MAM_DMEM2"},
+ {AMDGPU_MMHUB_MAM_DMEM3, "MMEA_MAM_DMEM3"},
+ {AMDGPU_MMHUB_WRET_TAGMEM, "MMEA_WRET_TAGMEM"},
+ {AMDGPU_MMHUB_RRET_TAGMEM, "MMEA_RRET_TAGMEM"},
+ {AMDGPU_MMHUB_WIO_DATAMEM, "MMEA_WIO_DATAMEM"},
+ {AMDGPU_MMHUB_WGMI_DATAMEM, "MMEA_WGMI_DATAMEM"},
+ {AMDGPU_MMHUB_WDRAM_DATAMEM, "MMEA_WDRAM_DATAMEM"},
+};
+
+static void mmhub_v1_8_inst_query_ras_error_count(struct amdgpu_device *adev,
+ uint32_t mmhub_inst,
+ void *ras_err_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_err_status;
+
+ amdgpu_ras_inst_query_ras_error_count(adev,
+ mmhub_v1_8_ce_reg_list,
+ ARRAY_SIZE(mmhub_v1_8_ce_reg_list),
+ mmhub_v1_8_ras_memory_list,
+ ARRAY_SIZE(mmhub_v1_8_ras_memory_list),
+ mmhub_inst,
+ AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE,
+ &err_data->ce_count);
+ amdgpu_ras_inst_query_ras_error_count(adev,
+ mmhub_v1_8_ue_reg_list,
+ ARRAY_SIZE(mmhub_v1_8_ue_reg_list),
+ mmhub_v1_8_ras_memory_list,
+ ARRAY_SIZE(mmhub_v1_8_ras_memory_list),
+ mmhub_inst,
+ AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+ &err_data->ue_count);
+}
+
+static void mmhub_v1_8_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_err_status)
+{
+ uint32_t inst_mask;
+ uint32_t i;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) {
+ dev_warn(adev->dev, "MMHUB RAS is not supported\n");
+ return;
+ }
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask)
+ mmhub_v1_8_inst_query_ras_error_count(adev, i, ras_err_status);
+}
+
+static void mmhub_v1_8_inst_reset_ras_error_count(struct amdgpu_device *adev,
+ uint32_t mmhub_inst)
+{
+ amdgpu_ras_inst_reset_ras_error_count(adev,
+ mmhub_v1_8_ce_reg_list,
+ ARRAY_SIZE(mmhub_v1_8_ce_reg_list),
+ mmhub_inst);
+ amdgpu_ras_inst_reset_ras_error_count(adev,
+ mmhub_v1_8_ue_reg_list,
+ ARRAY_SIZE(mmhub_v1_8_ue_reg_list),
+ mmhub_inst);
+}
+
+static void mmhub_v1_8_reset_ras_error_count(struct amdgpu_device *adev)
+{
+ uint32_t inst_mask;
+ uint32_t i;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) {
+ dev_warn(adev->dev, "MMHUB RAS is not supported\n");
+ return;
+ }
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask)
+ mmhub_v1_8_inst_reset_ras_error_count(adev, i);
+}
+
+static const u32 mmhub_v1_8_mmea_err_status_reg[] __maybe_unused = {
+ regMMEA0_ERR_STATUS,
+ regMMEA1_ERR_STATUS,
+ regMMEA2_ERR_STATUS,
+ regMMEA3_ERR_STATUS,
+ regMMEA4_ERR_STATUS,
+};
+
+static void mmhub_v1_8_inst_query_ras_err_status(struct amdgpu_device *adev,
+ uint32_t mmhub_inst)
+{
+ uint32_t reg_value;
+ uint32_t mmea_err_status_addr_dist;
+ uint32_t i;
+
+ /* query mmea ras err status */
+ mmea_err_status_addr_dist = regMMEA1_ERR_STATUS - regMMEA0_ERR_STATUS;
+ for (i = 0; i < ARRAY_SIZE(mmhub_v1_8_mmea_err_status_reg); i++) {
+ reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
+ regMMEA0_ERR_STATUS,
+ i * mmea_err_status_addr_dist);
+ if (REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_STATUS) ||
+ REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_WRRSP_STATUS) ||
+ REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) {
+ dev_warn(adev->dev,
+ "Detected MMEA%d err in MMHUB%d, status: 0x%x\n",
+ i, mmhub_inst, reg_value);
+ }
+ }
+
+ /* query mm_cane ras err status */
+ reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ERR_STATUS);
+ if (REG_GET_FIELD(reg_value, MM_CANE_ERR_STATUS, SDPM_RDRSP_STATUS) ||
+ REG_GET_FIELD(reg_value, MM_CANE_ERR_STATUS, SDPM_WRRSP_STATUS) ||
+ REG_GET_FIELD(reg_value, MM_CANE_ERR_STATUS, SDPM_RDRSP_DATAPARITY_ERROR)) {
+ dev_warn(adev->dev,
+ "Detected MM CANE err in MMHUB%d, status: 0x%x\n",
+ mmhub_inst, reg_value);
+ }
+}
+
+static void mmhub_v1_8_query_ras_error_status(struct amdgpu_device *adev)
+{
+ uint32_t inst_mask;
+ uint32_t i;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) {
+ dev_warn(adev->dev, "MMHUB RAS is not supported\n");
+ return;
+ }
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask)
+ mmhub_v1_8_inst_query_ras_err_status(adev, i);
+}
+
+static void mmhub_v1_8_inst_reset_ras_err_status(struct amdgpu_device *adev,
+ uint32_t mmhub_inst)
+{
+ uint32_t mmea_cgtt_clk_cntl_addr_dist;
+ uint32_t mmea_err_status_addr_dist;
+ uint32_t reg_value;
+ uint32_t i;
+
+ /* reset mmea ras err status */
+ mmea_cgtt_clk_cntl_addr_dist = regMMEA1_CGTT_CLK_CTRL - regMMEA0_CGTT_CLK_CTRL;
+ mmea_err_status_addr_dist = regMMEA1_ERR_STATUS - regMMEA0_ERR_STATUS;
+ for (i = 0; i < ARRAY_SIZE(mmhub_v1_8_mmea_err_status_reg); i++) {
+ /* force clk branch on for response path
+ * set MMEA0_CGTT_CLK_CTRL.SOFT_OVERRIDE_RETURN = 1
+ */
+ reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
+ regMMEA0_CGTT_CLK_CTRL,
+ i * mmea_cgtt_clk_cntl_addr_dist);
+ reg_value = REG_SET_FIELD(reg_value, MMEA0_CGTT_CLK_CTRL,
+ SOFT_OVERRIDE_RETURN, 1);
+ WREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
+ regMMEA0_CGTT_CLK_CTRL,
+ i * mmea_cgtt_clk_cntl_addr_dist,
+ reg_value);
+
+ /* set MMEA0_ERR_STATUS.CLEAR_ERROR_STATUS = 1 */
+ reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
+ regMMEA0_ERR_STATUS,
+ i * mmea_err_status_addr_dist);
+ reg_value = REG_SET_FIELD(reg_value, MMEA0_ERR_STATUS,
+ CLEAR_ERROR_STATUS, 1);
+ WREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
+ regMMEA0_ERR_STATUS,
+ i * mmea_err_status_addr_dist,
+ reg_value);
+
+ /* set MMEA0_CGTT_CLK_CTRL.SOFT_OVERRIDE_RETURN = 0 */
+ reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
+ regMMEA0_CGTT_CLK_CTRL,
+ i * mmea_cgtt_clk_cntl_addr_dist);
+ reg_value = REG_SET_FIELD(reg_value, MMEA0_CGTT_CLK_CTRL,
+ SOFT_OVERRIDE_RETURN, 0);
+ WREG32_SOC15_OFFSET(MMHUB, mmhub_inst,
+ regMMEA0_CGTT_CLK_CTRL,
+ i * mmea_cgtt_clk_cntl_addr_dist,
+ reg_value);
+ }
+
+ /* reset mm_cane ras err status
+ * force clk branch on for response path
+ * set MM_CANE_ICG_CTRL.SOFT_OVERRIDE_ATRET = 1
+ */
+ reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL);
+ reg_value = REG_SET_FIELD(reg_value, MM_CANE_ICG_CTRL,
+ SOFT_OVERRIDE_ATRET, 1);
+ WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL, reg_value);
+
+ /* set MM_CANE_ERR_STATUS.CLEAR_ERROR_STATUS = 1 */
+ reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ERR_STATUS);
+ reg_value = REG_SET_FIELD(reg_value, MM_CANE_ERR_STATUS,
+ CLEAR_ERROR_STATUS, 1);
+ WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ERR_STATUS, reg_value);
+
+ /* set MM_CANE_ICG_CTRL.SOFT_OVERRIDE_ATRET = 0 */
+ reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL);
+ reg_value = REG_SET_FIELD(reg_value, MM_CANE_ICG_CTRL,
+ SOFT_OVERRIDE_ATRET, 0);
+ WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL, reg_value);
+}
+
+static void mmhub_v1_8_reset_ras_error_status(struct amdgpu_device *adev)
+{
+ uint32_t inst_mask;
+ uint32_t i;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) {
+ dev_warn(adev->dev, "MMHUB RAS is not supported\n");
+ return;
+ }
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask)
+ mmhub_v1_8_inst_reset_ras_err_status(adev, i);
+}
+
+static const struct amdgpu_ras_block_hw_ops mmhub_v1_8_ras_hw_ops = {
+ .query_ras_error_count = mmhub_v1_8_query_ras_error_count,
+ .reset_ras_error_count = mmhub_v1_8_reset_ras_error_count,
+ .query_ras_error_status = mmhub_v1_8_query_ras_error_status,
+ .reset_ras_error_status = mmhub_v1_8_reset_ras_error_status,
+};
+
+struct amdgpu_mmhub_ras mmhub_v1_8_ras = {
+ .ras_block = {
+ .hw_ops = &mmhub_v1_8_ras_hw_ops,
+ },
+};