Git Repo - linux.git/blobdiff - drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
Merge tag 'drm-next-2019-09-18' of git://anongit.freedesktop.org/drm/drm
[linux.git] / drivers / gpu / drm / amd / amdgpu / amdgpu_ras.h
index b2841195bd3bbae0d0763d2370cc15c9ddaf9a99..6c76bb2a684325925ca7952d0d0a41c3372096e4 100644 (file)
@@ -29,6 +29,7 @@
 #include "amdgpu.h"
 #include "amdgpu_psp.h"
 #include "ta_ras_if.h"
+#include "amdgpu_ras_eeprom.h"
 
 enum amdgpu_ras_block {
        AMDGPU_RAS_BLOCK__UMC = 0,
@@ -52,6 +53,236 @@ enum amdgpu_ras_block {
 #define AMDGPU_RAS_BLOCK_COUNT AMDGPU_RAS_BLOCK__LAST
 #define AMDGPU_RAS_BLOCK_MASK  ((1ULL << AMDGPU_RAS_BLOCK_COUNT) - 1)
 
+enum amdgpu_ras_gfx_subblock {
+       /* CPC; *_INDEX_START/_END alias each group's first/last entry */
+       AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
+       AMDGPU_RAS_BLOCK__GFX_CPC_SCRATCH =
+               AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START,
+       AMDGPU_RAS_BLOCK__GFX_CPC_UCODE,
+       AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME1,
+       AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
+       AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME1,
+       AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME2,
+       AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
+       AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2,
+       AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_END =
+               AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2,
+       /* CPF */
+       AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START,
+       AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME2 =
+               AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START,
+       AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME1,
+       AMDGPU_RAS_BLOCK__GFX_CPF_TAG,
+       AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPF_TAG,
+       /* CPG */
+       AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START,
+       AMDGPU_RAS_BLOCK__GFX_CPG_DMA_ROQ =
+               AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START,
+       AMDGPU_RAS_BLOCK__GFX_CPG_DMA_TAG,
+       AMDGPU_RAS_BLOCK__GFX_CPG_TAG,
+       AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPG_TAG,
+       /* GDS */
+       AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START,
+       AMDGPU_RAS_BLOCK__GFX_GDS_MEM = AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START,
+       AMDGPU_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
+       AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
+       AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
+       AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
+       AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_END =
+               AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
+       /* SPI (single entry, no START/END range markers) */
+       AMDGPU_RAS_BLOCK__GFX_SPI_SR_MEM,
+       /* SQ */
+       AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START,
+       AMDGPU_RAS_BLOCK__GFX_SQ_SGPR = AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START,
+       AMDGPU_RAS_BLOCK__GFX_SQ_LDS_D,
+       AMDGPU_RAS_BLOCK__GFX_SQ_LDS_I,
+       AMDGPU_RAS_BLOCK__GFX_SQ_VGPR,
+       AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_END = AMDGPU_RAS_BLOCK__GFX_SQ_VGPR,
+       /* SQC (3 sub-ranges) */
+       AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START,
+       /* SQC range 0 */
+       AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START =
+               AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START,
+       AMDGPU_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
+               AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START,
+       AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
+       AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
+       AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
+       AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
+       AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
+       AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
+       AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_END =
+               AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
+       /* SQC range 1 */
+       AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START,
+       AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
+               AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START,
+       AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
+       AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
+       AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
+       AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
+       AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
+       AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
+       AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
+       AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
+       AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_END =
+               AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
+       /* SQC range 2 */
+       AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START,
+       AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
+               AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START,
+       AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
+       AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
+       AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
+       AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
+       AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
+       AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
+       AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
+       AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
+       AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END =
+               AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
+       AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_END =
+               AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END,
+       /* TA */
+       AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START,
+       AMDGPU_RAS_BLOCK__GFX_TA_FS_DFIFO =
+               AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START,
+       AMDGPU_RAS_BLOCK__GFX_TA_FS_AFIFO,
+       AMDGPU_RAS_BLOCK__GFX_TA_FL_LFIFO,
+       AMDGPU_RAS_BLOCK__GFX_TA_FX_LFIFO,
+       AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO,
+       AMDGPU_RAS_BLOCK__GFX_TA_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO,
+       /* TCA */
+       AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START,
+       AMDGPU_RAS_BLOCK__GFX_TCA_HOLE_FIFO =
+               AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START,
+       AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO,
+       AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_END =
+               AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO,
+       /* TCC (5 sub-ranges) */
+       AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START,
+       /* TCC range 0 */
+       AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START =
+               AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START,
+       AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA =
+               AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START,
+       AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
+       AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
+       AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
+       AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
+       AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
+       AMDGPU_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
+       AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
+       AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_END =
+               AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
+       /* TCC range 1 */
+       AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START,
+       AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_DEC =
+               AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START,
+       AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
+       AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_END =
+               AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
+       /* TCC range 2 */
+       AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START,
+       AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_DATA =
+               AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START,
+       AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
+       AMDGPU_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
+       AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
+       AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
+       AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO,
+       AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
+       AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
+       AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_END =
+               AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
+       /* TCC range 3 */
+       AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START,
+       AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO =
+               AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START,
+       AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
+       AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_END =
+               AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
+       /* TCC range 4 */
+       AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START,
+       AMDGPU_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
+               AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START,
+       AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
+       AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END =
+               AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
+       AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_END =
+               AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END,
+       /* TCI (single entry, no START/END range markers) */
+       AMDGPU_RAS_BLOCK__GFX_TCI_WRITE_RAM,
+       /* TCP */
+       AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START,
+       AMDGPU_RAS_BLOCK__GFX_TCP_CACHE_RAM =
+               AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START,
+       AMDGPU_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
+       AMDGPU_RAS_BLOCK__GFX_TCP_CMD_FIFO,
+       AMDGPU_RAS_BLOCK__GFX_TCP_VM_FIFO,
+       AMDGPU_RAS_BLOCK__GFX_TCP_DB_RAM,
+       AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
+       AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
+       AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_END =
+               AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
+       /* TD */
+       AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START,
+       AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_LO =
+               AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START,
+       AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
+       AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO,
+       AMDGPU_RAS_BLOCK__GFX_TD_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO,
+       /* EA (3 sub-ranges) */
+       AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START,
+       /* EA range 0 */
+       AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START =
+               AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START,
+       AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM =
+               AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START,
+       AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
+       AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
+       AMDGPU_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
+       AMDGPU_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
+       AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
+       AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
+       AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
+       AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_END =
+               AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
+       /* EA range 1 */
+       AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START,
+       AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM =
+               AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START,
+       AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
+       AMDGPU_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
+       AMDGPU_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
+       AMDGPU_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
+       AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
+       AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
+       AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_END =
+               AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
+       /* EA range 2 */
+       AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START,
+       AMDGPU_RAS_BLOCK__GFX_EA_MAM_D0MEM =
+               AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START,
+       AMDGPU_RAS_BLOCK__GFX_EA_MAM_D1MEM,
+       AMDGPU_RAS_BLOCK__GFX_EA_MAM_D2MEM,
+       AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM,
+       AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END =
+               AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM,
+       AMDGPU_RAS_BLOCK__GFX_EA_INDEX_END =
+               AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END,
+       /* UTC VM L2 bank */
+       AMDGPU_RAS_BLOCK__UTC_VML2_BANK_CACHE,
+       /* UTC VM walker */
+       AMDGPU_RAS_BLOCK__UTC_VML2_WALKER,
+       /* UTC ATC L2 2MB cache */
+       AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
+       /* UTC ATC L2 4KB cache */
+       AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
+       AMDGPU_RAS_BLOCK__GFX_MAX /* one past the last sub-block index */
+};
+
 enum amdgpu_ras_error_type {
        AMDGPU_RAS_ERROR__NONE                                                  = 0,
        AMDGPU_RAS_ERROR__PARITY                                                = 1,
@@ -76,9 +307,6 @@ struct ras_common_if {
        char name[32];
 };
 
-typedef int (*ras_ih_cb)(struct amdgpu_device *adev,
-               struct amdgpu_iv_entry *entry);
-
 struct amdgpu_ras {
        /* ras infrastructure */
        /* for ras itself. */
@@ -106,10 +334,85 @@ struct amdgpu_ras {
        struct mutex recovery_lock;
 
        uint32_t flags;
+
+       struct amdgpu_ras_eeprom_control eeprom_control;
 };
 
-/* interfaces for IP */
+struct ras_fs_data {
+       char sysfs_name[32]; /* presumably the sysfs node name; confirm */
+       char debugfs_name[32]; /* presumably the debugfs node name; confirm */
+};
+
+struct ras_err_data {
+       unsigned long ue_count; /* presumably uncorrectable errors; confirm */
+       unsigned long ce_count; /* presumably correctable errors; confirm */
+       unsigned long err_addr_cnt; /* presumably entries in err_addr; confirm */
+       uint64_t *err_addr; /* presumably error addresses; owner not shown here */
+};
 
+struct ras_err_handler_data {
+       /* points to the array of bad-page entries */
+       struct {
+               unsigned long bp;
+               struct amdgpu_bo *bo;
+       } *bps;
+       /* number of entries */
+       int count;
+       /* space remaining for new entries */
+       int space_left;
+       /* index of the last reserved entry, plus one */
+       int last_reserved;
+};
+
+typedef int (*ras_ih_cb)(struct amdgpu_device *adev,
+               struct ras_err_data *err_data,
+               struct amdgpu_iv_entry *entry); /* type of ras_ih_data.cb */
+
+struct ras_ih_data {
+       /* work item for the interrupt bottom half */
+       struct work_struct ih_work;
+       int inuse;
+       /* IP callback, see ras_ih_cb */
+       ras_ih_cb cb;
+       /* buffer of entries */
+       unsigned char *ring;
+       unsigned int ring_size;
+       unsigned int element_size;
+       unsigned int aligned_element_size;
+       unsigned int rptr; /* presumably the read position in ring; confirm */
+       unsigned int wptr; /* presumably the write position in ring; confirm */
+};
+
+struct ras_manager {
+       struct ras_common_if head;
+       /* reference count */
+       int use;
+       /* list linkage for this ras block */
+       struct list_head node;
+       /* the device */
+       struct amdgpu_device *adev;
+       /* debugfs entry */
+       struct dentry *ent;
+       /* sysfs attribute */
+       struct device_attribute sysfs_attr;
+       int attr_inuse;
+
+       /* fs node names (sysfs_name and debugfs_name) */
+       struct ras_fs_data fs_data;
+
+       /* IH data: bottom-half work, IP callback and entry ring */
+       struct ras_ih_data ih_data;
+
+       struct ras_err_data err_data; /* see struct ras_err_data */
+};
+
+struct ras_badpage {
+       unsigned int bp; /* bad page; unit (pfn vs. address) not shown here */
+       unsigned int size;
+       unsigned int flags; /* flag values are not defined in this view */
+};
+
+/* interfaces for IP */
 struct ras_fs_if {
        struct ras_common_if head;
        char sysfs_name[32];
@@ -184,7 +487,7 @@ int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,
 void amdgpu_ras_resume(struct amdgpu_device *adev);
 void amdgpu_ras_suspend(struct amdgpu_device *adev);
 
-int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
+unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev,
                bool is_ce);
 
 /* error handling functions */
This page took 0.044341 seconds and 4 git commands to generate.