#include "ta_ras_if.h"
#include "amdgpu_ras_eeprom.h"
+#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS (0x1 << 0)
+#define AMDGPU_RAS_FLAG_INIT_NEED_RESET (0x1 << 1)
+#define AMDGPU_RAS_FLAG_SKIP_BAD_PAGE_RESV (0x1 << 2)
+
enum amdgpu_ras_block {
AMDGPU_RAS_BLOCK__UMC = 0,
AMDGPU_RAS_BLOCK__SDMA,
struct amdgpu_ras_eeprom_control eeprom_control;
bool error_query_ready;
+
+ /* bad page count threshold */
+ uint32_t bad_page_cnt_threshold;
+
+ /* disable ras error count harvest in recovery */
+ bool disable_ras_err_cnt_harvest;
};
struct ras_fs_data {
unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev,
bool is_ce);
+bool amdgpu_ras_check_err_threshold(struct amdgpu_device *adev);
+
/* error handling functions */
int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
struct eeprom_table_record *bps, int pages);
{
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
- /* save bad page to eeprom before gpu reset,
- * i2c may be unstable in gpu reset
+ /*
+ * Save bad page to eeprom before gpu reset, i2c may be unstable
+ * in gpu reset.
+ *
+ * Also, exclude the case when ras recovery issuer is
+ * eeprom page write itself.
*/
- if (in_task())
+ if (!(ras->flags & AMDGPU_RAS_FLAG_SKIP_BAD_PAGE_RESV) && in_task())
amdgpu_ras_reserve_bad_pages(adev);
if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)