Merge tag 'vfio-v5.10-rc1' of git://github.com/awilliam/linux-vfio

[J-linux.git] / drivers / gpu / drm / amd / amdgpu / amdgpu_ras.h
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h

index b2667342cf674e73439c0061e7725ee844e44ef5..6b8d7bb83bb3c30c33962a5bbf19145c760e81f8 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -31,6 +31,10 @@
  #include "ta_ras_if.h"
  #include "amdgpu_ras_eeprom.h"
  
+#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS          (0x1 << 0) /* bit 0 of the RAS flags word; NOTE(review): presumably "RAS was initialised by the VBIOS" - confirm against users */
+#define AMDGPU_RAS_FLAG_INIT_NEED_RESET                (0x1 << 1) /* bit 1; NOTE(review): presumably "a reset is required during RAS init" - confirm against users */
+#define AMDGPU_RAS_FLAG_SKIP_BAD_PAGE_RESV     (0x1 << 2) /* bit 2; when set in ras->flags, amdgpu_ras_reset_gpu() does not call amdgpu_ras_reserve_bad_pages() */
+
  enum amdgpu_ras_block {
         AMDGPU_RAS_BLOCK__UMC = 0,
         AMDGPU_RAS_BLOCK__SDMA,
@@ -336,6 +340,12 @@ struct amdgpu_ras {
         struct amdgpu_ras_eeprom_control eeprom_control;
  
         bool error_query_ready;
+
+       /* bad page count threshold */
+       uint32_t bad_page_cnt_threshold;
+
+       /* disable ras error count harvest in recovery */
+       bool disable_ras_err_cnt_harvest;
  };
  
  struct ras_fs_data {
@@ -490,6 +500,8 @@ void amdgpu_ras_suspend(struct amdgpu_device *adev);
  unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev,
                 bool is_ce);
  
+bool amdgpu_ras_check_err_threshold(struct amdgpu_device *adev);
+
  /* error handling functions */
  int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
                 struct eeprom_table_record *bps, int pages);
@@ -500,10 +512,14 @@ static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
  {
         struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
  
-       /* save bad page to eeprom before gpu reset,
-        * i2c may be unstable in gpu reset
+       /*
+        * Save bad page to eeprom before gpu reset, i2c may be unstable
+        * in gpu reset.
+        *
+        * Also, exclude the case when ras recovery issuer is
+        * eeprom page write itself.
          */
-       if (in_task())
+       if (!(ras->flags & AMDGPU_RAS_FLAG_SKIP_BAD_PAGE_RESV) && in_task())
                 amdgpu_ras_reserve_bad_pages(adev);
  
         if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)