diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 1bedb416eebd00e841d96005b49e3c071dda60ea..c136bd4497446e4b920a36618aba0cc89df51425 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -34,6 +34,8 @@
 #include "amdgpu_xgmi.h"
 #include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
 
+static const char *RAS_FS_NAME = "ras";
+
 const char *ras_error_string[] = {
        "none",
        "parity",
@@ -62,13 +64,14 @@ const char *ras_block_string[] = {
 #define ras_err_str(i) (ras_error_string[ffs(i)])
 #define ras_block_str(i) (ras_block_string[i])
 
-#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS          1
-#define AMDGPU_RAS_FLAG_INIT_NEED_RESET                2
 #define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS)
 
 /* inject address is 52 bits */
 #define        RAS_UMC_INJECT_ADDR_LIMIT       (0x1ULL << 52)
 
+/* typical ECC bad page rate (1 bad page per 100MB VRAM) */
+#define RAS_BAD_PAGE_RATE              (100 * 1024 * 1024ULL)
+
 enum amdgpu_ras_retire_page_reservation {
        AMDGPU_RAS_RETIRE_PAGE_RESERVED,
        AMDGPU_RAS_RETIRE_PAGE_PENDING,
@@ -77,6 +80,8 @@ enum amdgpu_ras_retire_page_reservation {
 
 atomic_t amdgpu_ras_in_intr = ATOMIC_INIT(0);
 
+static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
+                               uint64_t addr);
 static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
                                uint64_t addr);
 
@@ -367,12 +372,19 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
 static ssize_t amdgpu_ras_debugfs_eeprom_write(struct file *f, const char __user *buf,
                size_t size, loff_t *pos)
 {
-       struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
+       struct amdgpu_device *adev =
+               (struct amdgpu_device *)file_inode(f)->i_private;
        int ret;
 
-       ret = amdgpu_ras_eeprom_reset_table(&adev->psp.ras.ras->eeprom_control);
+       ret = amdgpu_ras_eeprom_reset_table(
+                       &(amdgpu_ras_get_context(adev)->eeprom_control));
 
-       return ret == 1 ? size : -EIO;
+       if (ret == 1) {
+               amdgpu_ras_get_context(adev)->flags = RAS_DEFAULT_FLAGS;
+               return size;
+       } else {
+               return -EIO;
+       }
 }
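
A minimal sketch of the one-shot debugfs write pattern used above, with hypothetical device and helper names (example_dev, example_dev_reset): the written bytes are ignored, the write itself is the trigger, and the handler reports either full consumption of the buffer or an error.

#include <linux/fs.h>

static ssize_t example_reset_write(struct file *f, const char __user *buf,
		size_t size, loff_t *pos)
{
	struct example_dev *edev = file_inode(f)->i_private; /* hypothetical */
	int ret = example_dev_reset(edev);                   /* hypothetical */

	/* Claim the whole buffer on success so userspace sees the write
	 * succeed; otherwise report an I/O error, as the handler above does. */
	return ret == 1 ? size : -EIO;
}
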
 
 static const struct file_operations amdgpu_ras_debugfs_ctrl_ops = {
@@ -506,9 +518,9 @@ struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
 /* obj end */
 
 static void amdgpu_ras_parse_status_code(struct amdgpu_device *adev,
-                                 const char*           invoke_type,
-                                 const char*           block_name,
-                                 enum ta_ras_status    ret)
+                                        const char* invoke_type,
+                                        const char* block_name,
+                                        enum ta_ras_status ret)
 {
        switch (ret) {
        case TA_RAS_STATUS__SUCCESS:
@@ -597,7 +609,7 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
        if (!con)
                return -EINVAL;
 
-        info = kzalloc(sizeof(union ta_ras_cmd_input), GFP_KERNEL);
+       info = kzalloc(sizeof(union ta_ras_cmd_input), GFP_KERNEL);
        if (!info)
                return -ENOMEM;
 
@@ -893,13 +905,6 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
        return ret;
 }
 
-int amdgpu_ras_error_cure(struct amdgpu_device *adev,
-               struct ras_cure_if *info)
-{
-       /* psp fw has no cure interface for now. */
-       return 0;
-}
-
 /* get the total error counts on all IPs */
 unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev,
                bool is_ce)
@@ -943,7 +948,7 @@ static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
        case AMDGPU_RAS_RETIRE_PAGE_FAULT:
        default:
                return "F";
-       };
+       }
 }
 
 /**
@@ -1017,45 +1022,13 @@ static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev,
        return scnprintf(buf, PAGE_SIZE, "feature mask: 0x%x\n", con->features);
 }
 
-static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev)
+static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev)
 {
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
-       struct attribute *attrs[] = {
-               &con->features_attr.attr,
-               NULL
-       };
-       struct bin_attribute *bin_attrs[] = {
-               &con->badpages_attr,
-               NULL
-       };
-       struct attribute_group group = {
-               .name = "ras",
-               .attrs = attrs,
-               .bin_attrs = bin_attrs,
-       };
 
-       con->features_attr = (struct device_attribute) {
-               .attr = {
-                       .name = "features",
-                       .mode = S_IRUGO,
-               },
-                       .show = amdgpu_ras_sysfs_features_read,
-       };
-
-       con->badpages_attr = (struct bin_attribute) {
-               .attr = {
-                       .name = "gpu_vram_bad_pages",
-                       .mode = S_IRUGO,
-               },
-               .size = 0,
-               .private = NULL,
-               .read = amdgpu_ras_sysfs_badpages_read,
-       };
-
-       sysfs_attr_init(attrs[0]);
-       sysfs_bin_attr_init(bin_attrs[0]);
-
-       return sysfs_create_group(&adev->dev->kobj, &group);
+       sysfs_remove_file_from_group(&adev->dev->kobj,
+                               &con->badpages_attr.attr,
+                               RAS_FS_NAME);
 }
 
 static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev)
@@ -1065,14 +1038,9 @@ static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev)
                &con->features_attr.attr,
                NULL
        };
-       struct bin_attribute *bin_attrs[] = {
-               &con->badpages_attr,
-               NULL
-       };
        struct attribute_group group = {
-               .name = "ras",
+               .name = RAS_FS_NAME,
                .attrs = attrs,
-               .bin_attrs = bin_attrs,
        };
 
        sysfs_remove_group(&adev->dev->kobj, &group);
@@ -1105,7 +1073,7 @@ int amdgpu_ras_sysfs_create(struct amdgpu_device *adev,
 
        if (sysfs_add_file_to_group(&adev->dev->kobj,
                                &obj->sysfs_attr.attr,
-                               "ras")) {
+                               RAS_FS_NAME)) {
                put_obj(obj);
                return -EINVAL;
        }
@@ -1125,7 +1093,7 @@ int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev,
 
        sysfs_remove_file_from_group(&adev->dev->kobj,
                                &obj->sysfs_attr.attr,
-                               "ras");
+                               RAS_FS_NAME);
        obj->attr_inuse = 0;
        put_obj(obj);
 
@@ -1141,6 +1109,9 @@ static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev)
                amdgpu_ras_sysfs_remove(adev, &obj->head);
        }
 
+       if (amdgpu_bad_page_threshold != 0)
+               amdgpu_ras_sysfs_remove_bad_page_node(adev);
+
        amdgpu_ras_sysfs_remove_feature_node(adev);
 
        return 0;
@@ -1169,9 +1140,9 @@ static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev)
 static void amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
 {
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
-       struct drm_minor *minor = adev->ddev->primary;
+       struct drm_minor *minor = adev_to_drm(adev)->primary;
 
-       con->dir = debugfs_create_dir("ras", minor->debugfs_root);
+       con->dir = debugfs_create_dir(RAS_FS_NAME, minor->debugfs_root);
        debugfs_create_file("ras_ctrl", S_IWUGO | S_IRUGO, con->dir,
                                adev, &amdgpu_ras_debugfs_ctrl_ops);
        debugfs_create_file("ras_eeprom_reset", S_IWUGO | S_IRUGO, con->dir,
@@ -1187,9 +1158,16 @@ static void amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
         */
        debugfs_create_bool("auto_reboot", S_IWUGO | S_IRUGO, con->dir,
                                &con->reboot);
+
+       /*
+        * The user can set this flag to skip cleaning up the hardware's
+        * error count registers of RAS IPs during RAS recovery.
+        */
+       debugfs_create_bool("disable_ras_err_cnt_harvest", 0644,
+                       con->dir, &con->disable_ras_err_cnt_harvest);
 }
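
Since disable_ras_err_cnt_harvest is exposed via debugfs_create_bool(), userspace can toggle it with a plain write. A hypothetical example, assuming debugfs is mounted and the device is DRM minor 0:

#include <fcntl.h>
#include <unistd.h>

int main(void)
{
        /* Path assumes DRM minor 0; adjust for the actual card. */
        int fd = open("/sys/kernel/debug/dri/0/ras/disable_ras_err_cnt_harvest",
                      O_WRONLY);

        if (fd < 0)
                return 1;
        write(fd, "1", 1);      /* debugfs bool nodes accept "1"/"0"/"y"/"n" */
        close(fd);
        return 0;
}
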
 
-void amdgpu_ras_debugfs_create(struct amdgpu_device *adev,
+static void amdgpu_ras_debugfs_create(struct amdgpu_device *adev,
                struct ras_fs_if *head)
 {
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
@@ -1219,7 +1197,7 @@ void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev)
         * it won't be called in resume path, no need to check
         * suspend and gpu reset status
         */
-       if (!con)
+       if (!IS_ENABLED(CONFIG_DEBUG_FS) || !con)
                return;
 
        amdgpu_ras_debugfs_create_ctrl_node(adev);
@@ -1235,7 +1213,7 @@ void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev)
        }
 }
 
-void amdgpu_ras_debugfs_remove(struct amdgpu_device *adev,
+static void amdgpu_ras_debugfs_remove(struct amdgpu_device *adev,
                struct ras_common_if *head)
 {
        struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
@@ -1261,17 +1239,51 @@ static void amdgpu_ras_debugfs_remove_all(struct amdgpu_device *adev)
 /* debugfs end */
 
 /* ras fs */
-
+static BIN_ATTR(gpu_vram_bad_pages, S_IRUGO,
+               amdgpu_ras_sysfs_badpages_read, NULL, 0);
+static DEVICE_ATTR(features, S_IRUGO,
+               amdgpu_ras_sysfs_features_read, NULL);
 static int amdgpu_ras_fs_init(struct amdgpu_device *adev)
 {
-       amdgpu_ras_sysfs_create_feature_node(adev);
+       struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+       struct attribute_group group = {
+               .name = RAS_FS_NAME,
+       };
+       struct attribute *attrs[] = {
+               &con->features_attr.attr,
+               NULL
+       };
+       struct bin_attribute *bin_attrs[] = {
+               NULL,
+               NULL,
+       };
+       int r;
+
+       /* add features entry */
+       con->features_attr = dev_attr_features;
+       group.attrs = attrs;
+       sysfs_attr_init(attrs[0]);
+
+       if (amdgpu_bad_page_threshold != 0) {
+               /* add bad_page_features entry */
+               bin_attr_gpu_vram_bad_pages.private = NULL;
+               con->badpages_attr = bin_attr_gpu_vram_bad_pages;
+               bin_attrs[0] = &con->badpages_attr;
+               group.bin_attrs = bin_attrs;
+               sysfs_bin_attr_init(bin_attrs[0]);
+       }
+
+       r = sysfs_create_group(&adev->dev->kobj, &group);
+       if (r)
+               dev_err(adev->dev, "Failed to create RAS sysfs group!");
 
        return 0;
 }
 
 static int amdgpu_ras_fs_fini(struct amdgpu_device *adev)
 {
-       amdgpu_ras_debugfs_remove_all(adev);
+       if (IS_ENABLED(CONFIG_DEBUG_FS))
+               amdgpu_ras_debugfs_remove_all(adev);
        amdgpu_ras_sysfs_remove_all(adev);
        return 0;
 }
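
With the group registered under RAS_FS_NAME, the features node ends up at <device sysfs dir>/ras/features. A small userspace reader, assuming the amdgpu device is drm card0:

#include <stdio.h>

int main(void)
{
        /* Path assumes the amdgpu device is drm card0. */
        FILE *fp = fopen("/sys/class/drm/card0/device/ras/features", "r");
        char line[64];

        if (!fp)
                return 1;
        if (fgets(line, sizeof(line), fp))
                fputs(line, stdout);    /* e.g. "feature mask: 0x3" */
        fclose(fp);
        return 0;
}
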
@@ -1456,6 +1468,45 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev)
        }
 }
 
+/* Parse RdRspStatus and WrRspStatus */
+static void amdgpu_ras_error_status_query(struct amdgpu_device *adev,
+                                         struct ras_query_if *info)
+{
+       /*
+        * Only two blocks need to query read/write
+        * RspStatus at the current state
+        */
+       switch (info->head.block) {
+       case AMDGPU_RAS_BLOCK__GFX:
+               if (adev->gfx.funcs->query_ras_error_status)
+                       adev->gfx.funcs->query_ras_error_status(adev);
+               break;
+       case AMDGPU_RAS_BLOCK__MMHUB:
+               if (adev->mmhub.funcs->query_ras_error_status)
+                       adev->mmhub.funcs->query_ras_error_status(adev);
+               break;
+       default:
+               break;
+       }
+}
+
+static void amdgpu_ras_query_err_status(struct amdgpu_device *adev)
+{
+       struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+       struct ras_manager *obj;
+
+       if (!con)
+               return;
+
+       list_for_each_entry(obj, &con->head, node) {
+               struct ras_query_if info = {
+                       .head = obj->head,
+               };
+
+               amdgpu_ras_error_status_query(adev, &info);
+       }
+}
+
 /* recovery begin */
 
 /* return 0 on success.
@@ -1492,10 +1543,12 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
                        .size = AMDGPU_GPU_PAGE_SIZE,
                        .flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED,
                };
-
-               if (data->last_reserved <= i)
+               ret = amdgpu_vram_mgr_query_page_status(
+                               ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM),
+                               data->bps[i].retired_page);
+               if (ret == -EBUSY)
                        (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING;
-               else if (data->bps_bo[i] == NULL)
+               else if (ret == -ENOENT)
                        (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_FAULT;
        }
 
@@ -1512,23 +1565,30 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
        struct amdgpu_device *remote_adev = NULL;
        struct amdgpu_device *adev = ras->adev;
        struct list_head device_list, *device_list_handle =  NULL;
-       struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, false);
-
-       /* Build list of devices to query RAS related errors */
-       if  (hive && adev->gmc.xgmi.num_physical_nodes > 1)
-               device_list_handle = &hive->device_list;
-       else {
-               INIT_LIST_HEAD(&device_list);
-               list_add_tail(&adev->gmc.xgmi.head, &device_list);
-               device_list_handle = &device_list;
-       }
 
-       list_for_each_entry(remote_adev, device_list_handle, gmc.xgmi.head) {
-               amdgpu_ras_log_on_err_counter(remote_adev);
+       if (!ras->disable_ras_err_cnt_harvest) {
+               struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
+
+               /* Build list of devices to query RAS related errors */
+               if  (hive && adev->gmc.xgmi.num_physical_nodes > 1) {
+                       device_list_handle = &hive->device_list;
+               } else {
+                       INIT_LIST_HEAD(&device_list);
+                       list_add_tail(&adev->gmc.xgmi.head, &device_list);
+                       device_list_handle = &device_list;
+               }
+
+               list_for_each_entry(remote_adev,
+                               device_list_handle, gmc.xgmi.head) {
+                       amdgpu_ras_query_err_status(remote_adev);
+                       amdgpu_ras_log_on_err_counter(remote_adev);
+               }
+
+               amdgpu_put_xgmi_hive(hive);
        }
 
        if (amdgpu_device_should_recover_gpu(ras->adev))
-               amdgpu_device_gpu_recover(ras->adev, 0);
+               amdgpu_device_gpu_recover(ras->adev, NULL);
        atomic_set(&ras->in_recovery, 0);
 }
 
@@ -1540,12 +1600,9 @@ static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev,
        unsigned int new_space = old_space + pages;
        unsigned int align_space = ALIGN(new_space, 512);
        void *bps = kmalloc(align_space * sizeof(*data->bps), GFP_KERNEL);
-       struct amdgpu_bo **bps_bo =
-                       kmalloc(align_space * sizeof(*data->bps_bo), GFP_KERNEL);
 
-       if (!bps || !bps_bo) {
+       if (!bps) {
                kfree(bps);
-               kfree(bps_bo);
                return -ENOMEM;
        }
 
@@ -1554,14 +1611,8 @@ static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev,
                                data->count * sizeof(*data->bps));
                kfree(data->bps);
        }
-       if (data->bps_bo) {
-               memcpy(bps_bo, data->bps_bo,
-                               data->count * sizeof(*data->bps_bo));
-               kfree(data->bps_bo);
-       }
 
        data->bps = bps;
-       data->bps_bo = bps_bo;
        data->space_left += align_space - old_space;
        return 0;
 }
@@ -1573,6 +1624,7 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
        struct ras_err_handler_data *data;
        int ret = 0;
+       uint32_t i;
 
        if (!con || !con->eh_data || !bps || pages <= 0)
                return 0;
@@ -1582,16 +1634,26 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
        if (!data)
                goto out;
 
-       if (data->space_left <= pages)
-               if (amdgpu_ras_realloc_eh_data_space(adev, data, pages)) {
+       for (i = 0; i < pages; i++) {
+               if (amdgpu_ras_check_bad_page_unlock(con,
+                       bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT))
+                       continue;
+
+               if (!data->space_left &&
+                       amdgpu_ras_realloc_eh_data_space(adev, data, 256)) {
                        ret = -ENOMEM;
                        goto out;
                }
 
-       memcpy(&data->bps[data->count], bps, pages * sizeof(*data->bps));
-       data->count += pages;
-       data->space_left -= pages;
+               amdgpu_vram_mgr_reserve_range(
+                       ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM),
+                       bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT,
+                       AMDGPU_GPU_PAGE_SIZE);
 
+               memcpy(&data->bps[data->count], &bps[i], sizeof(*data->bps));
+               data->count++;
+               data->space_left--;
+       }
 out:
        mutex_unlock(&con->recovery_lock);
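
In the loop above, retired_page stores a GPU page frame number, so it is shifted left by AMDGPU_GPU_PAGE_SHIFT to form the byte address and size handed to the VRAM manager. A standalone check of that arithmetic, assuming 4 KiB GPU pages (AMDGPU_GPU_PAGE_SHIFT == 12):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t pfn = 0x1234;  /* hypothetical bps[i].retired_page value */

        /* Prints "reserve at 0x1234000, size 4096". */
        printf("reserve at 0x%llx, size %u\n",
               (unsigned long long)(pfn << 12), 1u << 12);
        return 0;
}
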
 
@@ -1602,7 +1664,7 @@ out:
  * write error record array to eeprom, the function should be
  * protected by recovery_lock
  */
-static int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev)
+int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev)
 {
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
        struct ras_err_handler_data *data;
@@ -1643,7 +1705,7 @@ static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev)
        int ret = 0;
 
        /* no bad page record, skip eeprom access */
-       if (!control->num_recs)
+       if (!control->num_recs || (amdgpu_bad_page_threshold == 0))
                return ret;
 
        bps = kcalloc(control->num_recs, sizeof(*bps), GFP_KERNEL);
@@ -1664,116 +1726,87 @@ out:
        return ret;
 }
 
-/*
- * check if an address belongs to bad page
- *
- * Note: this check is only for umc block
- */
-static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
+static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
                                uint64_t addr)
 {
-       struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
-       struct ras_err_handler_data *data;
+       struct ras_err_handler_data *data = con->eh_data;
        int i;
-       bool ret = false;
-
-       if (!con || !con->eh_data)
-               return ret;
-
-       mutex_lock(&con->recovery_lock);
-       data = con->eh_data;
-       if (!data)
-               goto out;
 
        addr >>= AMDGPU_GPU_PAGE_SHIFT;
        for (i = 0; i < data->count; i++)
-               if (addr == data->bps[i].retired_page) {
-                       ret = true;
-                       goto out;
-               }
+               if (addr == data->bps[i].retired_page)
+                       return true;
 
-out:
-       mutex_unlock(&con->recovery_lock);
-       return ret;
+       return false;
 }
 
-/* called in gpu recovery/init */
-int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev)
+/*
+ * check if an address belongs to a bad page
+ *
+ * Note: this check is only for the umc block
+ */
+static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
+                               uint64_t addr)
 {
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
-       struct ras_err_handler_data *data;
-       uint64_t bp;
-       struct amdgpu_bo *bo = NULL;
-       int i, ret = 0;
+       bool ret = false;
 
        if (!con || !con->eh_data)
-               return 0;
+               return ret;
 
        mutex_lock(&con->recovery_lock);
-       data = con->eh_data;
-       if (!data)
-               goto out;
-       /* reserve vram at driver post stage. */
-       for (i = data->last_reserved; i < data->count; i++) {
-               bp = data->bps[i].retired_page;
-
-               /* There are two cases of reserve error should be ignored:
-                * 1) a ras bad page has been allocated (used by someone);
-                * 2) a ras bad page has been reserved (duplicate error injection
-                *    for one page);
-                */
-               if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
-                                              AMDGPU_GPU_PAGE_SIZE,
-                                              AMDGPU_GEM_DOMAIN_VRAM,
-                                              &bo, NULL))
-                       dev_warn(adev->dev, "RAS WARN: reserve vram for "
-                                       "retired page %llx fail\n", bp);
-
-               data->bps_bo[i] = bo;
-               data->last_reserved = i + 1;
-               bo = NULL;
-       }
-
-       /* continue to save bad pages to eeprom even reesrve_vram fails */
-       ret = amdgpu_ras_save_bad_pages(adev);
-out:
+       ret = amdgpu_ras_check_bad_page_unlock(con, addr);
        mutex_unlock(&con->recovery_lock);
        return ret;
 }
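
The split above is the usual locked/unlocked helper idiom: the *_unlock variant assumes recovery_lock is already held (which lets amdgpu_ras_add_bad_pages() call it under the lock), while the public wrapper takes the lock itself. A generic sketch with hypothetical names:

#include <linux/mutex.h>
#include <linux/types.h>

struct table {
        struct mutex lock;
        u64 *entries;
        int count;
};

/* Caller must hold t->lock. */
static bool table_contains_unlocked(struct table *t, u64 key)
{
        int i;

        for (i = 0; i < t->count; i++)
                if (t->entries[i] == key)
                        return true;
        return false;
}

static bool table_contains(struct table *t, u64 key)
{
        bool hit;

        mutex_lock(&t->lock);
        hit = table_contains_unlocked(t, key);
        mutex_unlock(&t->lock);
        return hit;
}
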
 
-/* called when driver unload */
-static int amdgpu_ras_release_bad_pages(struct amdgpu_device *adev)
+static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev,
+                                       uint32_t max_length)
 {
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
-       struct ras_err_handler_data *data;
-       struct amdgpu_bo *bo;
-       int i;
-
-       if (!con || !con->eh_data)
-               return 0;
+       int tmp_threshold = amdgpu_bad_page_threshold;
+       u64 val;
 
-       mutex_lock(&con->recovery_lock);
-       data = con->eh_data;
-       if (!data)
-               goto out;
-
-       for (i = data->last_reserved - 1; i >= 0; i--) {
-               bo = data->bps_bo[i];
+       /*
+        * Justification of the bad_page_cnt_threshold value in the ras
+        * structure.
+        *
+        * Generally, -1 <= amdgpu_bad_page_threshold <= max record length
+        * in eeprom, which yields two scenarios:
+        *
+        * Bad page retirement enabled:
+        *    - If amdgpu_bad_page_threshold = -1, bad_page_cnt_threshold
+        *      is computed from the VRAM size by the formula below.
+        *
+        *    - If the user supplies 0 < amdgpu_bad_page_threshold <
+        *      max record length in eeprom, use that value directly.
+        *
+        * Bad page retirement disabled:
+        *    - If amdgpu_bad_page_threshold = 0, bad page retirement is
+        *      disabled and bad_page_cnt_threshold takes no effect.
+        */
 
-               amdgpu_bo_free_kernel(&bo, NULL, NULL);
+       if (tmp_threshold < -1)
+               tmp_threshold = -1;
+       else if (tmp_threshold > max_length)
+               tmp_threshold = max_length;
 
-               data->bps_bo[i] = bo;
-               data->last_reserved = i;
+       if (tmp_threshold == -1) {
+               val = adev->gmc.mc_vram_size;
+               do_div(val, RAS_BAD_PAGE_RATE);
+               con->bad_page_cnt_threshold = min(lower_32_bits(val),
+                                               max_length);
+       } else {
+               con->bad_page_cnt_threshold = tmp_threshold;
        }
-out:
-       mutex_unlock(&con->recovery_lock);
-       return 0;
 }
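
For the default case (amdgpu_bad_page_threshold = -1), the formula above allows one bad page per 100 MiB of VRAM, clamped to the eeprom record capacity. A standalone check of the arithmetic, with a hypothetical 16 GiB board and a hypothetical record cap:

#include <stdint.h>
#include <stdio.h>

#define RAS_BAD_PAGE_RATE (100 * 1024 * 1024ULL)

int main(void)
{
        uint64_t vram_size = 16ULL << 30;   /* hypothetical 16 GiB VRAM */
        uint32_t max_length = 1478;         /* hypothetical eeprom record cap */
        uint32_t threshold = (uint32_t)(vram_size / RAS_BAD_PAGE_RATE);

        if (threshold > max_length)
                threshold = max_length;
        printf("bad_page_cnt_threshold = %u\n", threshold);    /* 163 */
        return 0;
}
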
 
 int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
 {
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
        struct ras_err_handler_data **data;
+       uint32_t max_eeprom_records_len = 0;
+       bool exc_err_limit = false;
        int ret;
 
        if (con)
@@ -1792,31 +1825,41 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
        atomic_set(&con->in_recovery, 0);
        con->adev = adev;
 
-       ret = amdgpu_ras_eeprom_init(&con->eeprom_control);
-       if (ret)
+       max_eeprom_records_len = amdgpu_ras_eeprom_get_record_max_length();
+       amdgpu_ras_validate_threshold(adev, max_eeprom_records_len);
+
+       ret = amdgpu_ras_eeprom_init(&con->eeprom_control, &exc_err_limit);
+       /*
+        * This call fails when exc_err_limit is true or
+        * ret != 0.
+        */
+       if (exc_err_limit || ret)
                goto free;
 
        if (con->eeprom_control.num_recs) {
                ret = amdgpu_ras_load_bad_pages(adev);
                if (ret)
                        goto free;
-               ret = amdgpu_ras_reserve_bad_pages(adev);
-               if (ret)
-                       goto release;
        }
 
        return 0;
 
-release:
-       amdgpu_ras_release_bad_pages(adev);
 free:
        kfree((*data)->bps);
-       kfree((*data)->bps_bo);
        kfree(*data);
        con->eh_data = NULL;
 out:
        dev_warn(adev->dev, "Failed to initialize ras recovery!\n");
 
+       /*
+        * Except for the error-threshold-exceeded case, other failures in
+        * this function do not fail amdgpu driver init.
+        */
+       if (!exc_err_limit)
+               ret = 0;
+       else
+               ret = -EINVAL;
+
        return ret;
 }
 
@@ -1830,12 +1873,10 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
                return 0;
 
        cancel_work_sync(&con->recovery_work);
-       amdgpu_ras_release_bad_pages(adev);
 
        mutex_lock(&con->recovery_lock);
        con->eh_data = NULL;
        kfree(data->bps);
-       kfree(data->bps_bo);
        kfree(data);
        mutex_unlock(&con->recovery_lock);
 
@@ -1856,6 +1897,17 @@ int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,
        return 0;
 }
 
+static int amdgpu_ras_check_asic_type(struct amdgpu_device *adev)
+{
+       if (adev->asic_type != CHIP_VEGA10 &&
+               adev->asic_type != CHIP_VEGA20 &&
+               adev->asic_type != CHIP_ARCTURUS &&
+               adev->asic_type != CHIP_SIENNA_CICHLID)
+               return 1;
+       else
+               return 0;
+}
+
 /*
  * check hardware's ras ability which will be saved in hw_supported.
 * if hardware does not support ras, we can skip some ras initialization and
@@ -1872,8 +1924,7 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev,
        *supported = 0;
 
        if (amdgpu_sriov_vf(adev) || !adev->is_atom_fw ||
-           (adev->asic_type != CHIP_VEGA20 &&
-            adev->asic_type != CHIP_ARCTURUS))
+               amdgpu_ras_check_asic_type(adev))
                return;
 
        if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
@@ -1895,6 +1946,7 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev,
 
        *supported = amdgpu_ras_enable == 0 ?
                        0 : *hw_supported & amdgpu_ras_mask;
+       adev->ras_features = *supported;
 }
 
 int amdgpu_ras_init(struct amdgpu_device *adev)
@@ -1917,9 +1969,9 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
 
        amdgpu_ras_check_supported(adev, &con->hw_supported,
                        &con->supported);
-       if (!con->hw_supported) {
+       if (!con->hw_supported || (adev->asic_type == CHIP_VEGA10)) {
                r = 0;
-               goto err_out;
+               goto release_con;
        }
 
        con->features = 0;
@@ -1930,25 +1982,25 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
        if (adev->nbio.funcs->init_ras_controller_interrupt) {
                r = adev->nbio.funcs->init_ras_controller_interrupt(adev);
                if (r)
-                       goto err_out;
+                       goto release_con;
        }
 
        if (adev->nbio.funcs->init_ras_err_event_athub_interrupt) {
                r = adev->nbio.funcs->init_ras_err_event_athub_interrupt(adev);
                if (r)
-                       goto err_out;
+                       goto release_con;
        }
 
        if (amdgpu_ras_fs_init(adev)) {
                r = -EINVAL;
-               goto err_out;
+               goto release_con;
        }
 
        dev_info(adev->dev, "RAS INFO: ras initialized successfully, "
                        "hardware ability[%x] ras_mask[%x]\n",
                        con->hw_supported, con->supported);
        return 0;
-err_out:
+release_con:
        amdgpu_ras_set_context(adev, NULL);
        kfree(con);
 
@@ -1976,7 +2028,7 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev,
                        amdgpu_ras_request_reset_on_boot(adev,
                                        ras_block->block);
                        return 0;
-               } else if (adev->in_suspend || adev->in_gpu_reset) {
+               } else if (adev->in_suspend || amdgpu_in_reset(adev)) {
                        /* in resume phase, if fail to enable ras,
                         * clean up all ras fs nodes, and disable ras */
                        goto cleanup;
@@ -1985,7 +2037,7 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev,
        }
 
        /* in resume phase, no need to create ras fs node */
-       if (adev->in_suspend || adev->in_gpu_reset)
+       if (adev->in_suspend || amdgpu_in_reset(adev))
                return 0;
 
        if (ih_info->cb) {
@@ -2019,7 +2071,7 @@ void amdgpu_ras_late_fini(struct amdgpu_device *adev,
 
        amdgpu_ras_sysfs_remove(adev, ras_block);
        if (ih_info->cb)
-                amdgpu_ras_interrupt_remove_handler(adev, ih_info);
+               amdgpu_ras_interrupt_remove_handler(adev, ih_info);
        amdgpu_ras_feature_enable(adev, ras_block, 0);
 }
 
@@ -2143,3 +2195,19 @@ bool amdgpu_ras_need_emergency_restart(struct amdgpu_device *adev)
 
        return false;
 }
+
+bool amdgpu_ras_check_err_threshold(struct amdgpu_device *adev)
+{
+       struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+       bool exc_err_limit = false;
+
+       if (con && (amdgpu_bad_page_threshold != 0))
+               amdgpu_ras_eeprom_check_err_threshold(&con->eeprom_control,
+                                               &exc_err_limit);
+
+       /*
+        * We are only interested in the variable exc_err_limit,
+        * as it tells whether the GPU is in a bad state or not.
+        */
+       return exc_err_limit;
+}