Git Repo - linux.git/blobdiff - drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
Merge tag 'pci-v6.2-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci
[linux.git] / drivers / gpu / drm / amd / amdgpu / amdgpu_ras.c
index a4b47e1bd111d518548294d4ee2aec52ebaf8553..ad490c1e2f579bf86d3212323306f12b1d38ebbc 100644 (file)
@@ -1267,7 +1267,7 @@ static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev,
        struct amdgpu_ras *con =
                container_of(attr, struct amdgpu_ras, features_attr);
 
-       return scnprintf(buf, PAGE_SIZE, "feature mask: 0x%x\n", con->features);
+       return sysfs_emit(buf, "feature mask: 0x%x\n", con->features);
 }
 
 static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev)
@@ -1561,7 +1561,6 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *
 {
        bool poison_stat = false;
        struct amdgpu_device *adev = obj->adev;
-       struct ras_err_data err_data = {0, 0, 0, NULL};
        struct amdgpu_ras_block_object *block_obj =
                amdgpu_ras_get_ras_block(adev, obj->head.block, 0);
 
@@ -1584,7 +1583,7 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *
        }
 
        if (!adev->gmc.xgmi.connected_to_cpu)
-               amdgpu_umc_poison_handler(adev, &err_data, false);
+               amdgpu_umc_poison_handler(adev, false);
 
        if (block_obj->hw_ops->handle_poison_consumption)
                poison_stat = block_obj->hw_ops->handle_poison_consumption(adev);
@@ -1949,7 +1948,12 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
 
                reset_context.method = AMD_RESET_METHOD_NONE;
                reset_context.reset_req_dev = adev;
-               clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
+               /* Perform full reset in fatal error mode */
+               if (!amdgpu_ras_is_poison_mode_supported(ras->adev))
+                       set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+               else
+                       clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
 
                amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context);
        }
@@ -2344,7 +2348,8 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
                                adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
                                                            1 << AMDGPU_RAS_BLOCK__DF);
 
-                               if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2, 6, 0))
+                               if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2, 6, 0) ||
+                                   adev->ip_versions[VCN_HWIP][0] == IP_VERSION(4, 0, 0))
                                        adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN |
                                                        1 << AMDGPU_RAS_BLOCK__JPEG);
                                else
@@ -2848,7 +2853,6 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb,
        struct amdgpu_device *adev = NULL;
        uint32_t gpu_id = 0;
        uint32_t umc_inst = 0, ch_inst = 0;
-       struct ras_err_data err_data = {0, 0, 0, NULL};
 
        /*
         * If the error was generated in UMC_V2, which belongs to GPU UMCs,
@@ -2887,31 +2891,10 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb,
        dev_info(adev->dev, "Uncorrectable error detected in UMC inst: %d, chan_idx: %d",
                             umc_inst, ch_inst);
 
-       err_data.err_addr =
-               kcalloc(adev->umc.max_ras_err_cnt_per_query,
-                       sizeof(struct eeprom_table_record), GFP_KERNEL);
-       if (!err_data.err_addr) {
-               dev_warn(adev->dev,
-                       "Failed to alloc memory for umc error record in mca notifier!\n");
+       if (!amdgpu_umc_page_retirement_mca(adev, m->addr, ch_inst, umc_inst))
+               return NOTIFY_OK;
+       else
                return NOTIFY_DONE;
-       }
-
-       /*
-        * Translate UMC channel address to Physical address
-        */
-       if (adev->umc.ras &&
-           adev->umc.ras->convert_ras_error_address)
-               adev->umc.ras->convert_ras_error_address(adev,
-                       &err_data, m->addr, ch_inst, umc_inst);
-
-       if (amdgpu_bad_page_threshold != 0) {
-               amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
-                                               err_data.err_addr_cnt);
-               amdgpu_ras_save_bad_pages(adev);
-       }
-
-       kfree(err_data.err_addr);
-       return NOTIFY_OK;
 }
 
 static struct notifier_block amdgpu_bad_page_nb = {
This page took 0.047089 seconds and 4 git commands to generate.