struct amdgpu_ras *con =
container_of(attr, struct amdgpu_ras, features_attr);
- return scnprintf(buf, PAGE_SIZE, "feature mask: 0x%x\n", con->features);
+ return sysfs_emit(buf, "feature mask: 0x%x\n", con->features);
}
static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev)
{
bool poison_stat = false;
struct amdgpu_device *adev = obj->adev;
- struct ras_err_data err_data = {0, 0, 0, NULL};
struct amdgpu_ras_block_object *block_obj =
amdgpu_ras_get_ras_block(adev, obj->head.block, 0);
}
if (!adev->gmc.xgmi.connected_to_cpu)
- amdgpu_umc_poison_handler(adev, &err_data, false);
+ amdgpu_umc_poison_handler(adev, false);
if (block_obj->hw_ops->handle_poison_consumption)
poison_stat = block_obj->hw_ops->handle_poison_consumption(adev);
reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
- clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
+ /* Perform full reset in fatal error mode */
+ if (!amdgpu_ras_is_poison_mode_supported(ras->adev))
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ else
+ clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context);
}
adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
1 << AMDGPU_RAS_BLOCK__DF);
- if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2, 6, 0))
+ if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2, 6, 0) ||
+ adev->ip_versions[VCN_HWIP][0] == IP_VERSION(4, 0, 0))
adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN |
1 << AMDGPU_RAS_BLOCK__JPEG);
else
struct amdgpu_device *adev = NULL;
uint32_t gpu_id = 0;
uint32_t umc_inst = 0, ch_inst = 0;
- struct ras_err_data err_data = {0, 0, 0, NULL};
/*
* If the error was generated in UMC_V2, which belongs to GPU UMCs,
dev_info(adev->dev, "Uncorrectable error detected in UMC inst: %d, chan_idx: %d",
umc_inst, ch_inst);
- err_data.err_addr =
- kcalloc(adev->umc.max_ras_err_cnt_per_query,
- sizeof(struct eeprom_table_record), GFP_KERNEL);
- if (!err_data.err_addr) {
- dev_warn(adev->dev,
- "Failed to alloc memory for umc error record in mca notifier!\n");
+ if (!amdgpu_umc_page_retirement_mca(adev, m->addr, ch_inst, umc_inst))
+ return NOTIFY_OK;
+ else
return NOTIFY_DONE;
- }
-
- /*
- * Translate UMC channel address to Physical address
- */
- if (adev->umc.ras &&
- adev->umc.ras->convert_ras_error_address)
- adev->umc.ras->convert_ras_error_address(adev,
- &err_data, m->addr, ch_inst, umc_inst);
-
- if (amdgpu_bad_page_threshold != 0) {
- amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
- err_data.err_addr_cnt);
- amdgpu_ras_save_bad_pages(adev);
- }
-
- kfree(err_data.err_addr);
- return NOTIFY_OK;
}
static struct notifier_block amdgpu_bad_page_nb = {