memset(&err_rec, 0x0, sizeof(struct eeprom_table_record));
err_data.err_addr = &err_rec;
- amdgpu_umc_fill_error_record(&err_data, address,
- (address >> AMDGPU_GPU_PAGE_SHIFT), 0, 0);
+ amdgpu_umc_fill_error_record(&err_data, address, address, 0, 0);
if (amdgpu_bad_page_threshold != 0) {
amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
case AMDGPU_RAS_BLOCK__SDMA:
mask = GENMASK(adev->sdma.num_instances - 1, 0);
break;
+ case AMDGPU_RAS_BLOCK__VCN:
+ case AMDGPU_RAS_BLOCK__JPEG:
+ mask = GENMASK(adev->vcn.num_vcn_inst - 1, 0);
+ break;
default:
- mask = 0;
+ mask = inst_mask;
break;
}
ret = amdgpu_ras_feature_enable(adev, &data.head, 1);
break;
case 2:
- if ((data.inject.address >= adev->gmc.mc_vram_size) ||
+ if ((data.inject.address >= adev->gmc.mc_vram_size &&
+ adev->gmc.mc_vram_size) ||
(data.inject.address >= RAS_UMC_INJECT_ADDR_LIMIT)) {
dev_warn(adev->dev, "RAS WARN: input address "
"0x%llx is invalid.",
static struct dentry *amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct amdgpu_ras_eeprom_control *eeprom = &con->eeprom_control;
struct drm_minor *minor = adev_to_drm(adev)->primary;
struct dentry *dir;
&amdgpu_ras_debugfs_eeprom_ops);
debugfs_create_u32("bad_page_cnt_threshold", 0444, dir,
&con->bad_page_cnt_threshold);
+ debugfs_create_u32("ras_num_recs", 0444, dir, &eeprom->ras_num_recs);
debugfs_create_x32("ras_hw_enabled", 0444, dir, &adev->ras_hw_enabled);
debugfs_create_x32("ras_enabled", 0444, dir, &adev->ras_enabled);
debugfs_create_file("ras_eeprom_size", S_IRUGO, dir, adev,
}
}
- if (!adev->gmc.xgmi.connected_to_cpu)
- amdgpu_umc_poison_handler(adev, false);
+ amdgpu_umc_poison_handler(adev, false);
if (block_obj->hw_ops && block_obj->hw_ops->handle_poison_consumption)
poison_stat = block_obj->hw_ops->handle_poison_consumption(adev);
atomic_set(&con->in_recovery, 0);
con->eeprom_control.bad_channel_bitmap = 0;
- max_eeprom_records_count = amdgpu_ras_eeprom_max_record_count();
+ max_eeprom_records_count = amdgpu_ras_eeprom_max_record_count(&con->eeprom_control);
amdgpu_ras_validate_threshold(adev, max_eeprom_records_count);
/* Todo: During test the SMU might fail to read the eeprom through I2C
{
adev->ras_hw_enabled = adev->ras_enabled = 0;
- if (!adev->is_atom_fw ||
- !amdgpu_ras_asic_supported(adev))
+ if (!amdgpu_ras_asic_supported(adev))
return;
- if (!adev->gmc.xgmi.connected_to_cpu) {
+ if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
dev_info(adev->dev, "MEM ECC is active.\n");
adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__UMC |