struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_manager *obj;
- if (!con)
+ if (!adev->ras_features || !con)
return NULL;
if (head->block >= AMDGPU_RAS_BLOCK_COUNT)
struct ras_manager *obj;
int i;
- if (!con)
+ if (!adev->ras_features || !con)
return NULL;
if (head) {
con->features |= BIT(head->block);
} else {
if (obj && amdgpu_ras_is_feature_enabled(adev, head)) {
- con->features &= ~BIT(head->block);
+ /* skip clearing the gfx ras context feature for VEGA20 Gaming;
+ * it will be cleared later in amdgpu_release_ras_context()
+ */
+ if (!(!adev->ras_features && con->features & BIT(AMDGPU_RAS_BLOCK__GFX)))
+ con->features &= ~BIT(head->block);
put_obj(obj);
}
}
if (ret)
return ret;
+ /* the ras disable cmd for the gfx block must still be sent to the ras-ta */
+ if (head->block == AMDGPU_RAS_BLOCK__GFX)
+ con->features |= BIT(head->block);
+
ret = amdgpu_ras_feature_enable(adev, head, 0);
}
} else
struct ras_manager *obj;
struct ras_err_data data = {0, 0};
- if (!con)
+ if (!adev->ras_features || !con)
return 0;
list_for_each_entry(obj, &con->head, node) {
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_manager *obj;
- if (!con)
+ if (!adev->ras_features || !con)
return;
list_for_each_entry(obj, &con->head, node) {
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_manager *obj;
- if (!con)
+ if (!adev->ras_features || !con)
return;
list_for_each_entry(obj, &con->head, node) {
return ret;
}
-static uint32_t
-amdgpu_ras_calculate_badpags_threshold(struct amdgpu_device *adev)
+static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev,
+ uint32_t max_length)
{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
int tmp_threshold = amdgpu_bad_page_threshold;
u64 val;
- uint32_t max_length = 0;
- max_length = amdgpu_ras_eeprom_get_record_max_length();
/*
* Justification of value bad_page_cnt_threshold in ras structure
*
tmp_threshold = max_length;
if (tmp_threshold == -1) {
- val = adev->gmc.real_vram_size;
+ val = adev->gmc.mc_vram_size;
do_div(val, RAS_BAD_PAGE_RATE);
- tmp_threshold = min(lower_32_bits(val), max_length);
+ con->bad_page_cnt_threshold = min(lower_32_bits(val),
+ max_length);
+ } else {
+ con->bad_page_cnt_threshold = tmp_threshold;
}
-
- return tmp_threshold;
}
int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_err_handler_data **data;
+ uint32_t max_eeprom_records_len = 0;
bool exc_err_limit = false;
int ret;
- if (con)
+ if (adev->ras_features && con)
data = &con->eh_data;
else
return 0;
atomic_set(&con->in_recovery, 0);
con->adev = adev;
- if (!con->bad_page_cnt_threshold) {
- con->bad_page_cnt_threshold =
- amdgpu_ras_calculate_badpags_threshold(adev);
-
- ret = amdgpu_vram_mgr_reserve_backup_pages(
- ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM),
- con->bad_page_cnt_threshold);
- if (ret)
- goto out;
- }
+ max_eeprom_records_len = amdgpu_ras_eeprom_get_record_max_length();
+ amdgpu_ras_validate_threshold(adev, max_eeprom_records_len);
/* Todo: During test the SMU might fail to read the eeprom through I2C
* when the GPU is pending on XGMI reset during probe time
amdgpu_ras_check_supported(adev, &con->hw_supported,
&con->supported);
if (!con->hw_supported || (adev->asic_type == CHIP_VEGA10)) {
+ /* set the gfx block ras context feature for VEGA20 Gaming so that
+ * the ras disable cmd is sent to the ras ta during ras late init.
+ */
+ if (!adev->ras_features && adev->asic_type == CHIP_VEGA20) {
+ con->features |= BIT(AMDGPU_RAS_BLOCK__GFX);
+
+ return 0;
+ }
+
r = 0;
goto release_con;
}
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_manager *obj, *tmp;
- if (!con)
+ if (!adev->ras_features || !con) {
+ /* clean up the ras context for VEGA20 Gaming after the ras disable cmd has been sent */
+ amdgpu_release_ras_context(adev);
+
return;
+ }
if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS) {
/* Set up all other IPs which are not implemented. There is a
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
- if (!con)
+ if (!adev->ras_features || !con)
return;
amdgpu_ras_disable_all_features(adev, 0);
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
- if (!con)
+ if (!adev->ras_features || !con)
return 0;
/* Need disable ras on all IPs here before ip [hw/sw]fini */
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
- if (!con)
+ if (!adev->ras_features || !con)
return 0;
amdgpu_ras_fs_fini(adev);
return false;
}
+
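+/*
+ * amdgpu_release_ras_context - tear down the ras context that was kept
+ * alive only so the gfx ras disable cmd could reach the ras ta (VEGA20
+ * Gaming): once ras is otherwise unused and only the gfx feature bit
+ * remains set, clear it, detach the context and free it.
+ */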
+void amdgpu_release_ras_context(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ if (!con)
+ return;
+
+ if (!adev->ras_features && con->features & BIT(AMDGPU_RAS_BLOCK__GFX)) {
+ con->features &= ~BIT(AMDGPU_RAS_BLOCK__GFX);
+ amdgpu_ras_set_context(adev, NULL);
+ kfree(con);
+ }
+}