Git Repo - linux.git/blobdiff - drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
Merge tag 'drm-misc-next-2021-04-01' of git://anongit.freedesktop.org/drm/drm-misc...
[linux.git] / drivers / gpu / drm / amd / amdgpu / amdgpu_ras.c
index 50f1a76389bc67e1d780e58488a629e03f3020b9..0e16683876aa482128ebd6f9333872debb06789a 100644 (file)
@@ -463,7 +463,7 @@ static struct ras_manager *amdgpu_ras_create_obj(struct amdgpu_device *adev,
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
        struct ras_manager *obj;
 
-       if (!con)
+       if (!adev->ras_features || !con)
                return NULL;
 
        if (head->block >= AMDGPU_RAS_BLOCK_COUNT)
@@ -490,7 +490,7 @@ struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
        struct ras_manager *obj;
        int i;
 
-       if (!con)
+       if (!adev->ras_features || !con)
                return NULL;
 
        if (head) {
@@ -590,7 +590,11 @@ static int __amdgpu_ras_feature_enable(struct amdgpu_device *adev,
                con->features |= BIT(head->block);
        } else {
                if (obj && amdgpu_ras_is_feature_enabled(adev, head)) {
-                       con->features &= ~BIT(head->block);
+                       /* Skip clearing the gfx ras context feature bit for
+                        * VEGA20 Gaming here; it will be cleared later.
+                        */
+                       if (!(!adev->ras_features && con->features & BIT(AMDGPU_RAS_BLOCK__GFX)))
+                               con->features &= ~BIT(head->block);
                        put_obj(obj);
                }
        }
@@ -693,6 +697,10 @@ int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev,
                        if (ret)
                                return ret;
 
+                       /* gfx block ras disable cmd must be sent to ras-ta */
+                       if (head->block == AMDGPU_RAS_BLOCK__GFX)
+                               con->features |= BIT(head->block);
+
                        ret = amdgpu_ras_feature_enable(adev, head, 0);
                }
        } else
@@ -948,7 +956,7 @@ unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev,
        struct ras_manager *obj;
        struct ras_err_data data = {0, 0};
 
-       if (!con)
+       if (!adev->ras_features || !con)
                return 0;
 
        list_for_each_entry(obj, &con->head, node) {
@@ -1469,7 +1477,7 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev)
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
        struct ras_manager *obj;
 
-       if (!con)
+       if (!adev->ras_features || !con)
                return;
 
        list_for_each_entry(obj, &con->head, node) {
@@ -1517,7 +1525,7 @@ static void amdgpu_ras_query_err_status(struct amdgpu_device *adev)
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
        struct ras_manager *obj;
 
-       if (!con)
+       if (!adev->ras_features || !con)
                return;
 
        list_for_each_entry(obj, &con->head, node) {
@@ -1782,14 +1790,13 @@ static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
        return ret;
 }
 
-static uint32_t
-amdgpu_ras_calculate_badpags_threshold(struct amdgpu_device *adev)
+static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev,
+                                       uint32_t max_length)
 {
+       struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
        int tmp_threshold = amdgpu_bad_page_threshold;
        u64 val;
-       uint32_t max_length = 0;
 
-       max_length = amdgpu_ras_eeprom_get_record_max_length();
        /*
         * Justification of value bad_page_cnt_threshold in ras structure
         *
@@ -1815,22 +1822,24 @@ amdgpu_ras_calculate_badpags_threshold(struct amdgpu_device *adev)
                tmp_threshold = max_length;
 
        if (tmp_threshold == -1) {
-               val = adev->gmc.real_vram_size;
+               val = adev->gmc.mc_vram_size;
                do_div(val, RAS_BAD_PAGE_RATE);
-               tmp_threshold = min(lower_32_bits(val), max_length);
+               con->bad_page_cnt_threshold = min(lower_32_bits(val),
+                                               max_length);
+       } else {
+               con->bad_page_cnt_threshold = tmp_threshold;
        }
-
-       return tmp_threshold;
 }
 
 int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
 {
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
        struct ras_err_handler_data **data;
+       uint32_t max_eeprom_records_len = 0;
        bool exc_err_limit = false;
        int ret;
 
-       if (con)
+       if (adev->ras_features && con)
                data = &con->eh_data;
        else
                return 0;
@@ -1846,16 +1855,8 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
        atomic_set(&con->in_recovery, 0);
        con->adev = adev;
 
-       if (!con->bad_page_cnt_threshold) {
-               con->bad_page_cnt_threshold =
-                       amdgpu_ras_calculate_badpags_threshold(adev);
-
-               ret = amdgpu_vram_mgr_reserve_backup_pages(
-                       ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM),
-                       con->bad_page_cnt_threshold);
-               if (ret)
-                       goto out;
-       }
+       max_eeprom_records_len = amdgpu_ras_eeprom_get_record_max_length();
+       amdgpu_ras_validate_threshold(adev, max_eeprom_records_len);
 
        /* Todo: During test the SMU might fail to read the eeprom through I2C
         * when the GPU is pending on XGMI reset during probe time
@@ -2005,6 +2006,15 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
        amdgpu_ras_check_supported(adev, &con->hw_supported,
                        &con->supported);
        if (!con->hw_supported || (adev->asic_type == CHIP_VEGA10)) {
+               /* Set the gfx block ras context feature for VEGA20 Gaming;
+                * the ras disable cmd is sent to ras ta during ras late init.
+                */
+               if (!adev->ras_features && adev->asic_type == CHIP_VEGA20) {
+                       con->features |= BIT(AMDGPU_RAS_BLOCK__GFX);
+
+                       return 0;
+               }
+
                r = 0;
                goto release_con;
        }
@@ -2118,8 +2128,12 @@ void amdgpu_ras_resume(struct amdgpu_device *adev)
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
        struct ras_manager *obj, *tmp;
 
-       if (!con)
+       if (!adev->ras_features || !con) {
+               /* clean ras context for VEGA20 Gaming after sending ras disable cmd */
+               amdgpu_release_ras_context(adev);
+
                return;
+       }
 
        if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS) {
                /* Set up all other IPs which are not implemented. There is a
@@ -2160,7 +2174,7 @@ void amdgpu_ras_suspend(struct amdgpu_device *adev)
 {
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 
-       if (!con)
+       if (!adev->ras_features || !con)
                return;
 
        amdgpu_ras_disable_all_features(adev, 0);
@@ -2174,7 +2188,7 @@ int amdgpu_ras_pre_fini(struct amdgpu_device *adev)
 {
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 
-       if (!con)
+       if (!adev->ras_features || !con)
                return 0;
 
        /* Need disable ras on all IPs here before ip [hw/sw]fini */
@@ -2187,7 +2201,7 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
 {
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 
-       if (!con)
+       if (!adev->ras_features || !con)
                return 0;
 
        amdgpu_ras_fs_fini(adev);
@@ -2230,3 +2244,17 @@ bool amdgpu_ras_need_emergency_restart(struct amdgpu_device *adev)
 
        return false;
 }
+
+void amdgpu_release_ras_context(struct amdgpu_device *adev)
+{
+       struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+       if (!con)
+               return;
+
+       if (!adev->ras_features && con->features & BIT(AMDGPU_RAS_BLOCK__GFX)) {
+               con->features &= ~BIT(AMDGPU_RAS_BLOCK__GFX);
+               amdgpu_ras_set_context(adev, NULL);
+               kfree(con);
+       }
+}
This page took 0.041109 seconds and 4 git commands to generate.