Git Repo - J-linux.git/blobdiff - drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
Merge tag 'perf-tools-for-v6.4-3-2023-05-06' of git://git.kernel.org/pub/scm/linux...
[J-linux.git] / drivers / gpu / drm / amd / amdgpu / amdgpu_ras.c
index 63dfcc98152d57d3032a03513f7552275042891a..3ab8a88789c8fec5f82c5772e1e68b85ad6e0eda 100644 (file)
@@ -34,6 +34,7 @@
 #include "amdgpu_atomfirmware.h"
 #include "amdgpu_xgmi.h"
 #include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
+#include "nbio_v4_3.h"
 #include "atom.h"
 #include "amdgpu_reset.h"
 
@@ -2428,6 +2429,13 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
                        else
                                adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN |
                                                        1 << AMDGPU_RAS_BLOCK__JPEG);
+
+                       /*
+                        * XGMI RAS is not supported if xgmi num physical nodes
+                        * is zero
+                        */
+                       if (!adev->gmc.xgmi.num_physical_nodes)
+                               adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__XGMI_WAFL);
                } else {
                        dev_info(adev->dev, "SRAM ECC is not presented.\n");
                }
@@ -2554,21 +2562,34 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
        /* initialize nbio ras function ahead of any other
         * ras functions so hardware fatal error interrupt
         * can be enabled as early as possible */
-       switch (adev->asic_type) {
-       case CHIP_VEGA20:
-       case CHIP_ARCTURUS:
-       case CHIP_ALDEBARAN:
-               if (!adev->gmc.xgmi.connected_to_cpu) {
+       switch (adev->ip_versions[NBIO_HWIP][0]) {
+       case IP_VERSION(7, 4, 0):
+       case IP_VERSION(7, 4, 1):
+       case IP_VERSION(7, 4, 4):
+               if (!adev->gmc.xgmi.connected_to_cpu)
                        adev->nbio.ras = &nbio_v7_4_ras;
-                       amdgpu_ras_register_ras_block(adev, &adev->nbio.ras->ras_block);
-                       adev->nbio.ras_if = &adev->nbio.ras->ras_block.ras_comm;
-               }
+               break;
+       case IP_VERSION(4, 3, 0):
+               if (adev->ras_hw_enabled & (1 << AMDGPU_RAS_BLOCK__DF))
+                       /* unlike other generations of nbio ras,
+                        * nbio v4_3 only supports the fatal error
+                        * interrupt to inform software that DF is
+                        * frozen due to a system fatal error event.
+                        * The driver should not enable nbio ras in
+                        * such a case. Instead, check DF RAS */
+                       adev->nbio.ras = &nbio_v4_3_ras;
                break;
        default:
                /* nbio ras is not available */
                break;
        }
 
+       /* nbio ras block needs to be enabled ahead of other ras blocks
+        * to handle fatal error */
+       r = amdgpu_nbio_ras_sw_init(adev);
+       if (r)
+               return r;
+
        if (adev->nbio.ras &&
            adev->nbio.ras->init_ras_controller_interrupt) {
                r = adev->nbio.ras->init_ras_controller_interrupt(adev);
@@ -3073,9 +3094,6 @@ int amdgpu_ras_register_ras_block(struct amdgpu_device *adev,
        if (!adev || !ras_block_obj)
                return -EINVAL;
 
-       if (!amdgpu_ras_asic_supported(adev))
-               return 0;
-
        ras_node = kzalloc(sizeof(*ras_node), GFP_KERNEL);
        if (!ras_node)
                return -ENOMEM;
This page took 0.037731 seconds and 4 git commands to generate.