drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c

   1 /*
   2  * Copyright 2018 Advanced Micro Devices, Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in
  12  * all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20  * OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  *
  23  */
  24 #include <linux/debugfs.h>
  25 #include <linux/list.h>
  26 #include <linux/module.h>
  27 #include <linux/uaccess.h>
  28 #include <linux/reboot.h>
  29 #include <linux/syscalls.h>
  30 #include <linux/pm_runtime.h>
  31 #include <linux/list_sort.h>
  32
  33 #include "amdgpu.h"
  34 #include "amdgpu_ras.h"
  35 #include "amdgpu_atomfirmware.h"
  36 #include "amdgpu_xgmi.h"
  37 #include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
  38 #include "nbio_v4_3.h"
  39 #include "nbio_v7_9.h"
  40 #include "atom.h"
  41 #include "amdgpu_reset.h"
  42 #include "amdgpu_psp.h"
  43
  44 #ifdef CONFIG_X86_MCE_AMD
  45 #include <asm/mce.h>
  46
  47 static bool notifier_registered;
  48 #endif
  49 static const char *RAS_FS_NAME = "ras";
  50
/*
 * Printable names of the RAS error types.  The ras_err_str() macro in this
 * file indexes the table with ffs() of the error-type value, so entry 0
 * ("none") is what a zero value maps to and entry N names bit N-1.
 */
const char *ras_error_string[] = {
        "none",
        "parity",
        "single_correctable",
        "multi_uncorrectable",
        "poison",
};
  58
/*
 * Printable names of the RAS blocks, indexed by block id (see
 * get_ras_block_str() and ras_block_str()).  The debugfs control parser also
 * maps a block name typed by the user back to its index in this table, so the
 * order of the entries matters.
 */
const char *ras_block_string[] = {
        "umc",
        "sdma",
        "gfx",
        "mmhub",
        "athub",
        "pcie_bif",
        "hdp",
        "xgmi_wafl",
        "df",
        "smn",
        "sem",
        "mp0",
        "mp1",
        "fuse",
        "mca",
        "vcn",
        "jpeg",
        "ih",
        "mpio",
};
  80
/*
 * Printable names of the MCA sub-blocks.  get_ras_block_str() indexes this
 * table with sub_block_index when the block is AMDGPU_RAS_BLOCK__MCA.
 */
const char *ras_mca_block_string[] = {
        "mca_mp0",
        "mca_mp1",
        "mca_mpio",
        "mca_iohc",
};
  87
/* List entry that pairs a list node with one RAS block object. */
struct amdgpu_ras_block_list {
        /* ras block link */
        struct list_head node;

        /* the RAS block object carried by this entry */
        struct amdgpu_ras_block_object *ras_obj;
};
  94
  95 const char *get_ras_block_str(struct ras_common_if *ras_block)
  96 {
  97         if (!ras_block)
  98                 return "NULL";
  99
 100         if (ras_block->block >= AMDGPU_RAS_BLOCK_COUNT ||
 101             ras_block->block >= ARRAY_SIZE(ras_block_string))
 102                 return "OUT OF RANGE";
 103
 104         if (ras_block->block == AMDGPU_RAS_BLOCK__MCA)
 105                 return ras_mca_block_string[ras_block->sub_block_index];
 106
 107         return ras_block_string[ras_block->block];
 108 }
 109
/* Name of block id _BLOCK_, or "Out Of Range" when ras_block_string[] has no such entry. */
#define ras_block_str(_BLOCK_) \
        (((_BLOCK_) < ARRAY_SIZE(ras_block_string)) ? ras_block_string[_BLOCK_] : "Out Of Range")

/*
 * Name of error type i, looked up by ffs(i) (0 maps to "none").
 * NOTE(review): there is no bounds check here - this assumes ffs(i) is always
 * a valid index into ras_error_string[]; confirm against the callers.
 */
#define ras_err_str(i) (ras_error_string[ffs(i)])
 114
 115 #define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS)
 116
 117 /* inject address is 52 bits */
 118 #define RAS_UMC_INJECT_ADDR_LIMIT       (0x1ULL << 52)
 119
 120 /* typical ECC bad page rate is 1 bad page per 100MB VRAM */
 121 #define RAS_BAD_PAGE_COVER              (100 * 1024 * 1024ULL)
 122
 123 #define MAX_UMC_POISON_POLLING_TIME_ASYNC  300  //ms
 124
 125 #define AMDGPU_RAS_RETIRE_PAGE_INTERVAL 100  //ms
 126
/*
 * Reservation state of a retired page.
 * NOTE(review): the users of this enum are outside the visible part of the
 * file; the per-value meaning below is inferred from the names - confirm.
 */
enum amdgpu_ras_retire_page_reservation {
        AMDGPU_RAS_RETIRE_PAGE_RESERVED,
        AMDGPU_RAS_RETIRE_PAGE_PENDING,
        AMDGPU_RAS_RETIRE_PAGE_FAULT,
};
 132
 133 atomic_t amdgpu_ras_in_intr = ATOMIC_INIT(0);
 134
 135 static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
 136                                 uint64_t addr);
 137 static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
 138                                 uint64_t addr);
 139 #ifdef CONFIG_X86_MCE_AMD
 140 static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev);
/*
 * Fixed-size table of devices plus a count.
 * NOTE(review): presumably the devices served by the bad-page MCA notifier
 * declared just above; the code filling it is not visible here - confirm.
 */
struct mce_notifier_adev_list {
        struct amdgpu_device *devs[MAX_GPU_INSTANCE];
        int num_gpu;
};
 145 static struct mce_notifier_adev_list mce_adev_list;
 146 #endif
 147
 148 void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready)
 149 {
 150         if (adev && amdgpu_ras_get_context(adev))
 151                 amdgpu_ras_get_context(adev)->error_query_ready = ready;
 152 }
 153
 154 static bool amdgpu_ras_get_error_query_ready(struct amdgpu_device *adev)
 155 {
 156         if (adev && amdgpu_ras_get_context(adev))
 157                 return amdgpu_ras_get_context(adev)->error_query_ready;
 158
 159         return false;
 160 }
 161
 162 static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t address)
 163 {
 164         struct ras_err_data err_data;
 165         struct eeprom_table_record err_rec;
 166         int ret;
 167
 168         if ((address >= adev->gmc.mc_vram_size) ||
 169             (address >= RAS_UMC_INJECT_ADDR_LIMIT)) {
 170                 dev_warn(adev->dev,
 171                          "RAS WARN: input address 0x%llx is invalid.\n",
 172                          address);
 173                 return -EINVAL;
 174         }
 175
 176         if (amdgpu_ras_check_bad_page(adev, address)) {
 177                 dev_warn(adev->dev,
 178                          "RAS WARN: 0x%llx has already been marked as bad page!\n",
 179                          address);
 180                 return 0;
 181         }
 182
 183         ret = amdgpu_ras_error_data_init(&err_data);
 184         if (ret)
 185                 return ret;
 186
 187         memset(&err_rec, 0x0, sizeof(struct eeprom_table_record));
 188         err_data.err_addr = &err_rec;
 189         amdgpu_umc_fill_error_record(&err_data, address, address, 0, 0);
 190
 191         if (amdgpu_bad_page_threshold != 0) {
 192                 amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
 193                                          err_data.err_addr_cnt);
 194                 amdgpu_ras_save_bad_pages(adev, NULL);
 195         }
 196
 197         amdgpu_ras_error_data_fini(&err_data);
 198
 199         dev_warn(adev->dev, "WARNING: THIS IS ONLY FOR TEST PURPOSES AND WILL CORRUPT RAS EEPROM\n");
 200         dev_warn(adev->dev, "Clear EEPROM:\n");
 201         dev_warn(adev->dev, "    echo 1 > /sys/kernel/debug/dri/0/ras/ras_eeprom_reset\n");
 202
 203         return 0;
 204 }
 205
 206 static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf,
 207                                         size_t size, loff_t *pos)
 208 {
 209         struct ras_manager *obj = (struct ras_manager *)file_inode(f)->i_private;
 210         struct ras_query_if info = {
 211                 .head = obj->head,
 212         };
 213         ssize_t s;
 214         char val[128];
 215
 216         if (amdgpu_ras_query_error_status(obj->adev, &info))
 217                 return -EINVAL;
 218
 219         /* Hardware counter will be reset automatically after the query on Vega20 and Arcturus */
 220         if (amdgpu_ip_version(obj->adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 2) &&
 221             amdgpu_ip_version(obj->adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 4)) {
 222                 if (amdgpu_ras_reset_error_status(obj->adev, info.head.block))
 223                         dev_warn(obj->adev->dev, "Failed to reset error counter and error status");
 224         }
 225
 226         s = snprintf(val, sizeof(val), "%s: %lu\n%s: %lu\n",
 227                         "ue", info.ue_count,
 228                         "ce", info.ce_count);
 229         if (*pos >= s)
 230                 return 0;
 231
 232         s -= *pos;
 233         s = min_t(u64, s, size);
 234
 235
 236         if (copy_to_user(buf, &val[*pos], s))
 237                 return -EINVAL;
 238
 239         *pos += s;
 240
 241         return s;
 242 }
 243
/* File operations of a RAS error-count debugfs file: readable only, no write handler. */
static const struct file_operations amdgpu_ras_debugfs_ops = {
        .owner = THIS_MODULE,
        .read = amdgpu_ras_debugfs_read,
        .write = NULL,
        .llseek = default_llseek
};
 250
 251 static int amdgpu_ras_find_block_id_by_name(const char *name, int *block_id)
 252 {
 253         int i;
 254
 255         for (i = 0; i < ARRAY_SIZE(ras_block_string); i++) {
 256                 *block_id = i;
 257                 if (strcmp(name, ras_block_string[i]) == 0)
 258                         return 0;
 259         }
 260         return -EINVAL;
 261 }
 262
/*
 * Decode one write to the RAS control file into @data.
 *
 * The first (at most 64) bytes are treated as a text command:
 *   "disable <block>", "enable <block> <err>", "inject <block> <err> ..."
 *   or anything containing "retire_page".
 * When no text command matches and fewer than four leading non-zero bytes
 * are present, the write is taken as a raw struct ras_debug_if instead.
 *
 * Only writes at file offset 0 are accepted; *pos is set to @size so the
 * whole write counts as consumed.
 *
 * Returns 0 on success and -EINVAL for every failure, including a failed
 * copy from user space.
 */
static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
                const char __user *buf, size_t size,
                loff_t *pos, struct ras_debug_if *data)
{
        /* never read more text than str[] can hold with its terminator */
        ssize_t s = min_t(u64, 64, size);
        char str[65];
        char block_name[33];
        /* "disable" carries no error type, so it ends up using this default */
        char err[9] = "ue";
        int op = -1;
        int block_id;
        uint32_t sub_block;
        u64 address, value;
        /* default value is 0 if the mask is not set by user */
        u32 instance_mask = 0;

        if (*pos)
                return -EINVAL;
        *pos = size;

        memset(str, 0, sizeof(str));
        memset(data, 0, sizeof(*data));

        if (copy_from_user(str, buf, s))
                return -EINVAL;

        /* the field widths match the sizes of block_name[] and err[] */
        if (sscanf(str, "disable %32s", block_name) == 1)
                op = 0;
        else if (sscanf(str, "enable %32s %8s", block_name, err) == 2)
                op = 1;
        else if (sscanf(str, "inject %32s %8s", block_name, err) == 2)
                op = 2;
        else if (strstr(str, "retire_page") != NULL)
                op = 3;
        else if (str[0] && str[1] && str[2] && str[3])
                /* ascii string, but commands are not matched. */
                return -EINVAL;

        if (op != -1) {
                if (op == 3) {
                        /* address: hexadecimal with a 0x prefix, decimal otherwise */
                        if (sscanf(str, "%*s 0x%llx", &address) != 1 &&
                            sscanf(str, "%*s %llu", &address) != 1)
                                return -EINVAL;

                        data->op = op;
                        data->inject.address = address;

                        return 0;
                }

                if (amdgpu_ras_find_block_id_by_name(block_name, &block_id))
                        return -EINVAL;

                data->head.block = block_id;
                /* only ue, ce and poison errors are supported */
                /* prefix compare: any token that starts with the name is accepted */
                if (!memcmp("ue", err, 2))
                        data->head.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
                else if (!memcmp("ce", err, 2))
                        data->head.type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE;
                else if (!memcmp("poison", err, 6))
                        data->head.type = AMDGPU_RAS_ERROR__POISON;
                else
                        return -EINVAL;

                data->op = op;

                if (op == 2) {
                        /*
                         * Try, in order: hex with mask, decimal with mask,
                         * hex without mask, decimal without mask.  The first
                         * form that converts all of its fields wins; a failed
                         * attempt may already have written some of the
                         * variables, which the later attempts overwrite.
                         */
                        if (sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx 0x%x",
                                   &sub_block, &address, &value, &instance_mask) != 4 &&
                            sscanf(str, "%*s %*s %*s %u %llu %llu %u",
                                   &sub_block, &address, &value, &instance_mask) != 4 &&
                                sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx",
                                   &sub_block, &address, &value) != 3 &&
                            sscanf(str, "%*s %*s %*s %u %llu %llu",
                                   &sub_block, &address, &value) != 3)
                                return -EINVAL;
                        data->head.sub_block_index = sub_block;
                        data->inject.address = address;
                        data->inject.value = value;
                        data->inject.instance_mask = instance_mask;
                }
        } else {
                /* binary interface: the caller wrote a whole struct ras_debug_if */
                if (size < sizeof(*data))
                        return -EINVAL;

                if (copy_from_user(data, buf, sizeof(*data)))
                        return -EINVAL;
        }

        return 0;
}
 353
 354 static void amdgpu_ras_instance_mask_check(struct amdgpu_device *adev,
 355                                 struct ras_debug_if *data)
 356 {
 357         int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
 358         uint32_t mask, inst_mask = data->inject.instance_mask;
 359
 360         /* no need to set instance mask if there is only one instance */
 361         if (num_xcc <= 1 && inst_mask) {
 362                 data->inject.instance_mask = 0;
 363                 dev_dbg(adev->dev,
 364                         "RAS inject mask(0x%x) isn't supported and force it to 0.\n",
 365                         inst_mask);
 366
 367                 return;
 368         }
 369
 370         switch (data->head.block) {
 371         case AMDGPU_RAS_BLOCK__GFX:
 372                 mask = GENMASK(num_xcc - 1, 0);
 373                 break;
 374         case AMDGPU_RAS_BLOCK__SDMA:
 375                 mask = GENMASK(adev->sdma.num_instances - 1, 0);
 376                 break;
 377         case AMDGPU_RAS_BLOCK__VCN:
 378         case AMDGPU_RAS_BLOCK__JPEG:
 379                 mask = GENMASK(adev->vcn.num_vcn_inst - 1, 0);
 380                 break;
 381         default:
 382                 mask = inst_mask;
 383                 break;
 384         }
 385
 386         /* remove invalid bits in instance mask */
 387         data->inject.instance_mask &= mask;
 388         if (inst_mask != data->inject.instance_mask)
 389                 dev_dbg(adev->dev,
 390                         "Adjust RAS inject mask 0x%x to 0x%x\n",
 391                         inst_mask, data->inject.instance_mask);
 392 }
 393
 394 /**
 395  * DOC: AMDGPU RAS debugfs control interface
 396  *
 397  * The control interface accepts struct ras_debug_if which has two members.
 398  *
 399  * First member: ras_debug_if::head or ras_debug_if::inject.
 400  *
 401  * head is used to indicate which IP block will be under control.
 402  *
 403  * head has four members, they are block, type, sub_block_index, name.
 404  * block: which IP will be under control.
 405  * type: what kind of error will be enabled/disabled/injected.
 * sub_block_index: some IPs have subcomponents, e.g. GFX, SDMA.
 407  * name: the name of IP.
 408  *
 409  * inject has three more members than head, they are address, value and mask.
 410  * As their names indicate, inject operation will write the
 411  * value to the address.
 412  *
 * The second member: struct ras_debug_if::op.
 * It has four kinds of operations.
 *
 * - 0: disable RAS on the block. Take ::head as its data.
 * - 1: enable RAS on the block. Take ::head as its data.
 * - 2: inject errors on the block. Take ::inject as its data.
 * - 3: retire the page at ::inject.address (the "retire_page" text command).
 419  *
 420  * How to use the interface?
 421  *
 422  * In a program
 423  *
 424  * Copy the struct ras_debug_if in your code and initialize it.
 425  * Write the struct to the control interface.
 426  *
 427  * From shell
 428  *
 429  * .. code-block:: bash
 430  *
 431  *      echo "disable <block>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
 432  *      echo "enable  <block> <error>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
 433  *      echo "inject  <block> <error> <sub-block> <address> <value> <mask>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
 434  *
 435  * Where N, is the card which you want to affect.
 436  *
 437  * "disable" requires only the block.
 438  * "enable" requires the block and error type.
 * "inject" requires the block, error type, sub-block, address, and value.
 440  *
 441  * The block is one of: umc, sdma, gfx, etc.
 442  *      see ras_block_string[] for details
 443  *
 444  * The error type is one of: ue, ce and poison where,
 445  *      ue is multi-uncorrectable
 446  *      ce is single-correctable
 447  *      poison is poison
 448  *
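 * The sub-block is the sub-block index, pass 0 if there is no sub-block.
 * The sub-block, address, value and mask are parsed as hexadecimal numbers
 * when written with a leading 0x and as decimal numbers otherwise; use the
 * same notation for all numbers of one command.
 * The mask means instance mask; it is optional and is parsed as 0 when omitted.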
 452  *
 453  * For instance,
 454  *
 455  * .. code-block:: bash
 456  *
 457  *      echo inject umc ue 0x0 0x0 0x0 > /sys/kernel/debug/dri/0/ras/ras_ctrl
 458  *      echo inject umc ce 0 0 0 3 > /sys/kernel/debug/dri/0/ras/ras_ctrl
 459  *      echo disable umc > /sys/kernel/debug/dri/0/ras/ras_ctrl
 460  *
 461  * How to check the result of the operation?
 462  *
 463  * To check disable/enable, see "ras" features at,
 464  * /sys/class/drm/card[0/1/2...]/device/ras/features
 465  *
 466  * To check inject, see the corresponding error count at,
 467  * /sys/class/drm/card[0/1/2...]/device/ras/[gfx|sdma|umc|...]_err_count
 468  *
 469  * .. note::
 470  *      Operations are only allowed on blocks which are supported.
 471  *      Check the "ras" mask at /sys/module/amdgpu/parameters/ras_mask
 472  *      to see which blocks support RAS on a particular asic.
 473  *
 474  */
 475 static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f,
 476                                              const char __user *buf,
 477                                              size_t size, loff_t *pos)
 478 {
 479         struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
 480         struct ras_debug_if data;
 481         int ret = 0;
 482
 483         if (!amdgpu_ras_get_error_query_ready(adev)) {
 484                 dev_warn(adev->dev, "RAS WARN: error injection "
 485                                 "currently inaccessible\n");
 486                 return size;
 487         }
 488
 489         ret = amdgpu_ras_debugfs_ctrl_parse_data(f, buf, size, pos, &data);
 490         if (ret)
 491                 return ret;
 492
 493         if (data.op == 3) {
 494                 ret = amdgpu_reserve_page_direct(adev, data.inject.address);
 495                 if (!ret)
 496                         return size;
 497                 else
 498                         return ret;
 499         }
 500
 501         if (!amdgpu_ras_is_supported(adev, data.head.block))
 502                 return -EINVAL;
 503
 504         switch (data.op) {
 505         case 0:
 506                 ret = amdgpu_ras_feature_enable(adev, &data.head, 0);
 507                 break;
 508         case 1:
 509                 ret = amdgpu_ras_feature_enable(adev, &data.head, 1);
 510                 break;
 511         case 2:
 512                 if ((data.inject.address >= adev->gmc.mc_vram_size &&
 513                     adev->gmc.mc_vram_size) ||
 514                     (data.inject.address >= RAS_UMC_INJECT_ADDR_LIMIT)) {
 515                         dev_warn(adev->dev, "RAS WARN: input address "
 516                                         "0x%llx is invalid.",
 517                                         data.inject.address);
 518                         ret = -EINVAL;
 519                         break;
 520                 }
 521
 522                 /* umc ce/ue error injection for a bad page is not allowed */
 523                 if ((data.head.block == AMDGPU_RAS_BLOCK__UMC) &&
 524                     amdgpu_ras_check_bad_page(adev, data.inject.address)) {
 525                         dev_warn(adev->dev, "RAS WARN: inject: 0x%llx has "
 526                                  "already been marked as bad!\n",
 527                                  data.inject.address);
 528                         break;
 529                 }
 530
 531                 amdgpu_ras_instance_mask_check(adev, &data);
 532
 533                 /* data.inject.address is offset instead of absolute gpu address */
 534                 ret = amdgpu_ras_error_inject(adev, &data.inject);
 535                 break;
 536         default:
 537                 ret = -EINVAL;
 538                 break;
 539         }
 540
 541         if (ret)
 542                 return ret;
 543
 544         return size;
 545 }
 546
 547 /**
 548  * DOC: AMDGPU RAS debugfs EEPROM table reset interface
 549  *
 * Some boards contain an EEPROM which is used to persistently store a list of
 * bad pages that experienced ECC errors in VRAM.  This interface provides
 * a way to reset the EEPROM, e.g., after testing error injection.
 553  *
 554  * Usage:
 555  *
 556  * .. code-block:: bash
 557  *
 558  *      echo 1 > ../ras/ras_eeprom_reset
 559  *
 560  * will reset EEPROM table to 0 entries.
 561  *
 562  */
 563 static ssize_t amdgpu_ras_debugfs_eeprom_write(struct file *f,
 564                                                const char __user *buf,
 565                                                size_t size, loff_t *pos)
 566 {
 567         struct amdgpu_device *adev =
 568                 (struct amdgpu_device *)file_inode(f)->i_private;
 569         int ret;
 570
 571         ret = amdgpu_ras_eeprom_reset_table(
 572                 &(amdgpu_ras_get_context(adev)->eeprom_control));
 573
 574         if (!ret) {
 575                 /* Something was written to EEPROM.
 576                  */
 577                 amdgpu_ras_get_context(adev)->flags = RAS_DEFAULT_FLAGS;
 578                 return size;
 579         } else {
 580                 return ret;
 581         }
 582 }
 583
/* File operations of the RAS control file: writable only, no read handler. */
static const struct file_operations amdgpu_ras_debugfs_ctrl_ops = {
        .owner = THIS_MODULE,
        .read = NULL,
        .write = amdgpu_ras_debugfs_ctrl_write,
        .llseek = default_llseek
};

/* File operations of the EEPROM reset file: writable only, no read handler. */
static const struct file_operations amdgpu_ras_debugfs_eeprom_ops = {
        .owner = THIS_MODULE,
        .read = NULL,
        .write = amdgpu_ras_debugfs_eeprom_write,
        .llseek = default_llseek
};
 597
 598 /**
 599  * DOC: AMDGPU RAS sysfs Error Count Interface
 600  *
 601  * It allows the user to read the error count for each IP block on the gpu through
 602  * /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count
 603  *
 604  * It outputs the multiple lines which report the uncorrected (ue) and corrected
 605  * (ce) error counts.
 606  *
 607  * The format of one line is below,
 608  *
 609  * [ce|ue]: count
 610  *
 611  * Example:
 612  *
 613  * .. code-block:: bash
 614  *
 615  *      ue: 0
 616  *      ce: 1
 617  *
 618  */
 619 static ssize_t amdgpu_ras_sysfs_read(struct device *dev,
 620                 struct device_attribute *attr, char *buf)
 621 {
 622         struct ras_manager *obj = container_of(attr, struct ras_manager, sysfs_attr);
 623         struct ras_query_if info = {
 624                 .head = obj->head,
 625         };
 626
 627         if (!amdgpu_ras_get_error_query_ready(obj->adev))
 628                 return sysfs_emit(buf, "Query currently inaccessible\n");
 629
 630         if (amdgpu_ras_query_error_status(obj->adev, &info))
 631                 return -EINVAL;
 632
 633         if (amdgpu_ip_version(obj->adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 2) &&
 634             amdgpu_ip_version(obj->adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 4)) {
 635                 if (amdgpu_ras_reset_error_status(obj->adev, info.head.block))
 636                         dev_warn(obj->adev->dev, "Failed to reset error counter and error status");
 637         }
 638
 639         if (info.head.block == AMDGPU_RAS_BLOCK__UMC)
 640                 return sysfs_emit(buf, "%s: %lu\n%s: %lu\n%s: %lu\n", "ue", info.ue_count,
 641                                 "ce", info.ce_count, "de", info.de_count);
 642         else
 643                 return sysfs_emit(buf, "%s: %lu\n%s: %lu\n", "ue", info.ue_count,
 644                                 "ce", info.ce_count);
 645 }
 646
 647 /* obj begin */
 648
 649 #define get_obj(obj) do { (obj)->use++; } while (0)
 650 #define alive_obj(obj) ((obj)->use)
 651
 652 static inline void put_obj(struct ras_manager *obj)
 653 {
 654         if (obj && (--obj->use == 0)) {
 655                 list_del(&obj->node);
 656                 amdgpu_ras_error_data_fini(&obj->err_data);
 657         }
 658
 659         if (obj && (obj->use < 0))
 660                 DRM_ERROR("RAS ERROR: Unbalance obj(%s) use\n", get_ras_block_str(&obj->head));
 661 }
 662
 663 /* make one obj and return it. */
 664 static struct ras_manager *amdgpu_ras_create_obj(struct amdgpu_device *adev,
 665                 struct ras_common_if *head)
 666 {
 667         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 668         struct ras_manager *obj;
 669
 670         if (!adev->ras_enabled || !con)
 671                 return NULL;
 672
 673         if (head->block >= AMDGPU_RAS_BLOCK_COUNT)
 674                 return NULL;
 675
 676         if (head->block == AMDGPU_RAS_BLOCK__MCA) {
 677                 if (head->sub_block_index >= AMDGPU_RAS_MCA_BLOCK__LAST)
 678                         return NULL;
 679
 680                 obj = &con->objs[AMDGPU_RAS_BLOCK__LAST + head->sub_block_index];
 681         } else
 682                 obj = &con->objs[head->block];
 683
 684         /* already exist. return obj? */
 685         if (alive_obj(obj))
 686                 return NULL;
 687
 688         if (amdgpu_ras_error_data_init(&obj->err_data))
 689                 return NULL;
 690
 691         obj->head = *head;
 692         obj->adev = adev;
 693         list_add(&obj->node, &con->head);
 694         get_obj(obj);
 695
 696         return obj;
 697 }
 698
 699 /* return an obj equal to head, or the first when head is NULL */
 700 struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
 701                 struct ras_common_if *head)
 702 {
 703         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 704         struct ras_manager *obj;
 705         int i;
 706
 707         if (!adev->ras_enabled || !con)
 708                 return NULL;
 709
 710         if (head) {
 711                 if (head->block >= AMDGPU_RAS_BLOCK_COUNT)
 712                         return NULL;
 713
 714                 if (head->block == AMDGPU_RAS_BLOCK__MCA) {
 715                         if (head->sub_block_index >= AMDGPU_RAS_MCA_BLOCK__LAST)
 716                                 return NULL;
 717
 718                         obj = &con->objs[AMDGPU_RAS_BLOCK__LAST + head->sub_block_index];
 719                 } else
 720                         obj = &con->objs[head->block];
 721
 722                 if (alive_obj(obj))
 723                         return obj;
 724         } else {
 725                 for (i = 0; i < AMDGPU_RAS_BLOCK_COUNT + AMDGPU_RAS_MCA_BLOCK_COUNT; i++) {
 726                         obj = &con->objs[i];
 727                         if (alive_obj(obj))
 728                                 return obj;
 729                 }
 730         }
 731
 732         return NULL;
 733 }
 734 /* obj end */
 735
 736 /* feature ctl begin */
 737 static int amdgpu_ras_is_feature_allowed(struct amdgpu_device *adev,
 738                                          struct ras_common_if *head)
 739 {
 740         return adev->ras_hw_enabled & BIT(head->block);
 741 }
 742
 743 static int amdgpu_ras_is_feature_enabled(struct amdgpu_device *adev,
 744                 struct ras_common_if *head)
 745 {
 746         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 747
 748         return con->features & BIT(head->block);
 749 }
 750
 751 /*
 752  * if obj is not created, then create one.
 753  * set feature enable flag.
 754  */
 755 static int __amdgpu_ras_feature_enable(struct amdgpu_device *adev,
 756                 struct ras_common_if *head, int enable)
 757 {
 758         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 759         struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
 760
 761         /* If hardware does not support ras, then do not create obj.
 762          * But if hardware support ras, we can create the obj.
 763          * Ras framework checks con->hw_supported to see if it need do
 764          * corresponding initialization.
 765          * IP checks con->support to see if it need disable ras.
 766          */
 767         if (!amdgpu_ras_is_feature_allowed(adev, head))
 768                 return 0;
 769
 770         if (enable) {
 771                 if (!obj) {
 772                         obj = amdgpu_ras_create_obj(adev, head);
 773                         if (!obj)
 774                                 return -EINVAL;
 775                 } else {
 776                         /* In case we create obj somewhere else */
 777                         get_obj(obj);
 778                 }
 779                 con->features |= BIT(head->block);
 780         } else {
 781                 if (obj && amdgpu_ras_is_feature_enabled(adev, head)) {
 782                         con->features &= ~BIT(head->block);
 783                         put_obj(obj);
 784                 }
 785         }
 786
 787         return 0;
 788 }
 789
 790 /* wrapper of psp_ras_enable_features */
 791 int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
 792                 struct ras_common_if *head, bool enable)
 793 {
 794         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 795         union ta_ras_cmd_input *info;
 796         int ret;
 797
 798         if (!con)
 799                 return -EINVAL;
 800
 801         /* For non-gfx ip, do not enable ras feature if it is not allowed */
 802         /* For gfx ip, regardless of feature support status, */
 803         /* Force issue enable or disable ras feature commands */
 804         if (head->block != AMDGPU_RAS_BLOCK__GFX &&
 805             !amdgpu_ras_is_feature_allowed(adev, head))
 806                 return 0;
 807
 808         /* Only enable gfx ras feature from host side */
 809         if (head->block == AMDGPU_RAS_BLOCK__GFX &&
 810             !amdgpu_sriov_vf(adev) &&
 811             !amdgpu_ras_intr_triggered()) {
 812                 info = kzalloc(sizeof(union ta_ras_cmd_input), GFP_KERNEL);
 813                 if (!info)
 814                         return -ENOMEM;
 815
 816                 if (!enable) {
 817                         info->disable_features = (struct ta_ras_disable_features_input) {
 818                                 .block_id =  amdgpu_ras_block_to_ta(head->block),
 819                                 .error_type = amdgpu_ras_error_to_ta(head->type),
 820                         };
 821                 } else {
 822                         info->enable_features = (struct ta_ras_enable_features_input) {
 823                                 .block_id =  amdgpu_ras_block_to_ta(head->block),
 824                                 .error_type = amdgpu_ras_error_to_ta(head->type),
 825                         };
 826                 }
 827
 828                 ret = psp_ras_enable_features(&adev->psp, info, enable);
 829                 if (ret) {
 830                         dev_err(adev->dev, "ras %s %s failed poison:%d ret:%d\n",
 831                                 enable ? "enable":"disable",
 832                                 get_ras_block_str(head),
 833                                 amdgpu_ras_is_poison_mode_supported(adev), ret);
 834                         kfree(info);
 835                         return ret;
 836                 }
 837
 838                 kfree(info);
 839         }
 840
 841         /* setup the obj */
 842         __amdgpu_ras_feature_enable(adev, head, enable);
 843
 844         return 0;
 845 }
 846
 847 /* Only used in device probe stage and called only once. */
 848 int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev,
 849                 struct ras_common_if *head, bool enable)
 850 {
 851         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 852         int ret;
 853
 854         if (!con)
 855                 return -EINVAL;
 856
 857         if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS) {
 858                 if (enable) {
 859                         /* There is no harm to issue a ras TA cmd regardless of
 860                          * the currecnt ras state.
 861                          * If current state == target state, it will do nothing
 862                          * But sometimes it requests driver to reset and repost
 863                          * with error code -EAGAIN.
 864                          */
 865                         ret = amdgpu_ras_feature_enable(adev, head, 1);
 866                         /* With old ras TA, we might fail to enable ras.
 867                          * Log it and just setup the object.
 868                          * TODO need remove this WA in the future.
 869                          */
 870                         if (ret == -EINVAL) {
 871                                 ret = __amdgpu_ras_feature_enable(adev, head, 1);
 872                                 if (!ret)
 873                                         dev_info(adev->dev,
 874                                                 "RAS INFO: %s setup object\n",
 875                                                 get_ras_block_str(head));
 876                         }
 877                 } else {
 878                         /* setup the object then issue a ras TA disable cmd.*/
 879                         ret = __amdgpu_ras_feature_enable(adev, head, 1);
 880                         if (ret)
 881                                 return ret;
 882
 883                         /* gfx block ras dsiable cmd must send to ras-ta */
 884                         if (head->block == AMDGPU_RAS_BLOCK__GFX)
 885                                 con->features |= BIT(head->block);
 886
 887                         ret = amdgpu_ras_feature_enable(adev, head, 0);
 888
 889                         /* clean gfx block ras features flag */
 890                         if (adev->ras_enabled && head->block == AMDGPU_RAS_BLOCK__GFX)
 891                                 con->features &= ~BIT(head->block);
 892                 }
 893         } else
 894                 ret = amdgpu_ras_feature_enable(adev, head, enable);
 895
 896         return ret;
 897 }
 898
 899 static int amdgpu_ras_disable_all_features(struct amdgpu_device *adev,
 900                 bool bypass)
 901 {
 902         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 903         struct ras_manager *obj, *tmp;
 904
 905         list_for_each_entry_safe(obj, tmp, &con->head, node) {
 906                 /* bypass psp.
 907                  * aka just release the obj and corresponding flags
 908                  */
 909                 if (bypass) {
 910                         if (__amdgpu_ras_feature_enable(adev, &obj->head, 0))
 911                                 break;
 912                 } else {
 913                         if (amdgpu_ras_feature_enable(adev, &obj->head, 0))
 914                                 break;
 915                 }
 916         }
 917
 918         return con->features;
 919 }
 920
 921 static int amdgpu_ras_enable_all_features(struct amdgpu_device *adev,
 922                 bool bypass)
 923 {
 924         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 925         int i;
 926         const enum amdgpu_ras_error_type default_ras_type = AMDGPU_RAS_ERROR__NONE;
 927
 928         for (i = 0; i < AMDGPU_RAS_BLOCK_COUNT; i++) {
 929                 struct ras_common_if head = {
 930                         .block = i,
 931                         .type = default_ras_type,
 932                         .sub_block_index = 0,
 933                 };
 934
 935                 if (i == AMDGPU_RAS_BLOCK__MCA)
 936                         continue;
 937
 938                 if (bypass) {
 939                         /*
 940                          * bypass psp. vbios enable ras for us.
 941                          * so just create the obj
 942                          */
 943                         if (__amdgpu_ras_feature_enable(adev, &head, 1))
 944                                 break;
 945                 } else {
 946                         if (amdgpu_ras_feature_enable(adev, &head, 1))
 947                                 break;
 948                 }
 949         }
 950
 951         for (i = 0; i < AMDGPU_RAS_MCA_BLOCK_COUNT; i++) {
 952                 struct ras_common_if head = {
 953                         .block = AMDGPU_RAS_BLOCK__MCA,
 954                         .type = default_ras_type,
 955                         .sub_block_index = i,
 956                 };
 957
 958                 if (bypass) {
 959                         /*
 960                          * bypass psp. vbios enable ras for us.
 961                          * so just create the obj
 962                          */
 963                         if (__amdgpu_ras_feature_enable(adev, &head, 1))
 964                                 break;
 965                 } else {
 966                         if (amdgpu_ras_feature_enable(adev, &head, 1))
 967                                 break;
 968                 }
 969         }
 970
 971         return con->features;
 972 }
 973 /* feature ctl end */
 974
 975 static int amdgpu_ras_block_match_default(struct amdgpu_ras_block_object *block_obj,
 976                 enum amdgpu_ras_block block)
 977 {
 978         if (!block_obj)
 979                 return -EINVAL;
 980
 981         if (block_obj->ras_comm.block == block)
 982                 return 0;
 983
 984         return -EINVAL;
 985 }
 986
 987 static struct amdgpu_ras_block_object *amdgpu_ras_get_ras_block(struct amdgpu_device *adev,
 988                                         enum amdgpu_ras_block block, uint32_t sub_block_index)
 989 {
 990         struct amdgpu_ras_block_list *node, *tmp;
 991         struct amdgpu_ras_block_object *obj;
 992
 993         if (block >= AMDGPU_RAS_BLOCK__LAST)
 994                 return NULL;
 995
 996         list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {
 997                 if (!node->ras_obj) {
 998                         dev_warn(adev->dev, "Warning: abnormal ras list node.\n");
 999                         continue;
1000                 }
1001
1002                 obj = node->ras_obj;
1003                 if (obj->ras_block_match) {
1004                         if (obj->ras_block_match(obj, block, sub_block_index) == 0)
1005                                 return obj;
1006                 } else {
1007                         if (amdgpu_ras_block_match_default(obj, block) == 0)
1008                                 return obj;
1009                 }
1010         }
1011
1012         return NULL;
1013 }
1014
1015 static void amdgpu_ras_get_ecc_info(struct amdgpu_device *adev, struct ras_err_data *err_data)
1016 {
1017         struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
1018         int ret = 0;
1019
1020         /*
1021          * choosing right query method according to
1022          * whether smu support query error information
1023          */
1024         ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(ras->umc_ecc));
1025         if (ret == -EOPNOTSUPP) {
1026                 if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
1027                         adev->umc.ras->ras_block.hw_ops->query_ras_error_count)
1028                         adev->umc.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data);
1029
1030                 /* umc query_ras_error_address is also responsible for clearing
1031                  * error status
1032                  */
1033                 if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
1034                     adev->umc.ras->ras_block.hw_ops->query_ras_error_address)
1035                         adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev, err_data);
1036         } else if (!ret) {
1037                 if (adev->umc.ras &&
1038                         adev->umc.ras->ecc_info_query_ras_error_count)
1039                         adev->umc.ras->ecc_info_query_ras_error_count(adev, err_data);
1040
1041                 if (adev->umc.ras &&
1042                         adev->umc.ras->ecc_info_query_ras_error_address)
1043                         adev->umc.ras->ecc_info_query_ras_error_address(adev, err_data);
1044         }
1045 }
1046
1047 static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev,
1048                                               struct ras_manager *ras_mgr,
1049                                               struct ras_err_data *err_data,
1050                                               struct ras_query_context *qctx,
1051                                               const char *blk_name,
1052                                               bool is_ue,
1053                                               bool is_de)
1054 {
1055         struct amdgpu_smuio_mcm_config_info *mcm_info;
1056         struct ras_err_node *err_node;
1057         struct ras_err_info *err_info;
1058         u64 event_id = qctx->event_id;
1059
1060         if (is_ue) {
1061                 for_each_ras_error(err_node, err_data) {
1062                         err_info = &err_node->err_info;
1063                         mcm_info = &err_info->mcm_info;
1064                         if (err_info->ue_count) {
1065                                 RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, "
1066                                               "%lld new uncorrectable hardware errors detected in %s block\n",
1067                                               mcm_info->socket_id,
1068                                               mcm_info->die_id,
1069                                               err_info->ue_count,
1070                                               blk_name);
1071                         }
1072                 }
1073
1074                 for_each_ras_error(err_node, &ras_mgr->err_data) {
1075                         err_info = &err_node->err_info;
1076                         mcm_info = &err_info->mcm_info;
1077                         RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, "
1078                                       "%lld uncorrectable hardware errors detected in total in %s block\n",
1079                                       mcm_info->socket_id, mcm_info->die_id, err_info->ue_count, blk_name);
1080                 }
1081
1082         } else {
1083                 if (is_de) {
1084                         for_each_ras_error(err_node, err_data) {
1085                                 err_info = &err_node->err_info;
1086                                 mcm_info = &err_info->mcm_info;
1087                                 if (err_info->de_count) {
1088                                         RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, "
1089                                                       "%lld new deferred hardware errors detected in %s block\n",
1090                                                       mcm_info->socket_id,
1091                                                       mcm_info->die_id,
1092                                                       err_info->de_count,
1093                                                       blk_name);
1094                                 }
1095                         }
1096
1097                         for_each_ras_error(err_node, &ras_mgr->err_data) {
1098                                 err_info = &err_node->err_info;
1099                                 mcm_info = &err_info->mcm_info;
1100                                 RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, "
1101                                               "%lld deferred hardware errors detected in total in %s block\n",
1102                                               mcm_info->socket_id, mcm_info->die_id,
1103                                               err_info->de_count, blk_name);
1104                         }
1105                 } else {
1106                         for_each_ras_error(err_node, err_data) {
1107                                 err_info = &err_node->err_info;
1108                                 mcm_info = &err_info->mcm_info;
1109                                 if (err_info->ce_count) {
1110                                         RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, "
1111                                                       "%lld new correctable hardware errors detected in %s block\n",
1112                                                       mcm_info->socket_id,
1113                                                       mcm_info->die_id,
1114                                                       err_info->ce_count,
1115                                                       blk_name);
1116                                 }
1117                         }
1118
1119                         for_each_ras_error(err_node, &ras_mgr->err_data) {
1120                                 err_info = &err_node->err_info;
1121                                 mcm_info = &err_info->mcm_info;
1122                                 RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, "
1123                                               "%lld correctable hardware errors detected in total in %s block\n",
1124                                               mcm_info->socket_id, mcm_info->die_id,
1125                                               err_info->ce_count, blk_name);
1126                         }
1127                 }
1128         }
1129 }
1130
1131 static inline bool err_data_has_source_info(struct ras_err_data *data)
1132 {
1133         return !list_empty(&data->err_node_list);
1134 }
1135
1136 static void amdgpu_ras_error_generate_report(struct amdgpu_device *adev,
1137                                              struct ras_query_if *query_if,
1138                                              struct ras_err_data *err_data,
1139                                              struct ras_query_context *qctx)
1140 {
1141         struct ras_manager *ras_mgr = amdgpu_ras_find_obj(adev, &query_if->head);
1142         const char *blk_name = get_ras_block_str(&query_if->head);
1143         u64 event_id = qctx->event_id;
1144
1145         if (err_data->ce_count) {
1146                 if (err_data_has_source_info(err_data)) {
1147                         amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, qctx,
1148                                                           blk_name, false, false);
1149                 } else if (!adev->aid_mask &&
1150                            adev->smuio.funcs &&
1151                            adev->smuio.funcs->get_socket_id &&
1152                            adev->smuio.funcs->get_die_id) {
1153                         RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d "
1154                                       "%ld correctable hardware errors "
1155                                       "detected in %s block\n",
1156                                       adev->smuio.funcs->get_socket_id(adev),
1157                                       adev->smuio.funcs->get_die_id(adev),
1158                                       ras_mgr->err_data.ce_count,
1159                                       blk_name);
1160                 } else {
1161                         RAS_EVENT_LOG(adev, event_id, "%ld correctable hardware errors "
1162                                       "detected in %s block\n",
1163                                       ras_mgr->err_data.ce_count,
1164                                       blk_name);
1165                 }
1166         }
1167
1168         if (err_data->ue_count) {
1169                 if (err_data_has_source_info(err_data)) {
1170                         amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, qctx,
1171                                                           blk_name, true, false);
1172                 } else if (!adev->aid_mask &&
1173                            adev->smuio.funcs &&
1174                            adev->smuio.funcs->get_socket_id &&
1175                            adev->smuio.funcs->get_die_id) {
1176                         RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d "
1177                                       "%ld uncorrectable hardware errors "
1178                                       "detected in %s block\n",
1179                                       adev->smuio.funcs->get_socket_id(adev),
1180                                       adev->smuio.funcs->get_die_id(adev),
1181                                       ras_mgr->err_data.ue_count,
1182                                       blk_name);
1183                 } else {
1184                         RAS_EVENT_LOG(adev, event_id, "%ld uncorrectable hardware errors "
1185                                       "detected in %s block\n",
1186                                       ras_mgr->err_data.ue_count,
1187                                       blk_name);
1188                 }
1189         }
1190
1191         if (err_data->de_count) {
1192                 if (err_data_has_source_info(err_data)) {
1193                         amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, qctx,
1194                                                           blk_name, false, true);
1195                 } else if (!adev->aid_mask &&
1196                            adev->smuio.funcs &&
1197                            adev->smuio.funcs->get_socket_id &&
1198                            adev->smuio.funcs->get_die_id) {
1199                         RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d "
1200                                       "%ld deferred hardware errors "
1201                                       "detected in %s block\n",
1202                                       adev->smuio.funcs->get_socket_id(adev),
1203                                       adev->smuio.funcs->get_die_id(adev),
1204                                       ras_mgr->err_data.de_count,
1205                                       blk_name);
1206                 } else {
1207                         RAS_EVENT_LOG(adev, event_id, "%ld deferred hardware errors "
1208                                       "detected in %s block\n",
1209                                       ras_mgr->err_data.de_count,
1210                                       blk_name);
1211                 }
1212         }
1213 }
1214
1215 static void amdgpu_rasmgr_error_data_statistic_update(struct ras_manager *obj, struct ras_err_data *err_data)
1216 {
1217         struct ras_err_node *err_node;
1218         struct ras_err_info *err_info;
1219
1220         if (err_data_has_source_info(err_data)) {
1221                 for_each_ras_error(err_node, err_data) {
1222                         err_info = &err_node->err_info;
1223                         amdgpu_ras_error_statistic_de_count(&obj->err_data,
1224                                         &err_info->mcm_info, NULL, err_info->de_count);
1225                         amdgpu_ras_error_statistic_ce_count(&obj->err_data,
1226                                         &err_info->mcm_info, NULL, err_info->ce_count);
1227                         amdgpu_ras_error_statistic_ue_count(&obj->err_data,
1228                                         &err_info->mcm_info, NULL, err_info->ue_count);
1229                 }
1230         } else {
1231                 /* for legacy asic path which doesn't has error source info */
1232                 obj->err_data.ue_count += err_data->ue_count;
1233                 obj->err_data.ce_count += err_data->ce_count;
1234                 obj->err_data.de_count += err_data->de_count;
1235         }
1236 }
1237
1238 static struct ras_manager *get_ras_manager(struct amdgpu_device *adev, enum amdgpu_ras_block blk)
1239 {
1240         struct ras_common_if head;
1241
1242         memset(&head, 0, sizeof(head));
1243         head.block = blk;
1244
1245         return amdgpu_ras_find_obj(adev, &head);
1246 }
1247
1248 int amdgpu_ras_bind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
1249                         const struct aca_info *aca_info, void *data)
1250 {
1251         struct ras_manager *obj;
1252
1253         /* in resume phase, no need to create aca fs node */
1254         if (adev->in_suspend || amdgpu_in_reset(adev))
1255                 return 0;
1256
1257         obj = get_ras_manager(adev, blk);
1258         if (!obj)
1259                 return -EINVAL;
1260
1261         return amdgpu_aca_add_handle(adev, &obj->aca_handle, ras_block_str(blk), aca_info, data);
1262 }
1263
1264 int amdgpu_ras_unbind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk)
1265 {
1266         struct ras_manager *obj;
1267
1268         obj = get_ras_manager(adev, blk);
1269         if (!obj)
1270                 return -EINVAL;
1271
1272         amdgpu_aca_remove_handle(&obj->aca_handle);
1273
1274         return 0;
1275 }
1276
1277 static int amdgpu_aca_log_ras_error_data(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
1278                                          enum aca_error_type type, struct ras_err_data *err_data,
1279                                          struct ras_query_context *qctx)
1280 {
1281         struct ras_manager *obj;
1282
1283         obj = get_ras_manager(adev, blk);
1284         if (!obj)
1285                 return -EINVAL;
1286
1287         return amdgpu_aca_get_error_data(adev, &obj->aca_handle, type, err_data, qctx);
1288 }
1289
1290 ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *attr,
1291                                   struct aca_handle *handle, char *buf, void *data)
1292 {
1293         struct ras_manager *obj = container_of(handle, struct ras_manager, aca_handle);
1294         struct ras_query_if info = {
1295                 .head = obj->head,
1296         };
1297
1298         if (amdgpu_ras_query_error_status(obj->adev, &info))
1299                 return -EINVAL;
1300
1301         return sysfs_emit(buf, "%s: %lu\n%s: %lu\n%s: %lu\n", "ue", info.ue_count,
1302                           "ce", info.ce_count, "de", info.de_count);
1303 }
1304
1305 static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev,
1306                                                 struct ras_query_if *info,
1307                                                 struct ras_err_data *err_data,
1308                                                 struct ras_query_context *qctx,
1309                                                 unsigned int error_query_mode)
1310 {
1311         enum amdgpu_ras_block blk = info ? info->head.block : AMDGPU_RAS_BLOCK_COUNT;
1312         struct amdgpu_ras_block_object *block_obj = NULL;
1313         int ret;
1314
1315         if (blk == AMDGPU_RAS_BLOCK_COUNT)
1316                 return -EINVAL;
1317
1318         if (error_query_mode == AMDGPU_RAS_INVALID_ERROR_QUERY)
1319                 return -EINVAL;
1320
1321         if (error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY) {
1322                 if (info->head.block == AMDGPU_RAS_BLOCK__UMC) {
1323                         amdgpu_ras_get_ecc_info(adev, err_data);
1324                 } else {
1325                         block_obj = amdgpu_ras_get_ras_block(adev, info->head.block, 0);
1326                         if (!block_obj || !block_obj->hw_ops) {
1327                                 dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
1328                                              get_ras_block_str(&info->head));
1329                                 return -EINVAL;
1330                         }
1331
1332                         if (block_obj->hw_ops->query_ras_error_count)
1333                                 block_obj->hw_ops->query_ras_error_count(adev, err_data);
1334
1335                         if ((info->head.block == AMDGPU_RAS_BLOCK__SDMA) ||
1336                             (info->head.block == AMDGPU_RAS_BLOCK__GFX) ||
1337                             (info->head.block == AMDGPU_RAS_BLOCK__MMHUB)) {
1338                                 if (block_obj->hw_ops->query_ras_error_status)
1339                                         block_obj->hw_ops->query_ras_error_status(adev);
1340                         }
1341                 }
1342         } else {
1343                 if (amdgpu_aca_is_enabled(adev)) {
1344                         ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_UE, err_data, qctx);
1345                         if (ret)
1346                                 return ret;
1347
1348                         ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_CE, err_data, qctx);
1349                         if (ret)
1350                                 return ret;
1351
1352                         ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_DEFERRED, err_data, qctx);
1353                         if (ret)
1354                                 return ret;
1355                 } else {
1356                         /* FIXME: add code to check return value later */
1357                         amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_UE, err_data, qctx);
1358                         amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_CE, err_data, qctx);
1359                 }
1360         }
1361
1362         return 0;
1363 }
1364
1365 /* query/inject/cure begin */
1366 int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_if *info)
1367 {
1368         struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
1369         struct ras_err_data err_data;
1370         struct ras_query_context qctx;
1371         unsigned int error_query_mode;
1372         int ret;
1373
1374         if (!obj)
1375                 return -EINVAL;
1376
1377         ret = amdgpu_ras_error_data_init(&err_data);
1378         if (ret)
1379                 return ret;
1380
1381         if (!amdgpu_ras_get_error_query_mode(adev, &error_query_mode))
1382                 return -EINVAL;
1383
1384         memset(&qctx, 0, sizeof(qctx));
1385         qctx.event_id = amdgpu_ras_acquire_event_id(adev, amdgpu_ras_intr_triggered() ?
1386                                                    RAS_EVENT_TYPE_ISR : RAS_EVENT_TYPE_INVALID);
1387
1388         if (!down_read_trylock(&adev->reset_domain->sem)) {
1389                 ret = -EIO;
1390                 goto out_fini_err_data;
1391         }
1392
1393         ret = amdgpu_ras_query_error_status_helper(adev, info,
1394                                                    &err_data,
1395                                                    &qctx,
1396                                                    error_query_mode);
1397         up_read(&adev->reset_domain->sem);
1398         if (ret)
1399                 goto out_fini_err_data;
1400
1401         amdgpu_rasmgr_error_data_statistic_update(obj, &err_data);
1402
1403         info->ue_count = obj->err_data.ue_count;
1404         info->ce_count = obj->err_data.ce_count;
1405         info->de_count = obj->err_data.de_count;
1406
1407         amdgpu_ras_error_generate_report(adev, info, &err_data, &qctx);
1408
1409 out_fini_err_data:
1410         amdgpu_ras_error_data_fini(&err_data);
1411
1412         return ret;
1413 }
1414
1415 int amdgpu_ras_reset_error_count(struct amdgpu_device *adev,
1416                 enum amdgpu_ras_block block)
1417 {
1418         struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0);
1419         const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
1420         const struct aca_smu_funcs *smu_funcs = adev->aca.smu_funcs;
1421
1422         if (!block_obj || !block_obj->hw_ops) {
1423                 dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
1424                                 ras_block_str(block));
1425                 return -EOPNOTSUPP;
1426         }
1427
1428         if (!amdgpu_ras_is_supported(adev, block) ||
1429             !amdgpu_ras_get_aca_debug_mode(adev))
1430                 return -EOPNOTSUPP;
1431
1432         /* skip ras error reset in gpu reset */
1433         if ((amdgpu_in_reset(adev) || amdgpu_ras_in_recovery(adev)) &&
1434             ((smu_funcs && smu_funcs->set_debug_mode) ||
1435              (mca_funcs && mca_funcs->mca_set_debug_mode)))
1436                 return -EOPNOTSUPP;
1437
1438         if (block_obj->hw_ops->reset_ras_error_count)
1439                 block_obj->hw_ops->reset_ras_error_count(adev);
1440
1441         return 0;
1442 }
1443
1444 int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
1445                 enum amdgpu_ras_block block)
1446 {
1447         struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0);
1448
1449         if (amdgpu_ras_reset_error_count(adev, block) == -EOPNOTSUPP)
1450                 return 0;
1451
1452         if ((block == AMDGPU_RAS_BLOCK__GFX) ||
1453             (block == AMDGPU_RAS_BLOCK__MMHUB)) {
1454                 if (block_obj->hw_ops->reset_ras_error_status)
1455                         block_obj->hw_ops->reset_ras_error_status(adev);
1456         }
1457
1458         return 0;
1459 }
1460
1461 /* wrapper of psp_ras_trigger_error */
1462 int amdgpu_ras_error_inject(struct amdgpu_device *adev,
1463                 struct ras_inject_if *info)
1464 {
1465         struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
1466         struct ta_ras_trigger_error_input block_info = {
1467                 .block_id =  amdgpu_ras_block_to_ta(info->head.block),
1468                 .inject_error_type = amdgpu_ras_error_to_ta(info->head.type),
1469                 .sub_block_index = info->head.sub_block_index,
1470                 .address = info->address,
1471                 .value = info->value,
1472         };
1473         int ret = -EINVAL;
1474         struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev,
1475                                                         info->head.block,
1476                                                         info->head.sub_block_index);
1477
1478         /* inject on guest isn't allowed, return success directly */
1479         if (amdgpu_sriov_vf(adev))
1480                 return 0;
1481
1482         if (!obj)
1483                 return -EINVAL;
1484
1485         if (!block_obj || !block_obj->hw_ops)   {
1486                 dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
1487                              get_ras_block_str(&info->head));
1488                 return -EINVAL;
1489         }
1490
1491         /* Calculate XGMI relative offset */
1492         if (adev->gmc.xgmi.num_physical_nodes > 1 &&
1493             info->head.block != AMDGPU_RAS_BLOCK__GFX) {
1494                 block_info.address =
1495                         amdgpu_xgmi_get_relative_phy_addr(adev,
1496                                                           block_info.address);
1497         }
1498
1499         if (block_obj->hw_ops->ras_error_inject) {
1500                 if (info->head.block == AMDGPU_RAS_BLOCK__GFX)
1501                         ret = block_obj->hw_ops->ras_error_inject(adev, info, info->instance_mask);
1502                 else /* Special ras_error_inject is defined (e.g: xgmi) */
1503                         ret = block_obj->hw_ops->ras_error_inject(adev, &block_info,
1504                                                 info->instance_mask);
1505         } else {
1506                 /* default path */
1507                 ret = psp_ras_trigger_error(&adev->psp, &block_info, info->instance_mask);
1508         }
1509
1510         if (ret)
1511                 dev_err(adev->dev, "ras inject %s failed %d\n",
1512                         get_ras_block_str(&info->head), ret);
1513
1514         return ret;
1515 }
1516
1517 /**
1518  * amdgpu_ras_query_error_count_helper -- Get error counter for specific IP
1519  * @adev: pointer to AMD GPU device
1520  * @ce_count: pointer to an integer to be set to the count of correctible errors.
1521  * @ue_count: pointer to an integer to be set to the count of uncorrectible errors.
1522  * @query_info: pointer to ras_query_if
1523  *
1524  * Return 0 for query success or do nothing, otherwise return an error
1525  * on failures
1526  */
1527 static int amdgpu_ras_query_error_count_helper(struct amdgpu_device *adev,
1528                                                unsigned long *ce_count,
1529                                                unsigned long *ue_count,
1530                                                struct ras_query_if *query_info)
1531 {
1532         int ret;
1533
1534         if (!query_info)
1535                 /* do nothing if query_info is not specified */
1536                 return 0;
1537
1538         ret = amdgpu_ras_query_error_status(adev, query_info);
1539         if (ret)
1540                 return ret;
1541
1542         *ce_count += query_info->ce_count;
1543         *ue_count += query_info->ue_count;
1544
1545         /* some hardware/IP supports read to clear
1546          * no need to explictly reset the err status after the query call */
1547         if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 2) &&
1548             amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 4)) {
1549                 if (amdgpu_ras_reset_error_status(adev, query_info->head.block))
1550                         dev_warn(adev->dev,
1551                                  "Failed to reset error counter and error status\n");
1552         }
1553
1554         return 0;
1555 }
1556
1557 /**
1558  * amdgpu_ras_query_error_count -- Get error counts of all IPs or specific IP
1559  * @adev: pointer to AMD GPU device
1560  * @ce_count: pointer to an integer to be set to the count of correctible errors.
1561  * @ue_count: pointer to an integer to be set to the count of uncorrectible
1562  * errors.
1563  * @query_info: pointer to ras_query_if if the query request is only for
1564  * specific ip block; if info is NULL, then the qurey request is for
1565  * all the ip blocks that support query ras error counters/status
1566  *
1567  * If set, @ce_count or @ue_count, count and return the corresponding
1568  * error counts in those integer pointers. Return 0 if the device
1569  * supports RAS. Return -EOPNOTSUPP if the device doesn't support RAS.
1570  */
1571 int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
1572                                  unsigned long *ce_count,
1573                                  unsigned long *ue_count,
1574                                  struct ras_query_if *query_info)
1575 {
1576         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1577         struct ras_manager *obj;
1578         unsigned long ce, ue;
1579         int ret;
1580
1581         if (!adev->ras_enabled || !con)
1582                 return -EOPNOTSUPP;
1583
1584         /* Don't count since no reporting.
1585          */
1586         if (!ce_count && !ue_count)
1587                 return 0;
1588
1589         ce = 0;
1590         ue = 0;
1591         if (!query_info) {
1592                 /* query all the ip blocks that support ras query interface */
1593                 list_for_each_entry(obj, &con->head, node) {
1594                         struct ras_query_if info = {
1595                                 .head = obj->head,
1596                         };
1597
1598                         ret = amdgpu_ras_query_error_count_helper(adev, &ce, &ue, &info);
1599                 }
1600         } else {
1601                 /* query specific ip block */
1602                 ret = amdgpu_ras_query_error_count_helper(adev, &ce, &ue, query_info);
1603         }
1604
1605         if (ret)
1606                 return ret;
1607
1608         if (ce_count)
1609                 *ce_count = ce;
1610
1611         if (ue_count)
1612                 *ue_count = ue;
1613
1614         return 0;
1615 }
1616 /* query/inject/cure end */
1617
1618
1619 /* sysfs begin */
1620
1621 static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
1622                 struct ras_badpage **bps, unsigned int *count);
1623
1624 static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
1625 {
1626         switch (flags) {
1627         case AMDGPU_RAS_RETIRE_PAGE_RESERVED:
1628                 return "R";
1629         case AMDGPU_RAS_RETIRE_PAGE_PENDING:
1630                 return "P";
1631         case AMDGPU_RAS_RETIRE_PAGE_FAULT:
1632         default:
1633                 return "F";
1634         }
1635 }
1636
1637 /**
1638  * DOC: AMDGPU RAS sysfs gpu_vram_bad_pages Interface
1639  *
1640  * It allows user to read the bad pages of vram on the gpu through
1641  * /sys/class/drm/card[0/1/2...]/device/ras/gpu_vram_bad_pages
1642  *
1643  * It outputs multiple lines, and each line stands for one gpu page.
1644  *
1645  * The format of one line is below,
1646  * gpu pfn : gpu page size : flags
1647  *
1648  * gpu pfn and gpu page size are printed in hex format.
 * flags can be one of the characters below:
1650  *
1651  * R: reserved, this gpu page is reserved and not able to use.
1652  *
1653  * P: pending for reserve, this gpu page is marked as bad, will be reserved
1654  * in next window of page_reserve.
1655  *
1656  * F: unable to reserve. this gpu page can't be reserved due to some reasons.
1657  *
1658  * Examples:
1659  *
1660  * .. code-block:: bash
1661  *
1662  *      0x00000001 : 0x00001000 : R
1663  *      0x00000002 : 0x00001000 : P
1664  *
1665  */
1666
1667 static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f,
1668                 struct kobject *kobj, struct bin_attribute *attr,
1669                 char *buf, loff_t ppos, size_t count)
1670 {
1671         struct amdgpu_ras *con =
1672                 container_of(attr, struct amdgpu_ras, badpages_attr);
1673         struct amdgpu_device *adev = con->adev;
1674         const unsigned int element_size =
1675                 sizeof("0xabcdabcd : 0x12345678 : R\n") - 1;
1676         unsigned int start = div64_ul(ppos + element_size - 1, element_size);
1677         unsigned int end = div64_ul(ppos + count - 1, element_size);
1678         ssize_t s = 0;
1679         struct ras_badpage *bps = NULL;
1680         unsigned int bps_count = 0;
1681
1682         memset(buf, 0, count);
1683
1684         if (amdgpu_ras_badpages_read(adev, &bps, &bps_count))
1685                 return 0;
1686
1687         for (; start < end && start < bps_count; start++)
1688                 s += scnprintf(&buf[s], element_size + 1,
1689                                 "0x%08x : 0x%08x : %1s\n",
1690                                 bps[start].bp,
1691                                 bps[start].size,
1692                                 amdgpu_ras_badpage_flags_str(bps[start].flags));
1693
1694         kfree(bps);
1695
1696         return s;
1697 }
1698
1699 static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev,
1700                 struct device_attribute *attr, char *buf)
1701 {
1702         struct amdgpu_ras *con =
1703                 container_of(attr, struct amdgpu_ras, features_attr);
1704
1705         return sysfs_emit(buf, "feature mask: 0x%x\n", con->features);
1706 }
1707
1708 static ssize_t amdgpu_ras_sysfs_version_show(struct device *dev,
1709                 struct device_attribute *attr, char *buf)
1710 {
1711         struct amdgpu_ras *con =
1712                 container_of(attr, struct amdgpu_ras, version_attr);
1713         return sysfs_emit(buf, "table version: 0x%x\n", con->eeprom_control.tbl_hdr.version);
1714 }
1715
1716 static ssize_t amdgpu_ras_sysfs_schema_show(struct device *dev,
1717                 struct device_attribute *attr, char *buf)
1718 {
1719         struct amdgpu_ras *con =
1720                 container_of(attr, struct amdgpu_ras, schema_attr);
1721         return sysfs_emit(buf, "schema: 0x%x\n", con->schema);
1722 }
1723
1724 static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev)
1725 {
1726         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1727
1728         if (adev->dev->kobj.sd)
1729                 sysfs_remove_file_from_group(&adev->dev->kobj,
1730                                 &con->badpages_attr.attr,
1731                                 RAS_FS_NAME);
1732 }
1733
1734 static int amdgpu_ras_sysfs_remove_dev_attr_node(struct amdgpu_device *adev)
1735 {
1736         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1737         struct attribute *attrs[] = {
1738                 &con->features_attr.attr,
1739                 &con->version_attr.attr,
1740                 &con->schema_attr.attr,
1741                 NULL
1742         };
1743         struct attribute_group group = {
1744                 .name = RAS_FS_NAME,
1745                 .attrs = attrs,
1746         };
1747
1748         if (adev->dev->kobj.sd)
1749                 sysfs_remove_group(&adev->dev->kobj, &group);
1750
1751         return 0;
1752 }
1753
1754 int amdgpu_ras_sysfs_create(struct amdgpu_device *adev,
1755                 struct ras_common_if *head)
1756 {
1757         struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
1758
1759         if (amdgpu_aca_is_enabled(adev))
1760                 return 0;
1761
1762         if (!obj || obj->attr_inuse)
1763                 return -EINVAL;
1764
1765         get_obj(obj);
1766
1767         snprintf(obj->fs_data.sysfs_name, sizeof(obj->fs_data.sysfs_name),
1768                 "%s_err_count", head->name);
1769
1770         obj->sysfs_attr = (struct device_attribute){
1771                 .attr = {
1772                         .name = obj->fs_data.sysfs_name,
1773                         .mode = S_IRUGO,
1774                 },
1775                         .show = amdgpu_ras_sysfs_read,
1776         };
1777         sysfs_attr_init(&obj->sysfs_attr.attr);
1778
1779         if (sysfs_add_file_to_group(&adev->dev->kobj,
1780                                 &obj->sysfs_attr.attr,
1781                                 RAS_FS_NAME)) {
1782                 put_obj(obj);
1783                 return -EINVAL;
1784         }
1785
1786         obj->attr_inuse = 1;
1787
1788         return 0;
1789 }
1790
1791 int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev,
1792                 struct ras_common_if *head)
1793 {
1794         struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
1795
1796         if (amdgpu_aca_is_enabled(adev))
1797                 return 0;
1798
1799         if (!obj || !obj->attr_inuse)
1800                 return -EINVAL;
1801
1802         if (adev->dev->kobj.sd)
1803                 sysfs_remove_file_from_group(&adev->dev->kobj,
1804                                 &obj->sysfs_attr.attr,
1805                                 RAS_FS_NAME);
1806         obj->attr_inuse = 0;
1807         put_obj(obj);
1808
1809         return 0;
1810 }
1811
1812 static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev)
1813 {
1814         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1815         struct ras_manager *obj, *tmp;
1816
1817         list_for_each_entry_safe(obj, tmp, &con->head, node) {
1818                 amdgpu_ras_sysfs_remove(adev, &obj->head);
1819         }
1820
1821         if (amdgpu_bad_page_threshold != 0)
1822                 amdgpu_ras_sysfs_remove_bad_page_node(adev);
1823
1824         amdgpu_ras_sysfs_remove_dev_attr_node(adev);
1825
1826         return 0;
1827 }
1828 /* sysfs end */
1829
1830 /**
1831  * DOC: AMDGPU RAS Reboot Behavior for Unrecoverable Errors
1832  *
 * Normally when there is an uncorrectable error, the driver will reset
 * the GPU to recover.  However, for the case of an unrecoverable error,
 * the driver provides an interface to reboot the system automatically
 * instead.
1837  *
1838  * The following file in debugfs provides that interface:
1839  * /sys/kernel/debug/dri/[0/1/2...]/ras/auto_reboot
1840  *
1841  * Usage:
1842  *
1843  * .. code-block:: bash
1844  *
1845  *      echo true > .../ras/auto_reboot
1846  *
1847  */
1848 /* debugfs begin */
1849 static struct dentry *amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
1850 {
1851         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1852         struct amdgpu_ras_eeprom_control *eeprom = &con->eeprom_control;
1853         struct drm_minor  *minor = adev_to_drm(adev)->primary;
1854         struct dentry     *dir;
1855
1856         dir = debugfs_create_dir(RAS_FS_NAME, minor->debugfs_root);
1857         debugfs_create_file("ras_ctrl", S_IWUGO | S_IRUGO, dir, adev,
1858                             &amdgpu_ras_debugfs_ctrl_ops);
1859         debugfs_create_file("ras_eeprom_reset", S_IWUGO | S_IRUGO, dir, adev,
1860                             &amdgpu_ras_debugfs_eeprom_ops);
1861         debugfs_create_u32("bad_page_cnt_threshold", 0444, dir,
1862                            &con->bad_page_cnt_threshold);
1863         debugfs_create_u32("ras_num_recs", 0444, dir, &eeprom->ras_num_recs);
1864         debugfs_create_x32("ras_hw_enabled", 0444, dir, &adev->ras_hw_enabled);
1865         debugfs_create_x32("ras_enabled", 0444, dir, &adev->ras_enabled);
1866         debugfs_create_file("ras_eeprom_size", S_IRUGO, dir, adev,
1867                             &amdgpu_ras_debugfs_eeprom_size_ops);
1868         con->de_ras_eeprom_table = debugfs_create_file("ras_eeprom_table",
1869                                                        S_IRUGO, dir, adev,
1870                                                        &amdgpu_ras_debugfs_eeprom_table_ops);
1871         amdgpu_ras_debugfs_set_ret_size(&con->eeprom_control);
1872
1873         /*
1874          * After one uncorrectable error happens, usually GPU recovery will
1875          * be scheduled. But due to the known problem in GPU recovery failing
1876          * to bring GPU back, below interface provides one direct way to
1877          * user to reboot system automatically in such case within
1878          * ERREVENT_ATHUB_INTERRUPT generated. Normal GPU recovery routine
1879          * will never be called.
1880          */
1881         debugfs_create_bool("auto_reboot", S_IWUGO | S_IRUGO, dir, &con->reboot);
1882
1883         /*
1884          * User could set this not to clean up hardware's error count register
1885          * of RAS IPs during ras recovery.
1886          */
1887         debugfs_create_bool("disable_ras_err_cnt_harvest", 0644, dir,
1888                             &con->disable_ras_err_cnt_harvest);
1889         return dir;
1890 }
1891
1892 static void amdgpu_ras_debugfs_create(struct amdgpu_device *adev,
1893                                       struct ras_fs_if *head,
1894                                       struct dentry *dir)
1895 {
1896         struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head->head);
1897
1898         if (!obj || !dir)
1899                 return;
1900
1901         get_obj(obj);
1902
1903         memcpy(obj->fs_data.debugfs_name,
1904                         head->debugfs_name,
1905                         sizeof(obj->fs_data.debugfs_name));
1906
1907         debugfs_create_file(obj->fs_data.debugfs_name, S_IWUGO | S_IRUGO, dir,
1908                             obj, &amdgpu_ras_debugfs_ops);
1909 }
1910
1911 static bool amdgpu_ras_aca_is_supported(struct amdgpu_device *adev)
1912 {
1913         bool ret;
1914
1915         switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
1916         case IP_VERSION(13, 0, 6):
1917         case IP_VERSION(13, 0, 14):
1918                 ret = true;
1919                 break;
1920         default:
1921                 ret = false;
1922                 break;
1923         }
1924
1925         return ret;
1926 }
1927
1928 void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev)
1929 {
1930         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1931         struct dentry *dir;
1932         struct ras_manager *obj;
1933         struct ras_fs_if fs_info;
1934
1935         /*
1936          * it won't be called in resume path, no need to check
1937          * suspend and gpu reset status
1938          */
1939         if (!IS_ENABLED(CONFIG_DEBUG_FS) || !con)
1940                 return;
1941
1942         dir = amdgpu_ras_debugfs_create_ctrl_node(adev);
1943
1944         list_for_each_entry(obj, &con->head, node) {
1945                 if (amdgpu_ras_is_supported(adev, obj->head.block) &&
1946                         (obj->attr_inuse == 1)) {
1947                         sprintf(fs_info.debugfs_name, "%s_err_inject",
1948                                         get_ras_block_str(&obj->head));
1949                         fs_info.head = obj->head;
1950                         amdgpu_ras_debugfs_create(adev, &fs_info, dir);
1951                 }
1952         }
1953
1954         if (amdgpu_ras_aca_is_supported(adev)) {
1955                 if (amdgpu_aca_is_enabled(adev))
1956                         amdgpu_aca_smu_debugfs_init(adev, dir);
1957                 else
1958                         amdgpu_mca_smu_debugfs_init(adev, dir);
1959         }
1960 }
1961
1962 /* debugfs end */
1963
1964 /* ras fs */
1965 static BIN_ATTR(gpu_vram_bad_pages, S_IRUGO,
1966                 amdgpu_ras_sysfs_badpages_read, NULL, 0);
1967 static DEVICE_ATTR(features, S_IRUGO,
1968                 amdgpu_ras_sysfs_features_read, NULL);
1969 static DEVICE_ATTR(version, 0444,
1970                 amdgpu_ras_sysfs_version_show, NULL);
1971 static DEVICE_ATTR(schema, 0444,
1972                 amdgpu_ras_sysfs_schema_show, NULL);
1973 static int amdgpu_ras_fs_init(struct amdgpu_device *adev)
1974 {
1975         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1976         struct attribute_group group = {
1977                 .name = RAS_FS_NAME,
1978         };
1979         struct attribute *attrs[] = {
1980                 &con->features_attr.attr,
1981                 &con->version_attr.attr,
1982                 &con->schema_attr.attr,
1983                 NULL
1984         };
1985         struct bin_attribute *bin_attrs[] = {
1986                 NULL,
1987                 NULL,
1988         };
1989         int r;
1990
1991         group.attrs = attrs;
1992
1993         /* add features entry */
1994         con->features_attr = dev_attr_features;
1995         sysfs_attr_init(attrs[0]);
1996
1997         /* add version entry */
1998         con->version_attr = dev_attr_version;
1999         sysfs_attr_init(attrs[1]);
2000
2001         /* add schema entry */
2002         con->schema_attr = dev_attr_schema;
2003         sysfs_attr_init(attrs[2]);
2004
2005         if (amdgpu_bad_page_threshold != 0) {
2006                 /* add bad_page_features entry */
2007                 bin_attr_gpu_vram_bad_pages.private = NULL;
2008                 con->badpages_attr = bin_attr_gpu_vram_bad_pages;
2009                 bin_attrs[0] = &con->badpages_attr;
2010                 group.bin_attrs = bin_attrs;
2011                 sysfs_bin_attr_init(bin_attrs[0]);
2012         }
2013
2014         r = sysfs_create_group(&adev->dev->kobj, &group);
2015         if (r)
2016                 dev_err(adev->dev, "Failed to create RAS sysfs group!");
2017
2018         return 0;
2019 }
2020
2021 static int amdgpu_ras_fs_fini(struct amdgpu_device *adev)
2022 {
2023         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
2024         struct ras_manager *con_obj, *ip_obj, *tmp;
2025
2026         if (IS_ENABLED(CONFIG_DEBUG_FS)) {
2027                 list_for_each_entry_safe(con_obj, tmp, &con->head, node) {
2028                         ip_obj = amdgpu_ras_find_obj(adev, &con_obj->head);
2029                         if (ip_obj)
2030                                 put_obj(ip_obj);
2031                 }
2032         }
2033
2034         amdgpu_ras_sysfs_remove_all(adev);
2035         return 0;
2036 }
2037 /* ras fs end */
2038
2039 /* ih begin */
2040
2041 /* For the hardware that cannot enable bif ring for both ras_controller_irq
2042  * and ras_err_evnet_athub_irq ih cookies, the driver has to poll status
2043  * register to check whether the interrupt is triggered or not, and properly
2044  * ack the interrupt if it is there
2045  */
2046 void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev)
2047 {
2048         /* Fatal error events are handled on host side */
2049         if (amdgpu_sriov_vf(adev))
2050                 return;
2051
2052         if (adev->nbio.ras &&
2053             adev->nbio.ras->handle_ras_controller_intr_no_bifring)
2054                 adev->nbio.ras->handle_ras_controller_intr_no_bifring(adev);
2055
2056         if (adev->nbio.ras &&
2057             adev->nbio.ras->handle_ras_err_event_athub_intr_no_bifring)
2058                 adev->nbio.ras->handle_ras_err_event_athub_intr_no_bifring(adev);
2059 }
2060
2061 static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *obj,
2062                                 struct amdgpu_iv_entry *entry)
2063 {
2064         bool poison_stat = false;
2065         struct amdgpu_device *adev = obj->adev;
2066         struct amdgpu_ras_block_object *block_obj =
2067                 amdgpu_ras_get_ras_block(adev, obj->head.block, 0);
2068         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
2069
2070         if (!block_obj || !con)
2071                 return;
2072
2073         /* both query_poison_status and handle_poison_consumption are optional,
2074          * but at least one of them should be implemented if we need poison
2075          * consumption handler
2076          */
2077         if (block_obj->hw_ops && block_obj->hw_ops->query_poison_status) {
2078                 poison_stat = block_obj->hw_ops->query_poison_status(adev);
2079                 if (!poison_stat) {
2080                         /* Not poison consumption interrupt, no need to handle it */
2081                         dev_info(adev->dev, "No RAS poison status in %s poison IH.\n",
2082                                         block_obj->ras_comm.name);
2083
2084                         return;
2085                 }
2086         }
2087
2088         amdgpu_umc_poison_handler(adev, obj->head.block, 0);
2089
2090         if (block_obj->hw_ops && block_obj->hw_ops->handle_poison_consumption)
2091                 poison_stat = block_obj->hw_ops->handle_poison_consumption(adev);
2092
2093         /* gpu reset is fallback for failed and default cases.
2094          * For RMA case, amdgpu_umc_poison_handler will handle gpu reset.
2095          */
2096         if (poison_stat && !con->is_rma) {
2097                 dev_info(adev->dev, "GPU reset for %s RAS poison consumption is issued!\n",
2098                                 block_obj->ras_comm.name);
2099                 amdgpu_ras_reset_gpu(adev);
2100         }
2101
2102         if (!poison_stat)
2103                 amdgpu_gfx_poison_consumption_handler(adev, entry);
2104 }
2105
2106 static void amdgpu_ras_interrupt_poison_creation_handler(struct ras_manager *obj,
2107                                 struct amdgpu_iv_entry *entry)
2108 {
2109         dev_info(obj->adev->dev,
2110                 "Poison is created\n");
2111
2112         if (amdgpu_ip_version(obj->adev, UMC_HWIP, 0) >= IP_VERSION(12, 0, 0)) {
2113                 struct amdgpu_ras *con = amdgpu_ras_get_context(obj->adev);
2114
2115                 atomic_inc(&con->page_retirement_req_cnt);
2116                 atomic_inc(&con->poison_creation_count);
2117
2118                 wake_up(&con->page_retirement_wq);
2119         }
2120 }
2121
2122 static void amdgpu_ras_interrupt_umc_handler(struct ras_manager *obj,
2123                                 struct amdgpu_iv_entry *entry)
2124 {
2125         struct ras_ih_data *data = &obj->ih_data;
2126         struct ras_err_data err_data;
2127         int ret;
2128
2129         if (!data->cb)
2130                 return;
2131
2132         ret = amdgpu_ras_error_data_init(&err_data);
2133         if (ret)
2134                 return;
2135
2136         /* Let IP handle its data, maybe we need get the output
2137          * from the callback to update the error type/count, etc
2138          */
2139         amdgpu_ras_set_fed(obj->adev, true);
2140         ret = data->cb(obj->adev, &err_data, entry);
2141         /* ue will trigger an interrupt, and in that case
2142          * we need do a reset to recovery the whole system.
2143          * But leave IP do that recovery, here we just dispatch
2144          * the error.
2145          */
2146         if (ret == AMDGPU_RAS_SUCCESS) {
2147                 /* these counts could be left as 0 if
2148                  * some blocks do not count error number
2149                  */
2150                 obj->err_data.ue_count += err_data.ue_count;
2151                 obj->err_data.ce_count += err_data.ce_count;
2152                 obj->err_data.de_count += err_data.de_count;
2153         }
2154
2155         amdgpu_ras_error_data_fini(&err_data);
2156 }
2157
2158 static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
2159 {
2160         struct ras_ih_data *data = &obj->ih_data;
2161         struct amdgpu_iv_entry entry;
2162
2163         while (data->rptr != data->wptr) {
2164                 rmb();
2165                 memcpy(&entry, &data->ring[data->rptr],
2166                                 data->element_size);
2167
2168                 wmb();
2169                 data->rptr = (data->aligned_element_size +
2170                                 data->rptr) % data->ring_size;
2171
2172                 if (amdgpu_ras_is_poison_mode_supported(obj->adev)) {
2173                         if (obj->head.block == AMDGPU_RAS_BLOCK__UMC)
2174                                 amdgpu_ras_interrupt_poison_creation_handler(obj, &entry);
2175                         else
2176                                 amdgpu_ras_interrupt_poison_consumption_handler(obj, &entry);
2177                 } else {
2178                         if (obj->head.block == AMDGPU_RAS_BLOCK__UMC)
2179                                 amdgpu_ras_interrupt_umc_handler(obj, &entry);
2180                         else
2181                                 dev_warn(obj->adev->dev,
2182                                         "No RAS interrupt handler for non-UMC block with poison disabled.\n");
2183                 }
2184         }
2185 }
2186
2187 static void amdgpu_ras_interrupt_process_handler(struct work_struct *work)
2188 {
2189         struct ras_ih_data *data =
2190                 container_of(work, struct ras_ih_data, ih_work);
2191         struct ras_manager *obj =
2192                 container_of(data, struct ras_manager, ih_data);
2193
2194         amdgpu_ras_interrupt_handler(obj);
2195 }
2196
2197 int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev,
2198                 struct ras_dispatch_if *info)
2199 {
2200         struct ras_manager *obj;
2201         struct ras_ih_data *data;
2202
2203         obj = amdgpu_ras_find_obj(adev, &info->head);
2204         if (!obj)
2205                 return -EINVAL;
2206
2207         data = &obj->ih_data;
2208
2209         if (data->inuse == 0)
2210                 return 0;
2211
2212         /* Might be overflow... */
2213         memcpy(&data->ring[data->wptr], info->entry,
2214                         data->element_size);
2215
2216         wmb();
2217         data->wptr = (data->aligned_element_size +
2218                         data->wptr) % data->ring_size;
2219
2220         schedule_work(&data->ih_work);
2221
2222         return 0;
2223 }
2224
2225 int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev,
2226                 struct ras_common_if *head)
2227 {
2228         struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
2229         struct ras_ih_data *data;
2230
2231         if (!obj)
2232                 return -EINVAL;
2233
2234         data = &obj->ih_data;
2235         if (data->inuse == 0)
2236                 return 0;
2237
2238         cancel_work_sync(&data->ih_work);
2239
2240         kfree(data->ring);
2241         memset(data, 0, sizeof(*data));
2242         put_obj(obj);
2243
2244         return 0;
2245 }
2246
2247 int amdgpu_ras_interrupt_add_handler(struct amdgpu_device *adev,
2248                 struct ras_common_if *head)
2249 {
2250         struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
2251         struct ras_ih_data *data;
2252         struct amdgpu_ras_block_object *ras_obj;
2253
2254         if (!obj) {
2255                 /* in case we registe the IH before enable ras feature */
2256                 obj = amdgpu_ras_create_obj(adev, head);
2257                 if (!obj)
2258                         return -EINVAL;
2259         } else
2260                 get_obj(obj);
2261
2262         ras_obj = container_of(head, struct amdgpu_ras_block_object, ras_comm);
2263
2264         data = &obj->ih_data;
2265         /* add the callback.etc */
2266         *data = (struct ras_ih_data) {
2267                 .inuse = 0,
2268                 .cb = ras_obj->ras_cb,
2269                 .element_size = sizeof(struct amdgpu_iv_entry),
2270                 .rptr = 0,
2271                 .wptr = 0,
2272         };
2273
2274         INIT_WORK(&data->ih_work, amdgpu_ras_interrupt_process_handler);
2275
2276         data->aligned_element_size = ALIGN(data->element_size, 8);
2277         /* the ring can store 64 iv entries. */
2278         data->ring_size = 64 * data->aligned_element_size;
2279         data->ring = kmalloc(data->ring_size, GFP_KERNEL);
2280         if (!data->ring) {
2281                 put_obj(obj);
2282                 return -ENOMEM;
2283         }
2284
2285         /* IH is ready */
2286         data->inuse = 1;
2287
2288         return 0;
2289 }
2290
2291 static int amdgpu_ras_interrupt_remove_all(struct amdgpu_device *adev)
2292 {
2293         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
2294         struct ras_manager *obj, *tmp;
2295
2296         list_for_each_entry_safe(obj, tmp, &con->head, node) {
2297                 amdgpu_ras_interrupt_remove_handler(adev, &obj->head);
2298         }
2299
2300         return 0;
2301 }
2302 /* ih end */
2303
2304 /* traversal all IPs except NBIO to query error counter */
2305 static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev)
2306 {
2307         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
2308         struct ras_manager *obj;
2309
2310         if (!adev->ras_enabled || !con)
2311                 return;
2312
2313         list_for_each_entry(obj, &con->head, node) {
2314                 struct ras_query_if info = {
2315                         .head = obj->head,
2316                 };
2317
2318                 /*
2319                  * PCIE_BIF IP has one different isr by ras controller
2320                  * interrupt, the specific ras counter query will be
2321                  * done in that isr. So skip such block from common
2322                  * sync flood interrupt isr calling.
2323                  */
2324                 if (info.head.block == AMDGPU_RAS_BLOCK__PCIE_BIF)
2325                         continue;
2326
2327                 /*
2328                  * this is a workaround for aldebaran, skip send msg to
2329                  * smu to get ecc_info table due to smu handle get ecc
2330                  * info table failed temporarily.
2331                  * should be removed until smu fix handle ecc_info table.
2332                  */
2333                 if ((info.head.block == AMDGPU_RAS_BLOCK__UMC) &&
2334                     (amdgpu_ip_version(adev, MP1_HWIP, 0) ==
2335                      IP_VERSION(13, 0, 2)))
2336                         continue;
2337
2338                 amdgpu_ras_query_error_status(adev, &info);
2339
2340                 if (amdgpu_ip_version(adev, MP0_HWIP, 0) !=
2341                             IP_VERSION(11, 0, 2) &&
2342                     amdgpu_ip_version(adev, MP0_HWIP, 0) !=
2343                             IP_VERSION(11, 0, 4) &&
2344                     amdgpu_ip_version(adev, MP0_HWIP, 0) !=
2345                             IP_VERSION(13, 0, 0)) {
2346                         if (amdgpu_ras_reset_error_status(adev, info.head.block))
2347                                 dev_warn(adev->dev, "Failed to reset error counter and error status");
2348                 }
2349         }
2350 }
2351
2352 /* Parse RdRspStatus and WrRspStatus */
2353 static void amdgpu_ras_error_status_query(struct amdgpu_device *adev,
2354                                           struct ras_query_if *info)
2355 {
2356         struct amdgpu_ras_block_object *block_obj;
2357         /*
2358          * Only two block need to query read/write
2359          * RspStatus at current state
2360          */
2361         if ((info->head.block != AMDGPU_RAS_BLOCK__GFX) &&
2362                 (info->head.block != AMDGPU_RAS_BLOCK__MMHUB))
2363                 return;
2364
2365         block_obj = amdgpu_ras_get_ras_block(adev,
2366                                         info->head.block,
2367                                         info->head.sub_block_index);
2368
2369         if (!block_obj || !block_obj->hw_ops) {
2370                 dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
2371                              get_ras_block_str(&info->head));
2372                 return;
2373         }
2374
2375         if (block_obj->hw_ops->query_ras_error_status)
2376                 block_obj->hw_ops->query_ras_error_status(adev);
2377
2378 }
2379
2380 static void amdgpu_ras_query_err_status(struct amdgpu_device *adev)
2381 {
2382         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
2383         struct ras_manager *obj;
2384
2385         if (!adev->ras_enabled || !con)
2386                 return;
2387
2388         list_for_each_entry(obj, &con->head, node) {
2389                 struct ras_query_if info = {
2390                         .head = obj->head,
2391                 };
2392
2393                 amdgpu_ras_error_status_query(adev, &info);
2394         }
2395 }
2396
2397 /* recovery begin */
2398
2399 /* return 0 on success.
2400  * caller need free bps.
2401  */
2402 static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
2403                 struct ras_badpage **bps, unsigned int *count)
2404 {
2405         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
2406         struct ras_err_handler_data *data;
2407         int i = 0;
2408         int ret = 0, status;
2409
2410         if (!con || !con->eh_data || !bps || !count)
2411                 return -EINVAL;
2412
2413         mutex_lock(&con->recovery_lock);
2414         data = con->eh_data;
2415         if (!data || data->count == 0) {
2416                 *bps = NULL;
2417                 ret = -EINVAL;
2418                 goto out;
2419         }
2420
2421         *bps = kmalloc(sizeof(struct ras_badpage) * data->count, GFP_KERNEL);
2422         if (!*bps) {
2423                 ret = -ENOMEM;
2424                 goto out;
2425         }
2426
2427         for (; i < data->count; i++) {
2428                 (*bps)[i] = (struct ras_badpage){
2429                         .bp = data->bps[i].retired_page,
2430                         .size = AMDGPU_GPU_PAGE_SIZE,
2431                         .flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED,
2432                 };
2433                 status = amdgpu_vram_mgr_query_page_status(&adev->mman.vram_mgr,
2434                                 data->bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT);
2435                 if (status == -EBUSY)
2436                         (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING;
2437                 else if (status == -ENOENT)
2438                         (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_FAULT;
2439         }
2440
2441         *count = data->count;
2442 out:
2443         mutex_unlock(&con->recovery_lock);
2444         return ret;
2445 }
2446
2447 static void amdgpu_ras_set_fed_all(struct amdgpu_device *adev,
2448                                    struct amdgpu_hive_info *hive, bool status)
2449 {
2450         struct amdgpu_device *tmp_adev;
2451
2452         if (hive) {
2453                 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
2454                         amdgpu_ras_set_fed(tmp_adev, status);
2455         } else {
2456                 amdgpu_ras_set_fed(adev, status);
2457         }
2458 }
2459
2460 bool amdgpu_ras_in_recovery(struct amdgpu_device *adev)
2461 {
2462         struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2463         struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
2464         int hive_ras_recovery = 0;
2465
2466         if (hive) {
2467                 hive_ras_recovery = atomic_read(&hive->ras_recovery);
2468                 amdgpu_put_xgmi_hive(hive);
2469         }
2470
2471         if (ras && (atomic_read(&ras->in_recovery) || hive_ras_recovery))
2472                 return true;
2473
2474         return false;
2475 }
2476
/*
 * Work handler for ras->recovery_work.
 *
 * Steps, in order:
 *  1. If the device is in an XGMI hive, mark the hive as recovering and
 *     propagate the fatal-error (fed) status to all hive members when any
 *     member reports it.
 *  2. Unless disable_ras_err_cnt_harvest is set, query error status and log
 *     error counters on every device of the hive (or on this device only).
 *  3. If amdgpu_device_should_recover_gpu() agrees, build a reset context
 *     from ras->gpu_reset_flags and call amdgpu_device_gpu_recover().
 *  4. Clear the in_recovery / ras_recovery flags and drop the hive reference.
 */
static void amdgpu_ras_do_recovery(struct work_struct *work)
{
        struct amdgpu_ras *ras =
                container_of(work, struct amdgpu_ras, recovery_work);
        struct amdgpu_device *remote_adev = NULL;
        struct amdgpu_device *adev = ras->adev;
        struct list_head device_list, *device_list_handle =  NULL;
        struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);

        if (hive) {
                atomic_set(&hive->ras_recovery, 1);

                /* If any device which is part of the hive received RAS fatal
                 * error interrupt, set fatal error status on all. This
                 * condition will need a recovery, and flag will be cleared
                 * as part of recovery.
                 */
                list_for_each_entry(remote_adev, &hive->device_list,
                                    gmc.xgmi.head)
                        if (amdgpu_ras_get_fed_status(remote_adev)) {
                                amdgpu_ras_set_fed_all(adev, hive, true);
                                break;
                        }
        }
        if (!ras->disable_ras_err_cnt_harvest) {

                /* Build list of devices to query RAS related errors */
                if  (hive && adev->gmc.xgmi.num_physical_nodes > 1) {
                        device_list_handle = &hive->device_list;
                } else {
                        /*
                         * Single-device case: link this device onto a list
                         * head that lives on this function's stack.
                         * NOTE(review): gmc.xgmi.head is never unlinked from
                         * device_list before return - confirm this is safe
                         * for later users of gmc.xgmi.head.
                         */
                        INIT_LIST_HEAD(&device_list);
                        list_add_tail(&adev->gmc.xgmi.head, &device_list);
                        device_list_handle = &device_list;
                }

                list_for_each_entry(remote_adev,
                                device_list_handle, gmc.xgmi.head) {
                        amdgpu_ras_query_err_status(remote_adev);
                        amdgpu_ras_log_on_err_counter(remote_adev);
                }

        }

        if (amdgpu_device_should_recover_gpu(ras->adev)) {
                struct amdgpu_reset_context reset_context;
                memset(&reset_context, 0, sizeof(reset_context));

                /* No specific reset method is requested by default. */
                reset_context.method = AMD_RESET_METHOD_NONE;
                reset_context.reset_req_dev = adev;
                reset_context.src = AMDGPU_RESET_SRC_RAS;

                /* Perform full reset in fatal error mode */
                if (!amdgpu_ras_is_poison_mode_supported(ras->adev))
                        set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
                else {
                        clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);

                        /* A pending mode-2 request is consumed here. */
                        if (ras->gpu_reset_flags & AMDGPU_RAS_GPU_RESET_MODE2_RESET) {
                                ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE2_RESET;
                                reset_context.method = AMD_RESET_METHOD_MODE2;
                        }

                        /* Fatal error occurs in poison mode, mode1 reset is used to
                         * recover gpu.
                         */
                        if (ras->gpu_reset_flags & AMDGPU_RAS_GPU_RESET_MODE1_RESET) {
                                ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE1_RESET;
                                /*
                                 * Checked after mode-2 above, so the full
                                 * reset flag is set even if a mode-2 request
                                 * was also pending.
                                 */
                                set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);

                                psp_fatal_error_recovery_quirk(&adev->psp);
                        }
                }

                amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context);
        }
        /* Recovery is over: clear the flags amdgpu_ras_in_recovery() reads. */
        atomic_set(&ras->in_recovery, 0);
        if (hive) {
                atomic_set(&hive->ras_recovery, 0);
                amdgpu_put_xgmi_hive(hive);
        }
}
2558
2559 /* alloc/realloc bps array */
2560 static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev,
2561                 struct ras_err_handler_data *data, int pages)
2562 {
2563         unsigned int old_space = data->count + data->space_left;
2564         unsigned int new_space = old_space + pages;
2565         unsigned int align_space = ALIGN(new_space, 512);
2566         void *bps = kmalloc(align_space * sizeof(*data->bps), GFP_KERNEL);
2567
2568         if (!bps) {
2569                 return -ENOMEM;
2570         }
2571
2572         if (data->bps) {
2573                 memcpy(bps, data->bps,
2574                                 data->count * sizeof(*data->bps));
2575                 kfree(data->bps);
2576         }
2577
2578         data->bps = bps;
2579         data->space_left += align_space - old_space;
2580         return 0;
2581 }
2582
2583 /* it deal with vram only. */
2584 int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
2585                 struct eeprom_table_record *bps, int pages)
2586 {
2587         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
2588         struct ras_err_handler_data *data;
2589         int ret = 0;
2590         uint32_t i;
2591
2592         if (!con || !con->eh_data || !bps || pages <= 0)
2593                 return 0;
2594
2595         mutex_lock(&con->recovery_lock);
2596         data = con->eh_data;
2597         if (!data)
2598                 goto out;
2599
2600         for (i = 0; i < pages; i++) {
2601                 if (amdgpu_ras_check_bad_page_unlock(con,
2602                         bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT))
2603                         continue;
2604
2605                 if (!data->space_left &&
2606                         amdgpu_ras_realloc_eh_data_space(adev, data, 256)) {
2607                         ret = -ENOMEM;
2608                         goto out;
2609                 }
2610
2611                 amdgpu_ras_reserve_page(adev, bps[i].retired_page);
2612
2613                 memcpy(&data->bps[data->count], &bps[i], sizeof(*data->bps));
2614                 data->count++;
2615                 data->space_left--;
2616         }
2617 out:
2618         mutex_unlock(&con->recovery_lock);
2619
2620         return ret;
2621 }
2622
2623 /*
2624  * write error record array to eeprom, the function should be
2625  * protected by recovery_lock
2626  * new_cnt: new added UE count, excluding reserved bad pages, can be NULL
2627  */
2628 int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
2629                 unsigned long *new_cnt)
2630 {
2631         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
2632         struct ras_err_handler_data *data;
2633         struct amdgpu_ras_eeprom_control *control;
2634         int save_count;
2635
2636         if (!con || !con->eh_data) {
2637                 if (new_cnt)
2638                         *new_cnt = 0;
2639
2640                 return 0;
2641         }
2642
2643         mutex_lock(&con->recovery_lock);
2644         control = &con->eeprom_control;
2645         data = con->eh_data;
2646         save_count = data->count - control->ras_num_recs;
2647         mutex_unlock(&con->recovery_lock);
2648
2649         if (new_cnt)
2650                 *new_cnt = save_count / adev->umc.retire_unit;
2651
2652         /* only new entries are saved */
2653         if (save_count > 0) {
2654                 if (amdgpu_ras_eeprom_append(control,
2655                                              &data->bps[control->ras_num_recs],
2656                                              save_count)) {
2657                         dev_err(adev->dev, "Failed to save EEPROM table data!");
2658                         return -EIO;
2659                 }
2660
2661                 dev_info(adev->dev, "Saved %d pages to EEPROM table.\n", save_count);
2662         }
2663
2664         return 0;
2665 }
2666
2667 /*
2668  * read error record array in eeprom and reserve enough space for
2669  * storing new bad pages
2670  */
2671 static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev)
2672 {
2673         struct amdgpu_ras_eeprom_control *control =
2674                 &adev->psp.ras_context.ras->eeprom_control;
2675         struct eeprom_table_record *bps;
2676         int ret;
2677
2678         /* no bad page record, skip eeprom access */
2679         if (control->ras_num_recs == 0 || amdgpu_bad_page_threshold == 0)
2680                 return 0;
2681
2682         bps = kcalloc(control->ras_num_recs, sizeof(*bps), GFP_KERNEL);
2683         if (!bps)
2684                 return -ENOMEM;
2685
2686         ret = amdgpu_ras_eeprom_read(control, bps, control->ras_num_recs);
2687         if (ret)
2688                 dev_err(adev->dev, "Failed to load EEPROM table records!");
2689         else
2690                 ret = amdgpu_ras_add_bad_pages(adev, bps, control->ras_num_recs);
2691
2692         kfree(bps);
2693         return ret;
2694 }
2695
2696 static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
2697                                 uint64_t addr)
2698 {
2699         struct ras_err_handler_data *data = con->eh_data;
2700         int i;
2701
2702         addr >>= AMDGPU_GPU_PAGE_SHIFT;
2703         for (i = 0; i < data->count; i++)
2704                 if (addr == data->bps[i].retired_page)
2705                         return true;
2706
2707         return false;
2708 }
2709
2710 /*
2711  * check if an address belongs to bad page
2712  *
2713  * Note: this check is only for umc block
2714  */
2715 static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
2716                                 uint64_t addr)
2717 {
2718         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
2719         bool ret = false;
2720
2721         if (!con || !con->eh_data)
2722                 return ret;
2723
2724         mutex_lock(&con->recovery_lock);
2725         ret = amdgpu_ras_check_bad_page_unlock(con, addr);
2726         mutex_unlock(&con->recovery_lock);
2727         return ret;
2728 }
2729
2730 static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev,
2731                                           uint32_t max_count)
2732 {
2733         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
2734
2735         /*
2736          * Justification of value bad_page_cnt_threshold in ras structure
2737          *
2738          * Generally, 0 <= amdgpu_bad_page_threshold <= max record length
2739          * in eeprom or amdgpu_bad_page_threshold == -2, introduce two
2740          * scenarios accordingly.
2741          *
2742          * Bad page retirement enablement:
2743          *    - If amdgpu_bad_page_threshold = -2,
2744          *      bad_page_cnt_threshold = typical value by formula.
2745          *
2746          *    - When the value from user is 0 < amdgpu_bad_page_threshold <
2747          *      max record length in eeprom, use it directly.
2748          *
2749          * Bad page retirement disablement:
2750          *    - If amdgpu_bad_page_threshold = 0, bad page retirement
2751          *      functionality is disabled, and bad_page_cnt_threshold will
2752          *      take no effect.
2753          */
2754
2755         if (amdgpu_bad_page_threshold < 0) {
2756                 u64 val = adev->gmc.mc_vram_size;
2757
2758                 do_div(val, RAS_BAD_PAGE_COVER);
2759                 con->bad_page_cnt_threshold = min(lower_32_bits(val),
2760                                                   max_count);
2761         } else {
2762                 con->bad_page_cnt_threshold = min_t(int, max_count,
2763                                                     amdgpu_bad_page_threshold);
2764         }
2765 }
2766
2767 int amdgpu_ras_put_poison_req(struct amdgpu_device *adev,
2768                 enum amdgpu_ras_block block, uint16_t pasid,
2769                 pasid_notify pasid_fn, void *data, uint32_t reset)
2770 {
2771         int ret = 0;
2772         struct ras_poison_msg poison_msg;
2773         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
2774
2775         memset(&poison_msg, 0, sizeof(poison_msg));
2776         poison_msg.block = block;
2777         poison_msg.pasid = pasid;
2778         poison_msg.reset = reset;
2779         poison_msg.pasid_fn = pasid_fn;
2780         poison_msg.data = data;
2781
2782         ret = kfifo_put(&con->poison_fifo, poison_msg);
2783         if (!ret) {
2784                 dev_err(adev->dev, "Poison message fifo is full!\n");
2785                 return -ENOSPC;
2786         }
2787
2788         return 0;
2789 }
2790
2791 static int amdgpu_ras_get_poison_req(struct amdgpu_device *adev,
2792                 struct ras_poison_msg *poison_msg)
2793 {
2794         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
2795
2796         return kfifo_get(&con->poison_fifo, poison_msg);
2797 }
2798
2799 static void amdgpu_ras_ecc_log_init(struct ras_ecc_log_info *ecc_log)
2800 {
2801         mutex_init(&ecc_log->lock);
2802
2803         /* Set any value as siphash key */
2804         memset(&ecc_log->ecc_key, 0xad, sizeof(ecc_log->ecc_key));
2805
2806         INIT_RADIX_TREE(&ecc_log->de_page_tree, GFP_KERNEL);
2807         ecc_log->de_queried_count = 0;
2808         ecc_log->prev_de_queried_count = 0;
2809 }
2810
2811 static void amdgpu_ras_ecc_log_fini(struct ras_ecc_log_info *ecc_log)
2812 {
2813         struct radix_tree_iter iter;
2814         void __rcu **slot;
2815         struct ras_ecc_err *ecc_err;
2816
2817         mutex_lock(&ecc_log->lock);
2818         radix_tree_for_each_slot(slot, &ecc_log->de_page_tree, &iter, 0) {
2819                 ecc_err = radix_tree_deref_slot(slot);
2820                 kfree(ecc_err->err_pages.pfn);
2821                 kfree(ecc_err);
2822                 radix_tree_iter_delete(&ecc_log->de_page_tree, &iter, slot);
2823         }
2824         mutex_unlock(&ecc_log->lock);
2825
2826         mutex_destroy(&ecc_log->lock);
2827         ecc_log->de_queried_count = 0;
2828         ecc_log->prev_de_queried_count = 0;
2829 }
2830
2831 static void amdgpu_ras_do_page_retirement(struct work_struct *work)
2832 {
2833         struct amdgpu_ras *con = container_of(work, struct amdgpu_ras,
2834                                               page_retirement_dwork.work);
2835         struct amdgpu_device *adev = con->adev;
2836         struct ras_err_data err_data;
2837         unsigned long err_cnt;
2838
2839         if (amdgpu_in_reset(adev) || amdgpu_ras_in_recovery(adev))
2840                 return;
2841
2842         amdgpu_ras_error_data_init(&err_data);
2843
2844         amdgpu_umc_handle_bad_pages(adev, &err_data);
2845         err_cnt = err_data.err_addr_cnt;
2846
2847         amdgpu_ras_error_data_fini(&err_data);
2848
2849         if (err_cnt && con->is_rma)
2850                 amdgpu_ras_reset_gpu(adev);
2851
2852         mutex_lock(&con->umc_ecc_log.lock);
2853         if (radix_tree_tagged(&con->umc_ecc_log.de_page_tree,
2854                                 UMC_ECC_NEW_DETECTED_TAG))
2855                 schedule_delayed_work(&con->page_retirement_dwork,
2856                         msecs_to_jiffies(AMDGPU_RAS_RETIRE_PAGE_INTERVAL));
2857         mutex_unlock(&con->umc_ecc_log.lock);
2858 }
2859
/*
 * Poll the UMC error status until at least @poison_creation_count new
 * deferred errors have shown up in ras->umc_ecc_log.
 *
 * New detections are measured as the growth of ecc_log->de_queried_count
 * over ecc_log->prev_de_queried_count after each query. When at least one
 * new error was found, the page retirement work is scheduled immediately.
 *
 * Return: 0 on success, the error from amdgpu_ras_query_error_status() if
 * a query fails, or -ENOENT when polling times out before the requested
 * number of errors was seen.
 */
static int amdgpu_ras_poison_creation_handler(struct amdgpu_device *adev,
                                uint32_t poison_creation_count)
{
        int ret = 0;
        struct ras_ecc_log_info *ecc_log;
        struct ras_query_if info;
        uint32_t timeout = 0;
        struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
        uint64_t de_queried_count;
        uint32_t new_detect_count, total_detect_count;
        uint32_t need_query_count = poison_creation_count;
        bool query_data_timeout = false;

        memset(&info, 0, sizeof(info));
        info.head.block = AMDGPU_RAS_BLOCK__UMC;

        ecc_log = &ras->umc_ecc_log;
        total_detect_count = 0;
        do {
                ret = amdgpu_ras_query_error_status(adev, &info);
                if (ret)
                        return ret;

                de_queried_count = ecc_log->de_queried_count;
                if (de_queried_count > ecc_log->prev_de_queried_count) {
                        new_detect_count = de_queried_count - ecc_log->prev_de_queried_count;
                        ecc_log->prev_de_queried_count = de_queried_count;
                        /* progress was made: disarm the polling timeout */
                        timeout = 0;
                } else {
                        new_detect_count = 0;
                }

                if (new_detect_count) {
                        total_detect_count += new_detect_count;
                } else {
                        /* arm the timeout on the first idle iteration */
                        if (!timeout && need_query_count)
                                timeout = MAX_UMC_POISON_POLLING_TIME_ASYNC;

                        if (timeout) {
                                /* one tick per idle iteration, 1 ms apart */
                                if (!--timeout) {
                                        query_data_timeout = true;
                                        break;
                                }
                                msleep(1);
                        }
                }
        } while (total_detect_count < need_query_count);

        if (query_data_timeout) {
                dev_warn(adev->dev, "Can't find deferred error! count: %u\n",
                        (need_query_count - total_detect_count));
                return -ENOENT;
        }

        if (total_detect_count)
                schedule_delayed_work(&ras->page_retirement_dwork, 0);

        return 0;
}
2919
2920 static void amdgpu_ras_clear_poison_fifo(struct amdgpu_device *adev)
2921 {
2922         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
2923         struct ras_poison_msg msg;
2924         int ret;
2925
2926         do {
2927                 ret = kfifo_get(&con->poison_fifo, &msg);
2928         } while (ret);
2929 }
2930
2931 static int amdgpu_ras_poison_consumption_handler(struct amdgpu_device *adev,
2932                         uint32_t msg_count, uint32_t *gpu_reset)
2933 {
2934         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
2935         uint32_t reset_flags = 0, reset = 0;
2936         struct ras_poison_msg msg;
2937         int ret, i;
2938
2939         kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
2940
2941         for (i = 0; i < msg_count; i++) {
2942                 ret = amdgpu_ras_get_poison_req(adev, &msg);
2943                 if (!ret)
2944                         continue;
2945
2946                 if (msg.pasid_fn)
2947                         msg.pasid_fn(adev, msg.pasid, msg.data);
2948
2949                 reset_flags |= msg.reset;
2950         }
2951
2952         /* for RMA, amdgpu_ras_poison_creation_handler will trigger gpu reset */
2953         if (reset_flags && !con->is_rma) {
2954                 if (reset_flags & AMDGPU_RAS_GPU_RESET_MODE1_RESET)
2955                         reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
2956                 else if (reset_flags & AMDGPU_RAS_GPU_RESET_MODE2_RESET)
2957                         reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
2958                 else
2959                         reset = reset_flags;
2960
2961                 flush_delayed_work(&con->page_retirement_dwork);
2962
2963                 con->gpu_reset_flags |= reset;
2964                 amdgpu_ras_reset_gpu(adev);
2965
2966                 *gpu_reset = reset;
2967
2968                 /* Wait for gpu recovery to complete */
2969                 flush_work(&con->recovery_work);
2970         }
2971
2972         return 0;
2973 }
2974
/*
 * Kernel thread body for UMC page retirement.
 *
 * @param: the struct amdgpu_device this thread serves.
 *
 * The thread sleeps until page_retirement_req_cnt becomes non-zero (or it
 * is asked to stop). Each wake-up first handles poison creation requests,
 * then poison consumption messages queued in poison_fifo, and finally
 * cleans up the request counters depending on whether a reset happened.
 *
 * Always returns 0 once kthread_should_stop() is true.
 */
static int amdgpu_ras_page_retirement_thread(void *param)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)param;
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
        uint32_t poison_creation_count, msg_count;
        uint32_t gpu_reset;
        int ret;

        while (!kthread_should_stop()) {

                wait_event_interruptible(con->page_retirement_wq,
                                kthread_should_stop() ||
                                atomic_read(&con->page_retirement_req_cnt));

                if (kthread_should_stop())
                        break;

                gpu_reset = 0;

                /*
                 * Handle poison creation: the count is sampled once per
                 * pass, and only the sampled amount is subtracted, so
                 * requests arriving meanwhile trigger another pass.
                 */
                do {
                        poison_creation_count = atomic_read(&con->poison_creation_count);
                        ret = amdgpu_ras_poison_creation_handler(adev, poison_creation_count);
                        if (ret == -EIO)
                                break;

                        if (poison_creation_count) {
                                atomic_sub(poison_creation_count, &con->poison_creation_count);
                                atomic_sub(poison_creation_count, &con->page_retirement_req_cnt);
                        }
                } while (atomic_read(&con->poison_creation_count));

                /* Handle poison consumption unless creation reported -EIO. */
                if (ret != -EIO) {
                        msg_count = kfifo_len(&con->poison_fifo);
                        if (msg_count) {
                                ret = amdgpu_ras_poison_consumption_handler(adev,
                                                msg_count, &gpu_reset);
                                /*
                                 * After a mode-1 reset the counters are
                                 * cleared wholesale below instead.
                                 */
                                if ((ret != -EIO) &&
                                    (gpu_reset != AMDGPU_RAS_GPU_RESET_MODE1_RESET))
                                        atomic_sub(msg_count, &con->page_retirement_req_cnt);
                        }
                }

                if ((ret == -EIO) || (gpu_reset == AMDGPU_RAS_GPU_RESET_MODE1_RESET)) {
                        /* gpu mode-1 reset is ongoing or just completed ras mode-1 reset */
                        /* Clear poison creation request */
                        atomic_set(&con->poison_creation_count, 0);

                        /* Clear poison fifo */
                        amdgpu_ras_clear_poison_fifo(adev);

                        /* Clear all poison requests */
                        atomic_set(&con->page_retirement_req_cnt, 0);

                        if (ret == -EIO) {
                                /* Wait for mode-1 reset to complete */
                                /*
                                 * Taking and dropping the read side blocks
                                 * until no writer holds reset_domain->sem
                                 * (presumably held for write during reset -
                                 * confirm against the reset code).
                                 */
                                down_read(&adev->reset_domain->sem);
                                up_read(&adev->reset_domain->sem);
                        }

                        /* Wake up work to save bad pages to eeprom */
                        schedule_delayed_work(&con->page_retirement_dwork, 0);
                } else if (gpu_reset) {
                        /* gpu just completed mode-2 reset or other reset */
                        /* Clear poison consumption messages cached in fifo */
                        msg_count = kfifo_len(&con->poison_fifo);
                        if (msg_count) {
                                amdgpu_ras_clear_poison_fifo(adev);
                                atomic_sub(msg_count, &con->page_retirement_req_cnt);
                        }

                        /* Wake up work to save bad pages to eeprom */
                        schedule_delayed_work(&con->page_retirement_dwork, 0);
                }
        }

        return 0;
}
3052
/*
 * Set up RAS recovery state for @adev: the error handler data, the recovery
 * work, the bad page threshold, the EEPROM-backed bad page list, and the
 * page retirement thread / delayed work / ECC log.
 *
 * Return: 0 on success and on every failure except the RMA case; when
 * con->is_rma is set on the failure path, -EINVAL is returned. Also returns
 * 0 early when there is no RAS context, for SR-IOV VFs, when RAS is
 * disabled, or when an XGMI reset is pending.
 */
int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
{
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
        struct ras_err_handler_data **data;
        u32  max_eeprom_records_count = 0;
        int ret;

        if (!con || amdgpu_sriov_vf(adev))
                return 0;

        /* Allow access to RAS EEPROM via debugfs, when the ASIC
         * supports RAS and debugfs is enabled, but when
         * adev->ras_enabled is unset, i.e. when "ras_enable"
         * module parameter is set to 0.
         */
        con->adev = adev;

        if (!adev->ras_enabled)
                return 0;

        data = &con->eh_data;
        *data = kzalloc(sizeof(**data), GFP_KERNEL);
        if (!*data) {
                ret = -ENOMEM;
                goto out;
        }

        mutex_init(&con->recovery_lock);
        INIT_WORK(&con->recovery_work, amdgpu_ras_do_recovery);
        atomic_set(&con->in_recovery, 0);
        con->eeprom_control.bad_channel_bitmap = 0;

        max_eeprom_records_count = amdgpu_ras_eeprom_max_record_count(&con->eeprom_control);
        amdgpu_ras_validate_threshold(adev, max_eeprom_records_count);

        /* Todo: During test the SMU might fail to read the eeprom through I2C
         * when the GPU is pending on XGMI reset during probe time
         * (Mostly after second bus reset), skip it now
         */
        /*
         * Note: this early return keeps eh_data allocated and skips the
         * EEPROM load as well as the page retirement setup further down.
         */
        if (adev->gmc.xgmi.pending_reset)
                return 0;
        ret = amdgpu_ras_eeprom_init(&con->eeprom_control);
        /*
         * This calling fails when is_rma is true or
         * ret != 0.
         */
        if (con->is_rma || ret)
                goto free;

        if (con->eeprom_control.ras_num_recs) {
                ret = amdgpu_ras_load_bad_pages(adev);
                if (ret)
                        goto free;

                amdgpu_dpm_send_hbm_bad_pages_num(adev, con->eeprom_control.ras_num_recs);

                if (con->update_channel_flag == true) {
                        amdgpu_dpm_send_hbm_bad_channel_flag(adev, con->eeprom_control.bad_channel_bitmap);
                        con->update_channel_flag = false;
                }
        }

        mutex_init(&con->page_rsv_lock);
        INIT_KFIFO(con->poison_fifo);
        mutex_init(&con->page_retirement_lock);
        init_waitqueue_head(&con->page_retirement_wq);
        atomic_set(&con->page_retirement_req_cnt, 0);
        atomic_set(&con->poison_creation_count, 0);
        con->page_retirement_thread =
                kthread_run(amdgpu_ras_page_retirement_thread, adev, "umc_page_retirement");
        /* A thread creation failure is only warned about, not treated as fatal. */
        if (IS_ERR(con->page_retirement_thread)) {
                con->page_retirement_thread = NULL;
                dev_warn(adev->dev, "Failed to create umc_page_retirement thread!!!\n");
        }

        INIT_DELAYED_WORK(&con->page_retirement_dwork, amdgpu_ras_do_page_retirement);
        amdgpu_ras_ecc_log_init(&con->umc_ecc_log);
#ifdef CONFIG_X86_MCE_AMD
        if ((adev->asic_type == CHIP_ALDEBARAN) &&
            (adev->gmc.xgmi.connected_to_cpu))
                amdgpu_register_bad_pages_mca_notifier(adev);
#endif
        return 0;

free:
        /* Release the record array and the handler data allocated above. */
        kfree((*data)->bps);
        kfree(*data);
        con->eh_data = NULL;
out:
        dev_warn(adev->dev, "Failed to initialize ras recovery! (%d)\n", ret);

        /*
         * Except error threshold exceeding case, other failure cases in this
         * function would not fail amdgpu driver init.
         */
        if (!con->is_rma)
                ret = 0;
        else
                ret = -EINVAL;

        return ret;
}
3155
3156 static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
3157 {
3158         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
3159         struct ras_err_handler_data *data = con->eh_data;
3160
3161         /* recovery_init failed to init it, fini is useless */
3162         if (!data)
3163                 return 0;
3164
3165         if (con->page_retirement_thread)
3166                 kthread_stop(con->page_retirement_thread);
3167
3168         atomic_set(&con->page_retirement_req_cnt, 0);
3169         atomic_set(&con->poison_creation_count, 0);
3170
3171         mutex_destroy(&con->page_rsv_lock);
3172
3173         cancel_work_sync(&con->recovery_work);
3174
3175         cancel_delayed_work_sync(&con->page_retirement_dwork);
3176
3177         amdgpu_ras_ecc_log_fini(&con->umc_ecc_log);
3178
3179         mutex_lock(&con->recovery_lock);
3180         con->eh_data = NULL;
3181         kfree(data->bps);
3182         kfree(data);
3183         mutex_unlock(&con->recovery_lock);
3184
3185         return 0;
3186 }
3187 /* recovery end */
3188
3189 static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)
3190 {
3191         if (amdgpu_sriov_vf(adev)) {
3192                 switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
3193                 case IP_VERSION(13, 0, 2):
3194                 case IP_VERSION(13, 0, 6):
3195                 case IP_VERSION(13, 0, 14):
3196                         return true;
3197                 default:
3198                         return false;
3199                 }
3200         }
3201
3202         if (adev->asic_type == CHIP_IP_DISCOVERY) {
3203                 switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
3204                 case IP_VERSION(13, 0, 0):
3205                 case IP_VERSION(13, 0, 6):
3206                 case IP_VERSION(13, 0, 10):
3207                 case IP_VERSION(13, 0, 14):
3208                         return true;
3209                 default:
3210                         return false;
3211                 }
3212         }
3213
3214         return adev->asic_type == CHIP_VEGA10 ||
3215                 adev->asic_type == CHIP_VEGA20 ||
3216                 adev->asic_type == CHIP_ARCTURUS ||
3217                 adev->asic_type == CHIP_ALDEBARAN ||
3218                 adev->asic_type == CHIP_SIENNA_CICHLID;
3219 }
3220
3221 /*
3222  * this is workaround for vega20 workstation sku,
3223  * force enable gfx ras, ignore vbios gfx ras flag
3224  * due to GC EDC can not write
3225  */
3226 static void amdgpu_ras_get_quirks(struct amdgpu_device *adev)
3227 {
3228         struct atom_context *ctx = adev->mode_info.atom_context;
3229
3230         if (!ctx)
3231                 return;
3232
3233         if (strnstr(ctx->vbios_pn, "D16406",
3234                     sizeof(ctx->vbios_pn)) ||
3235                 strnstr(ctx->vbios_pn, "D36002",
3236                         sizeof(ctx->vbios_pn)))
3237                 adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX);
3238 }
3239
3240 /* Query ras capablity via atomfirmware interface */
3241 static void amdgpu_ras_query_ras_capablity_from_vbios(struct amdgpu_device *adev)
3242 {
3243         /* mem_ecc cap */
3244         if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
3245                 dev_info(adev->dev, "MEM ECC is active.\n");
3246                 adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__UMC |
3247                                          1 << AMDGPU_RAS_BLOCK__DF);
3248         } else {
3249                 dev_info(adev->dev, "MEM ECC is not presented.\n");
3250         }
3251
3252         /* sram_ecc cap */
3253         if (amdgpu_atomfirmware_sram_ecc_supported(adev)) {
3254                 dev_info(adev->dev, "SRAM ECC is active.\n");
3255                 if (!amdgpu_sriov_vf(adev))
3256                         adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
3257                                                   1 << AMDGPU_RAS_BLOCK__DF);
3258                 else
3259                         adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__PCIE_BIF |
3260                                                  1 << AMDGPU_RAS_BLOCK__SDMA |
3261                                                  1 << AMDGPU_RAS_BLOCK__GFX);
3262
3263                 /*
3264                  * VCN/JPEG RAS can be supported on both bare metal and
3265                  * SRIOV environment
3266                  */
3267                 if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(2, 6, 0) ||
3268                     amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 0) ||
3269                     amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 3))
3270                         adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN |
3271                                                  1 << AMDGPU_RAS_BLOCK__JPEG);
3272                 else
3273                         adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN |
3274                                                   1 << AMDGPU_RAS_BLOCK__JPEG);
3275
3276                 /*
3277                  * XGMI RAS is not supported if xgmi num physical nodes
3278                  * is zero
3279                  */
3280                 if (!adev->gmc.xgmi.num_physical_nodes)
3281                         adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__XGMI_WAFL);
3282         } else {
3283                 dev_info(adev->dev, "SRAM ECC is not presented.\n");
3284         }
3285 }
3286
3287 /* Query poison mode from umc/df IP callbacks */
3288 static void amdgpu_ras_query_poison_mode(struct amdgpu_device *adev)
3289 {
3290         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
3291         bool df_poison, umc_poison;
3292
3293         /* poison setting is useless on SRIOV guest */
3294         if (amdgpu_sriov_vf(adev) || !con)
3295                 return;
3296
3297         /* Init poison supported flag, the default value is false */
3298         if (adev->gmc.xgmi.connected_to_cpu ||
3299             adev->gmc.is_app_apu) {
3300                 /* enabled by default when GPU is connected to CPU */
3301                 con->poison_supported = true;
3302         } else if (adev->df.funcs &&
3303             adev->df.funcs->query_ras_poison_mode &&
3304             adev->umc.ras &&
3305             adev->umc.ras->query_ras_poison_mode) {
3306                 df_poison =
3307                         adev->df.funcs->query_ras_poison_mode(adev);
3308                 umc_poison =
3309                         adev->umc.ras->query_ras_poison_mode(adev);
3310
3311                 /* Only poison is set in both DF and UMC, we can support it */
3312                 if (df_poison && umc_poison)
3313                         con->poison_supported = true;
3314                 else if (df_poison != umc_poison)
3315                         dev_warn(adev->dev,
3316                                 "Poison setting is inconsistent in DF/UMC(%d:%d)!\n",
3317                                 df_poison, umc_poison);
3318         }
3319 }
3320
3321 /*
3322  * check hardware's ras ability which will be saved in hw_supported.
3323  * if hardware does not support ras, we can skip some ras initializtion and
3324  * forbid some ras operations from IP.
3325  * if software itself, say boot parameter, limit the ras ability. We still
3326  * need allow IP do some limited operations, like disable. In such case,
3327  * we have to initialize ras as normal. but need check if operation is
3328  * allowed or not in each function.
3329  */
3330 static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
3331 {
3332         adev->ras_hw_enabled = adev->ras_enabled = 0;
3333
3334         if (!amdgpu_ras_asic_supported(adev))
3335                 return;
3336
3337         /* query ras capability from psp */
3338         if (amdgpu_psp_get_ras_capability(&adev->psp))
3339                 goto init_ras_enabled_flag;
3340
3341         /* query ras capablity from bios */
3342         if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
3343                 amdgpu_ras_query_ras_capablity_from_vbios(adev);
3344         } else {
3345                 /* driver only manages a few IP blocks RAS feature
3346                  * when GPU is connected cpu through XGMI */
3347                 adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX |
3348                                            1 << AMDGPU_RAS_BLOCK__SDMA |
3349                                            1 << AMDGPU_RAS_BLOCK__MMHUB);
3350         }
3351
3352         /* apply asic specific settings (vega20 only for now) */
3353         amdgpu_ras_get_quirks(adev);
3354
3355         /* query poison mode from umc/df ip callback */
3356         amdgpu_ras_query_poison_mode(adev);
3357
3358 init_ras_enabled_flag:
3359         /* hw_supported needs to be aligned with RAS block mask. */
3360         adev->ras_hw_enabled &= AMDGPU_RAS_BLOCK_MASK;
3361
3362         adev->ras_enabled = amdgpu_ras_enable == 0 ? 0 :
3363                 adev->ras_hw_enabled & amdgpu_ras_mask;
3364
3365         /* aca is disabled by default */
3366         adev->aca.is_enabled = false;
3367 }
3368
3369 static void amdgpu_ras_counte_dw(struct work_struct *work)
3370 {
3371         struct amdgpu_ras *con = container_of(work, struct amdgpu_ras,
3372                                               ras_counte_delay_work.work);
3373         struct amdgpu_device *adev = con->adev;
3374         struct drm_device *dev = adev_to_drm(adev);
3375         unsigned long ce_count, ue_count;
3376         int res;
3377
3378         res = pm_runtime_get_sync(dev->dev);
3379         if (res < 0)
3380                 goto Out;
3381
3382         /* Cache new values.
3383          */
3384         if (amdgpu_ras_query_error_count(adev, &ce_count, &ue_count, NULL) == 0) {
3385                 atomic_set(&con->ras_ce_count, ce_count);
3386                 atomic_set(&con->ras_ue_count, ue_count);
3387         }
3388
3389         pm_runtime_mark_last_busy(dev->dev);
3390 Out:
3391         pm_runtime_put_autosuspend(dev->dev);
3392 }
3393
3394 static int amdgpu_get_ras_schema(struct amdgpu_device *adev)
3395 {
3396         return  amdgpu_ras_is_poison_mode_supported(adev) ? AMDGPU_RAS_ERROR__POISON : 0 |
3397                         AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE |
3398                         AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE |
3399                         AMDGPU_RAS_ERROR__PARITY;
3400 }
3401
3402 static void ras_event_mgr_init(struct ras_event_manager *mgr)
3403 {
3404         int i;
3405
3406         for (i = 0; i < ARRAY_SIZE(mgr->seqnos); i++)
3407                 atomic64_set(&mgr->seqnos[i], 0);
3408 }
3409
3410 static void amdgpu_ras_event_mgr_init(struct amdgpu_device *adev)
3411 {
3412         struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
3413         struct amdgpu_hive_info *hive;
3414
3415         if (!ras)
3416                 return;
3417
3418         hive = amdgpu_get_xgmi_hive(adev);
3419         ras->event_mgr = hive ? &hive->event_mgr : &ras->__event_mgr;
3420
3421         /* init event manager with node 0 on xgmi system */
3422         if (!amdgpu_in_reset(adev)) {
3423                 if (!hive || adev->gmc.xgmi.node_id == 0)
3424                         ras_event_mgr_init(ras->event_mgr);
3425         }
3426
3427         if (hive)
3428                 amdgpu_put_xgmi_hive(hive);
3429 }
3430
3431 static void amdgpu_ras_init_reserved_vram_size(struct amdgpu_device *adev)
3432 {
3433         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
3434
3435         if (!con || (adev->flags & AMD_IS_APU))
3436                 return;
3437
3438         switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
3439         case IP_VERSION(13, 0, 2):
3440         case IP_VERSION(13, 0, 6):
3441         case IP_VERSION(13, 0, 14):
3442                 con->reserved_pages_in_bytes = AMDGPU_RAS_RESERVED_VRAM_SIZE;
3443                 break;
3444         default:
3445                 break;
3446         }
3447 }
3448
3449 int amdgpu_ras_init(struct amdgpu_device *adev)
3450 {
3451         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
3452         int r;
3453
3454         if (con)
3455                 return 0;
3456
3457         con = kzalloc(sizeof(*con) +
3458                         sizeof(struct ras_manager) * AMDGPU_RAS_BLOCK_COUNT +
3459                         sizeof(struct ras_manager) * AMDGPU_RAS_MCA_BLOCK_COUNT,
3460                         GFP_KERNEL);
3461         if (!con)
3462                 return -ENOMEM;
3463
3464         con->adev = adev;
3465         INIT_DELAYED_WORK(&con->ras_counte_delay_work, amdgpu_ras_counte_dw);
3466         atomic_set(&con->ras_ce_count, 0);
3467         atomic_set(&con->ras_ue_count, 0);
3468
3469         con->objs = (struct ras_manager *)(con + 1);
3470
3471         amdgpu_ras_set_context(adev, con);
3472
3473         amdgpu_ras_check_supported(adev);
3474
3475         if (!adev->ras_enabled || adev->asic_type == CHIP_VEGA10) {
3476                 /* set gfx block ras context feature for VEGA20 Gaming
3477                  * send ras disable cmd to ras ta during ras late init.
3478                  */
3479                 if (!adev->ras_enabled && adev->asic_type == CHIP_VEGA20) {
3480                         con->features |= BIT(AMDGPU_RAS_BLOCK__GFX);
3481
3482                         return 0;
3483                 }
3484
3485                 r = 0;
3486                 goto release_con;
3487         }
3488
3489         con->update_channel_flag = false;
3490         con->features = 0;
3491         con->schema = 0;
3492         INIT_LIST_HEAD(&con->head);
3493         /* Might need get this flag from vbios. */
3494         con->flags = RAS_DEFAULT_FLAGS;
3495
3496         /* initialize nbio ras function ahead of any other
3497          * ras functions so hardware fatal error interrupt
3498          * can be enabled as early as possible */
3499         switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
3500         case IP_VERSION(7, 4, 0):
3501         case IP_VERSION(7, 4, 1):
3502         case IP_VERSION(7, 4, 4):
3503                 if (!adev->gmc.xgmi.connected_to_cpu)
3504                         adev->nbio.ras = &nbio_v7_4_ras;
3505                 break;
3506         case IP_VERSION(4, 3, 0):
3507                 if (adev->ras_hw_enabled & (1 << AMDGPU_RAS_BLOCK__DF))
3508                         /* unlike other generation of nbio ras,
3509                          * nbio v4_3 only support fatal error interrupt
3510                          * to inform software that DF is freezed due to
3511                          * system fatal error event. driver should not
3512                          * enable nbio ras in such case. Instead,
3513                          * check DF RAS */
3514                         adev->nbio.ras = &nbio_v4_3_ras;
3515                 break;
3516         case IP_VERSION(7, 9, 0):
3517                 if (!adev->gmc.is_app_apu)
3518                         adev->nbio.ras = &nbio_v7_9_ras;
3519                 break;
3520         default:
3521                 /* nbio ras is not available */
3522                 break;
3523         }
3524
3525         /* nbio ras block needs to be enabled ahead of other ras blocks
3526          * to handle fatal error */
3527         r = amdgpu_nbio_ras_sw_init(adev);
3528         if (r)
3529                 return r;
3530
3531         if (adev->nbio.ras &&
3532             adev->nbio.ras->init_ras_controller_interrupt) {
3533                 r = adev->nbio.ras->init_ras_controller_interrupt(adev);
3534                 if (r)
3535                         goto release_con;
3536         }
3537
3538         if (adev->nbio.ras &&
3539             adev->nbio.ras->init_ras_err_event_athub_interrupt) {
3540                 r = adev->nbio.ras->init_ras_err_event_athub_interrupt(adev);
3541                 if (r)
3542                         goto release_con;
3543         }
3544
3545         /* Packed socket_id to ras feature mask bits[31:29] */
3546         if (adev->smuio.funcs &&
3547             adev->smuio.funcs->get_socket_id)
3548                 con->features |= ((adev->smuio.funcs->get_socket_id(adev)) <<
3549                                         AMDGPU_RAS_FEATURES_SOCKETID_SHIFT);
3550
3551         /* Get RAS schema for particular SOC */
3552         con->schema = amdgpu_get_ras_schema(adev);
3553
3554         amdgpu_ras_init_reserved_vram_size(adev);
3555
3556         if (amdgpu_ras_fs_init(adev)) {
3557                 r = -EINVAL;
3558                 goto release_con;
3559         }
3560
3561         if (amdgpu_ras_aca_is_supported(adev)) {
3562                 if (amdgpu_aca_is_enabled(adev))
3563                         r = amdgpu_aca_init(adev);
3564                 else
3565                         r = amdgpu_mca_init(adev);
3566                 if (r)
3567                         goto release_con;
3568         }
3569
3570         dev_info(adev->dev, "RAS INFO: ras initialized successfully, "
3571                  "hardware ability[%x] ras_mask[%x]\n",
3572                  adev->ras_hw_enabled, adev->ras_enabled);
3573
3574         return 0;
3575 release_con:
3576         amdgpu_ras_set_context(adev, NULL);
3577         kfree(con);
3578
3579         return r;
3580 }
3581
3582 int amdgpu_persistent_edc_harvesting_supported(struct amdgpu_device *adev)
3583 {
3584         if (adev->gmc.xgmi.connected_to_cpu ||
3585             adev->gmc.is_app_apu)
3586                 return 1;
3587         return 0;
3588 }
3589
3590 static int amdgpu_persistent_edc_harvesting(struct amdgpu_device *adev,
3591                                         struct ras_common_if *ras_block)
3592 {
3593         struct ras_query_if info = {
3594                 .head = *ras_block,
3595         };
3596
3597         if (!amdgpu_persistent_edc_harvesting_supported(adev))
3598                 return 0;
3599
3600         if (amdgpu_ras_query_error_status(adev, &info) != 0)
3601                 DRM_WARN("RAS init harvest failure");
3602
3603         if (amdgpu_ras_reset_error_status(adev, ras_block->block) != 0)
3604                 DRM_WARN("RAS init harvest reset failure");
3605
3606         return 0;
3607 }
3608
3609 bool amdgpu_ras_is_poison_mode_supported(struct amdgpu_device *adev)
3610 {
3611        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
3612
3613        if (!con)
3614                return false;
3615
3616        return con->poison_supported;
3617 }
3618
3619 /* helper function to handle common stuff in ip late init phase */
3620 int amdgpu_ras_block_late_init(struct amdgpu_device *adev,
3621                          struct ras_common_if *ras_block)
3622 {
3623         struct amdgpu_ras_block_object *ras_obj = NULL;
3624         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
3625         struct ras_query_if *query_info;
3626         unsigned long ue_count, ce_count;
3627         int r;
3628
3629         /* disable RAS feature per IP block if it is not supported */
3630         if (!amdgpu_ras_is_supported(adev, ras_block->block)) {
3631                 amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
3632                 return 0;
3633         }
3634
3635         r = amdgpu_ras_feature_enable_on_boot(adev, ras_block, 1);
3636         if (r) {
3637                 if (adev->in_suspend || amdgpu_in_reset(adev)) {
3638                         /* in resume phase, if fail to enable ras,
3639                          * clean up all ras fs nodes, and disable ras */
3640                         goto cleanup;
3641                 } else
3642                         return r;
3643         }
3644
3645         /* check for errors on warm reset edc persisant supported ASIC */
3646         amdgpu_persistent_edc_harvesting(adev, ras_block);
3647
3648         /* in resume phase, no need to create ras fs node */
3649         if (adev->in_suspend || amdgpu_in_reset(adev))
3650                 return 0;
3651
3652         ras_obj = container_of(ras_block, struct amdgpu_ras_block_object, ras_comm);
3653         if (ras_obj->ras_cb || (ras_obj->hw_ops &&
3654             (ras_obj->hw_ops->query_poison_status ||
3655             ras_obj->hw_ops->handle_poison_consumption))) {
3656                 r = amdgpu_ras_interrupt_add_handler(adev, ras_block);
3657                 if (r)
3658                         goto cleanup;
3659         }
3660
3661         if (ras_obj->hw_ops &&
3662             (ras_obj->hw_ops->query_ras_error_count ||
3663              ras_obj->hw_ops->query_ras_error_status)) {
3664                 r = amdgpu_ras_sysfs_create(adev, ras_block);
3665                 if (r)
3666                         goto interrupt;
3667
3668                 /* Those are the cached values at init.
3669                  */
3670                 query_info = kzalloc(sizeof(*query_info), GFP_KERNEL);
3671                 if (!query_info)
3672                         return -ENOMEM;
3673                 memcpy(&query_info->head, ras_block, sizeof(struct ras_common_if));
3674
3675                 if (amdgpu_ras_query_error_count(adev, &ce_count, &ue_count, query_info) == 0) {
3676                         atomic_set(&con->ras_ce_count, ce_count);
3677                         atomic_set(&con->ras_ue_count, ue_count);
3678                 }
3679
3680                 kfree(query_info);
3681         }
3682
3683         return 0;
3684
3685 interrupt:
3686         if (ras_obj->ras_cb)
3687                 amdgpu_ras_interrupt_remove_handler(adev, ras_block);
3688 cleanup:
3689         amdgpu_ras_feature_enable(adev, ras_block, 0);
3690         return r;
3691 }
3692
/*
 * Default late init for a RAS block: forwards to
 * amdgpu_ras_block_late_init() and hands back its result.
 */
static int amdgpu_ras_block_late_init_default(struct amdgpu_device *adev,
                         struct ras_common_if *ras_block)
{
        int ret = amdgpu_ras_block_late_init(adev, ras_block);

        return ret;
}
3698
3699 /* helper function to remove ras fs node and interrupt handler */
3700 void amdgpu_ras_block_late_fini(struct amdgpu_device *adev,
3701                           struct ras_common_if *ras_block)
3702 {
3703         struct amdgpu_ras_block_object *ras_obj;
3704         if (!ras_block)
3705                 return;
3706
3707         amdgpu_ras_sysfs_remove(adev, ras_block);
3708
3709         ras_obj = container_of(ras_block, struct amdgpu_ras_block_object, ras_comm);
3710         if (ras_obj->ras_cb)
3711                 amdgpu_ras_interrupt_remove_handler(adev, ras_block);
3712 }
3713
/*
 * Default late fini for a RAS block: forwards to
 * amdgpu_ras_block_late_fini().
 *
 * The call is a plain statement rather than "return f();" because ISO C
 * does not allow a return statement with an expression in a void function.
 */
static void amdgpu_ras_block_late_fini_default(struct amdgpu_device *adev,
                          struct ras_common_if *ras_block)
{
        amdgpu_ras_block_late_fini(adev, ras_block);
}
3719
3720 /* do some init work after IP late init as dependence.
3721  * and it runs in resume/gpu reset/booting up cases.
3722  */
3723 void amdgpu_ras_resume(struct amdgpu_device *adev)
3724 {
3725         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
3726         struct ras_manager *obj, *tmp;
3727
3728         if (!adev->ras_enabled || !con) {
3729                 /* clean ras context for VEGA20 Gaming after send ras disable cmd */
3730                 amdgpu_release_ras_context(adev);
3731
3732                 return;
3733         }
3734
3735         if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS) {
3736                 /* Set up all other IPs which are not implemented. There is a
3737                  * tricky thing that IP's actual ras error type should be
3738                  * MULTI_UNCORRECTABLE, but as driver does not handle it, so
3739                  * ERROR_NONE make sense anyway.
3740                  */
3741                 amdgpu_ras_enable_all_features(adev, 1);
3742
3743                 /* We enable ras on all hw_supported block, but as boot
3744                  * parameter might disable some of them and one or more IP has
3745                  * not implemented yet. So we disable them on behalf.
3746                  */
3747                 list_for_each_entry_safe(obj, tmp, &con->head, node) {
3748                         if (!amdgpu_ras_is_supported(adev, obj->head.block)) {
3749                                 amdgpu_ras_feature_enable(adev, &obj->head, 0);
3750                                 /* there should be no any reference. */
3751                                 WARN_ON(alive_obj(obj));
3752                         }
3753                 }
3754         }
3755 }
3756
3757 void amdgpu_ras_suspend(struct amdgpu_device *adev)
3758 {
3759         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
3760
3761         if (!adev->ras_enabled || !con)
3762                 return;
3763
3764         amdgpu_ras_disable_all_features(adev, 0);
3765         /* Make sure all ras objects are disabled. */
3766         if (AMDGPU_RAS_GET_FEATURES(con->features))
3767                 amdgpu_ras_disable_all_features(adev, 1);
3768 }
3769
3770 int amdgpu_ras_late_init(struct amdgpu_device *adev)
3771 {
3772         struct amdgpu_ras_block_list *node, *tmp;
3773         struct amdgpu_ras_block_object *obj;
3774         int r;
3775
3776         amdgpu_ras_event_mgr_init(adev);
3777
3778         if (amdgpu_ras_aca_is_supported(adev)) {
3779                 if (amdgpu_in_reset(adev)) {
3780                         if (amdgpu_aca_is_enabled(adev))
3781                                 r = amdgpu_aca_reset(adev);
3782                         else
3783                                 r = amdgpu_mca_reset(adev);
3784                         if (r)
3785                                 return r;
3786                 }
3787
3788                 if (!amdgpu_sriov_vf(adev)) {
3789                         if (amdgpu_aca_is_enabled(adev))
3790                                 amdgpu_ras_set_aca_debug_mode(adev, false);
3791                         else
3792                                 amdgpu_ras_set_mca_debug_mode(adev, false);
3793                 }
3794         }
3795
3796         /* Guest side doesn't need init ras feature */
3797         if (amdgpu_sriov_vf(adev))
3798                 return 0;
3799
3800         list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {
3801                 obj = node->ras_obj;
3802                 if (!obj) {
3803                         dev_warn(adev->dev, "Warning: abnormal ras list node.\n");
3804                         continue;
3805                 }
3806
3807                 if (!amdgpu_ras_is_supported(adev, obj->ras_comm.block))
3808                         continue;
3809
3810                 if (obj->ras_late_init) {
3811                         r = obj->ras_late_init(adev, &obj->ras_comm);
3812                         if (r) {
3813                                 dev_err(adev->dev, "%s failed to execute ras_late_init! ret:%d\n",
3814                                         obj->ras_comm.name, r);
3815                                 return r;
3816                         }
3817                 } else
3818                         amdgpu_ras_block_late_init_default(adev, &obj->ras_comm);
3819         }
3820
3821         return 0;
3822 }
3823
3824 /* do some fini work before IP fini as dependence */
3825 int amdgpu_ras_pre_fini(struct amdgpu_device *adev)
3826 {
3827         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
3828
3829         if (!adev->ras_enabled || !con)
3830                 return 0;
3831
3832
3833         /* Need disable ras on all IPs here before ip [hw/sw]fini */
3834         if (AMDGPU_RAS_GET_FEATURES(con->features))
3835                 amdgpu_ras_disable_all_features(adev, 0);
3836         amdgpu_ras_recovery_fini(adev);
3837         return 0;
3838 }
3839
3840 int amdgpu_ras_fini(struct amdgpu_device *adev)
3841 {
3842         struct amdgpu_ras_block_list *ras_node, *tmp;
3843         struct amdgpu_ras_block_object *obj = NULL;
3844         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
3845
3846         if (!adev->ras_enabled || !con)
3847                 return 0;
3848
3849         list_for_each_entry_safe(ras_node, tmp, &adev->ras_list, node) {
3850                 if (ras_node->ras_obj) {
3851                         obj = ras_node->ras_obj;
3852                         if (amdgpu_ras_is_supported(adev, obj->ras_comm.block) &&
3853                             obj->ras_fini)
3854                                 obj->ras_fini(adev, &obj->ras_comm);
3855                         else
3856                                 amdgpu_ras_block_late_fini_default(adev, &obj->ras_comm);
3857                 }
3858
3859                 /* Clear ras blocks from ras_list and free ras block list node */
3860                 list_del(&ras_node->node);
3861                 kfree(ras_node);
3862         }
3863
3864         amdgpu_ras_fs_fini(adev);
3865         amdgpu_ras_interrupt_remove_all(adev);
3866
3867         if (amdgpu_ras_aca_is_supported(adev)) {
3868                 if (amdgpu_aca_is_enabled(adev))
3869                         amdgpu_aca_fini(adev);
3870                 else
3871                         amdgpu_mca_fini(adev);
3872         }
3873
3874         WARN(AMDGPU_RAS_GET_FEATURES(con->features), "Feature mask is not cleared");
3875
3876         if (AMDGPU_RAS_GET_FEATURES(con->features))
3877                 amdgpu_ras_disable_all_features(adev, 0);
3878
3879         cancel_delayed_work_sync(&con->ras_counte_delay_work);
3880
3881         amdgpu_ras_set_context(adev, NULL);
3882         kfree(con);
3883
3884         return 0;
3885 }
3886
3887 bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev)
3888 {
3889         struct amdgpu_ras *ras;
3890
3891         ras = amdgpu_ras_get_context(adev);
3892         if (!ras)
3893                 return false;
3894
3895         return atomic_read(&ras->fed);
3896 }
3897
3898 void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status)
3899 {
3900         struct amdgpu_ras *ras;
3901
3902         ras = amdgpu_ras_get_context(adev);
3903         if (ras)
3904                 atomic_set(&ras->fed, !!status);
3905 }
3906
3907 bool amdgpu_ras_event_id_is_valid(struct amdgpu_device *adev, u64 id)
3908 {
3909         return !(id & BIT_ULL(63));
3910 }
3911
3912 u64 amdgpu_ras_acquire_event_id(struct amdgpu_device *adev, enum ras_event_type type)
3913 {
3914         struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
3915         u64 id;
3916
3917         switch (type) {
3918         case RAS_EVENT_TYPE_ISR:
3919                 id = (u64)atomic64_read(&ras->event_mgr->seqnos[type]);
3920                 break;
3921         case RAS_EVENT_TYPE_INVALID:
3922         default:
3923                 id = BIT_ULL(63) | 0ULL;
3924                 break;
3925         }
3926
3927         return id;
3928 }
3929
3930 void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
3931 {
3932         if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) {
3933                 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
3934                 u64 event_id = (u64)atomic64_inc_return(&ras->event_mgr->seqnos[RAS_EVENT_TYPE_ISR]);
3935
3936                 RAS_EVENT_LOG(adev, event_id, "uncorrectable hardware error"
3937                               "(ERREVENT_ATHUB_INTERRUPT) detected!\n");
3938
3939                 amdgpu_ras_set_fed(adev, true);
3940                 ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE1_RESET;
3941                 amdgpu_ras_reset_gpu(adev);
3942         }
3943 }
3944
3945 bool amdgpu_ras_need_emergency_restart(struct amdgpu_device *adev)
3946 {
3947         if (adev->asic_type == CHIP_VEGA20 &&
3948             adev->pm.fw_version <= 0x283400) {
3949                 return !(amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) &&
3950                                 amdgpu_ras_intr_triggered();
3951         }
3952
3953         return false;
3954 }
3955
3956 void amdgpu_release_ras_context(struct amdgpu_device *adev)
3957 {
3958         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
3959
3960         if (!con)
3961                 return;
3962
3963         if (!adev->ras_enabled && con->features & BIT(AMDGPU_RAS_BLOCK__GFX)) {
3964                 con->features &= ~BIT(AMDGPU_RAS_BLOCK__GFX);
3965                 amdgpu_ras_set_context(adev, NULL);
3966                 kfree(con);
3967         }
3968 }
3969
3970 #ifdef CONFIG_X86_MCE_AMD
3971 static struct amdgpu_device *find_adev(uint32_t node_id)
3972 {
3973         int i;
3974         struct amdgpu_device *adev = NULL;
3975
3976         for (i = 0; i < mce_adev_list.num_gpu; i++) {
3977                 adev = mce_adev_list.devs[i];
3978
3979                 if (adev && adev->gmc.xgmi.connected_to_cpu &&
3980                     adev->gmc.xgmi.physical_node_id == node_id)
3981                         break;
3982                 adev = NULL;
3983         }
3984
3985         return adev;
3986 }
3987
3988 #define GET_MCA_IPID_GPUID(m)   (((m) >> 44) & 0xF)
3989 #define GET_UMC_INST(m)         (((m) >> 21) & 0x7)
3990 #define GET_CHAN_INDEX(m)       ((((m) >> 12) & 0x3) | (((m) >> 18) & 0x4))
3991 #define GPU_ID_OFFSET           8
3992
3993 static int amdgpu_bad_page_notifier(struct notifier_block *nb,
3994                                     unsigned long val, void *data)
3995 {
3996         struct mce *m = (struct mce *)data;
3997         struct amdgpu_device *adev = NULL;
3998         uint32_t gpu_id = 0;
3999         uint32_t umc_inst = 0, ch_inst = 0;
4000
4001         /*
4002          * If the error was generated in UMC_V2, which belongs to GPU UMCs,
4003          * and error occurred in DramECC (Extended error code = 0) then only
4004          * process the error, else bail out.
4005          */
4006         if (!m || !((smca_get_bank_type(m->extcpu, m->bank) == SMCA_UMC_V2) &&
4007                     (XEC(m->status, 0x3f) == 0x0)))
4008                 return NOTIFY_DONE;
4009
4010         /*
4011          * If it is correctable error, return.
4012          */
4013         if (mce_is_correctable(m))
4014                 return NOTIFY_OK;
4015
4016         /*
4017          * GPU Id is offset by GPU_ID_OFFSET in MCA_IPID_UMC register.
4018          */
4019         gpu_id = GET_MCA_IPID_GPUID(m->ipid) - GPU_ID_OFFSET;
4020
4021         adev = find_adev(gpu_id);
4022         if (!adev) {
4023                 DRM_WARN("%s: Unable to find adev for gpu_id: %d\n", __func__,
4024                                                                 gpu_id);
4025                 return NOTIFY_DONE;
4026         }
4027
4028         /*
4029          * If it is uncorrectable error, then find out UMC instance and
4030          * channel index.
4031          */
4032         umc_inst = GET_UMC_INST(m->ipid);
4033         ch_inst = GET_CHAN_INDEX(m->ipid);
4034
4035         dev_info(adev->dev, "Uncorrectable error detected in UMC inst: %d, chan_idx: %d",
4036                              umc_inst, ch_inst);
4037
4038         if (!amdgpu_umc_page_retirement_mca(adev, m->addr, ch_inst, umc_inst))
4039                 return NOTIFY_OK;
4040         else
4041                 return NOTIFY_DONE;
4042 }
4043
4044 static struct notifier_block amdgpu_bad_page_nb = {
4045         .notifier_call  = amdgpu_bad_page_notifier,
4046         .priority       = MCE_PRIO_UC,
4047 };
4048
4049 static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev)
4050 {
4051         /*
4052          * Add the adev to the mce_adev_list.
4053          * During mode2 reset, amdgpu device is temporarily
4054          * removed from the mgpu_info list which can cause
4055          * page retirement to fail.
4056          * Use this list instead of mgpu_info to find the amdgpu
4057          * device on which the UMC error was reported.
4058          */
4059         mce_adev_list.devs[mce_adev_list.num_gpu++] = adev;
4060
4061         /*
4062          * Register the x86 notifier only once
4063          * with MCE subsystem.
4064          */
4065         if (notifier_registered == false) {
4066                 mce_register_decode_chain(&amdgpu_bad_page_nb);
4067                 notifier_registered = true;
4068         }
4069 }
4070 #endif
4071
4072 struct amdgpu_ras *amdgpu_ras_get_context(struct amdgpu_device *adev)
4073 {
4074         if (!adev)
4075                 return NULL;
4076
4077         return adev->psp.ras_context.ras;
4078 }
4079
4080 int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_con)
4081 {
4082         if (!adev)
4083                 return -EINVAL;
4084
4085         adev->psp.ras_context.ras = ras_con;
4086         return 0;
4087 }
4088
4089 /* check if ras is supported on block, say, sdma, gfx */
4090 int amdgpu_ras_is_supported(struct amdgpu_device *adev,
4091                 unsigned int block)
4092 {
4093         int ret = 0;
4094         struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
4095
4096         if (block >= AMDGPU_RAS_BLOCK_COUNT)
4097                 return 0;
4098
4099         ret = ras && (adev->ras_enabled & (1 << block));
4100
4101         /* For the special asic with mem ecc enabled but sram ecc
4102          * not enabled, even if the ras block is not supported on
4103          * .ras_enabled, if the asic supports poison mode and the
4104          * ras block has ras configuration, it can be considered
4105          * that the ras block supports ras function.
4106          */
4107         if (!ret &&
4108             (block == AMDGPU_RAS_BLOCK__GFX ||
4109              block == AMDGPU_RAS_BLOCK__SDMA ||
4110              block == AMDGPU_RAS_BLOCK__VCN ||
4111              block == AMDGPU_RAS_BLOCK__JPEG) &&
4112                 (amdgpu_ras_mask & (1 << block)) &&
4113             amdgpu_ras_is_poison_mode_supported(adev) &&
4114             amdgpu_ras_get_ras_block(adev, block, 0))
4115                 ret = 1;
4116
4117         return ret;
4118 }
4119
4120 int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
4121 {
4122         struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
4123
4124         /* mode1 is the only selection for RMA status */
4125         if (ras->is_rma) {
4126                 ras->gpu_reset_flags = 0;
4127                 ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE1_RESET;
4128         }
4129
4130         if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
4131                 amdgpu_reset_domain_schedule(ras->adev->reset_domain, &ras->recovery_work);
4132         return 0;
4133 }
4134
4135 int amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable)
4136 {
4137         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
4138         int ret = 0;
4139
4140         if (con) {
4141                 ret = amdgpu_mca_smu_set_debug_mode(adev, enable);
4142                 if (!ret)
4143                         con->is_aca_debug_mode = enable;
4144         }
4145
4146         return ret;
4147 }
4148
4149 int amdgpu_ras_set_aca_debug_mode(struct amdgpu_device *adev, bool enable)
4150 {
4151         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
4152         int ret = 0;
4153
4154         if (con) {
4155                 if (amdgpu_aca_is_enabled(adev))
4156                         ret = amdgpu_aca_smu_set_debug_mode(adev, enable);
4157                 else
4158                         ret = amdgpu_mca_smu_set_debug_mode(adev, enable);
4159                 if (!ret)
4160                         con->is_aca_debug_mode = enable;
4161         }
4162
4163         return ret;
4164 }
4165
4166 bool amdgpu_ras_get_aca_debug_mode(struct amdgpu_device *adev)
4167 {
4168         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
4169         const struct aca_smu_funcs *smu_funcs = adev->aca.smu_funcs;
4170         const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
4171
4172         if (!con)
4173                 return false;
4174
4175         if ((amdgpu_aca_is_enabled(adev) && smu_funcs && smu_funcs->set_debug_mode) ||
4176             (!amdgpu_aca_is_enabled(adev) && mca_funcs && mca_funcs->mca_set_debug_mode))
4177                 return con->is_aca_debug_mode;
4178         else
4179                 return true;
4180 }
4181
4182 bool amdgpu_ras_get_error_query_mode(struct amdgpu_device *adev,
4183                                      unsigned int *error_query_mode)
4184 {
4185         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
4186         const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
4187         const struct aca_smu_funcs *smu_funcs = adev->aca.smu_funcs;
4188
4189         if (!con) {
4190                 *error_query_mode = AMDGPU_RAS_INVALID_ERROR_QUERY;
4191                 return false;
4192         }
4193
4194         if ((smu_funcs && smu_funcs->set_debug_mode) || (mca_funcs && mca_funcs->mca_set_debug_mode))
4195                 *error_query_mode =
4196                         (con->is_aca_debug_mode) ? AMDGPU_RAS_DIRECT_ERROR_QUERY : AMDGPU_RAS_FIRMWARE_ERROR_QUERY;
4197         else
4198                 *error_query_mode = AMDGPU_RAS_DIRECT_ERROR_QUERY;
4199
4200         return true;
4201 }
4202
4203 /* Register each ip ras block into amdgpu ras */
4204 int amdgpu_ras_register_ras_block(struct amdgpu_device *adev,
4205                 struct amdgpu_ras_block_object *ras_block_obj)
4206 {
4207         struct amdgpu_ras_block_list *ras_node;
4208         if (!adev || !ras_block_obj)
4209                 return -EINVAL;
4210
4211         ras_node = kzalloc(sizeof(*ras_node), GFP_KERNEL);
4212         if (!ras_node)
4213                 return -ENOMEM;
4214
4215         INIT_LIST_HEAD(&ras_node->node);
4216         ras_node->ras_obj = ras_block_obj;
4217         list_add_tail(&ras_node->node, &adev->ras_list);
4218
4219         return 0;
4220 }
4221
4222 void amdgpu_ras_get_error_type_name(uint32_t err_type, char *err_type_name)
4223 {
4224         if (!err_type_name)
4225                 return;
4226
4227         switch (err_type) {
4228         case AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE:
4229                 sprintf(err_type_name, "correctable");
4230                 break;
4231         case AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE:
4232                 sprintf(err_type_name, "uncorrectable");
4233                 break;
4234         default:
4235                 sprintf(err_type_name, "unknown");
4236                 break;
4237         }
4238 }
4239
4240 bool amdgpu_ras_inst_get_memory_id_field(struct amdgpu_device *adev,
4241                                          const struct amdgpu_ras_err_status_reg_entry *reg_entry,
4242                                          uint32_t instance,
4243                                          uint32_t *memory_id)
4244 {
4245         uint32_t err_status_lo_data, err_status_lo_offset;
4246
4247         if (!reg_entry)
4248                 return false;
4249
4250         err_status_lo_offset =
4251                 AMDGPU_RAS_REG_ENTRY_OFFSET(reg_entry->hwip, instance,
4252                                             reg_entry->seg_lo, reg_entry->reg_lo);
4253         err_status_lo_data = RREG32(err_status_lo_offset);
4254
4255         if ((reg_entry->flags & AMDGPU_RAS_ERR_STATUS_VALID) &&
4256             !REG_GET_FIELD(err_status_lo_data, ERR_STATUS_LO, ERR_STATUS_VALID_FLAG))
4257                 return false;
4258
4259         *memory_id = REG_GET_FIELD(err_status_lo_data, ERR_STATUS_LO, MEMORY_ID);
4260
4261         return true;
4262 }
4263
4264 bool amdgpu_ras_inst_get_err_cnt_field(struct amdgpu_device *adev,
4265                                        const struct amdgpu_ras_err_status_reg_entry *reg_entry,
4266                                        uint32_t instance,
4267                                        unsigned long *err_cnt)
4268 {
4269         uint32_t err_status_hi_data, err_status_hi_offset;
4270
4271         if (!reg_entry)
4272                 return false;
4273
4274         err_status_hi_offset =
4275                 AMDGPU_RAS_REG_ENTRY_OFFSET(reg_entry->hwip, instance,
4276                                             reg_entry->seg_hi, reg_entry->reg_hi);
4277         err_status_hi_data = RREG32(err_status_hi_offset);
4278
4279         if ((reg_entry->flags & AMDGPU_RAS_ERR_INFO_VALID) &&
4280             !REG_GET_FIELD(err_status_hi_data, ERR_STATUS_HI, ERR_INFO_VALID_FLAG))
4281                 /* keep the check here in case we need to refer to the result later */
4282                 dev_dbg(adev->dev, "Invalid err_info field\n");
4283
4284         /* read err count */
4285         *err_cnt = REG_GET_FIELD(err_status_hi_data, ERR_STATUS, ERR_CNT);
4286
4287         return true;
4288 }
4289
4290 void amdgpu_ras_inst_query_ras_error_count(struct amdgpu_device *adev,
4291                                            const struct amdgpu_ras_err_status_reg_entry *reg_list,
4292                                            uint32_t reg_list_size,
4293                                            const struct amdgpu_ras_memory_id_entry *mem_list,
4294                                            uint32_t mem_list_size,
4295                                            uint32_t instance,
4296                                            uint32_t err_type,
4297                                            unsigned long *err_count)
4298 {
4299         uint32_t memory_id;
4300         unsigned long err_cnt;
4301         char err_type_name[16];
4302         uint32_t i, j;
4303
4304         for (i = 0; i < reg_list_size; i++) {
4305                 /* query memory_id from err_status_lo */
4306                 if (!amdgpu_ras_inst_get_memory_id_field(adev, &reg_list[i],
4307                                                          instance, &memory_id))
4308                         continue;
4309
4310                 /* query err_cnt from err_status_hi */
4311                 if (!amdgpu_ras_inst_get_err_cnt_field(adev, &reg_list[i],
4312                                                        instance, &err_cnt) ||
4313                     !err_cnt)
4314                         continue;
4315
4316                 *err_count += err_cnt;
4317
4318                 /* log the errors */
4319                 amdgpu_ras_get_error_type_name(err_type, err_type_name);
4320                 if (!mem_list) {
4321                         /* memory_list is not supported */
4322                         dev_info(adev->dev,
4323                                  "%ld %s hardware errors detected in %s, instance: %d, memory_id: %d\n",
4324                                  err_cnt, err_type_name,
4325                                  reg_list[i].block_name,
4326                                  instance, memory_id);
4327                 } else {
4328                         for (j = 0; j < mem_list_size; j++) {
4329                                 if (memory_id == mem_list[j].memory_id) {
4330                                         dev_info(adev->dev,
4331                                                  "%ld %s hardware errors detected in %s, instance: %d, memory block: %s\n",
4332                                                  err_cnt, err_type_name,
4333                                                  reg_list[i].block_name,
4334                                                  instance, mem_list[j].name);
4335                                         break;
4336                                 }
4337                         }
4338                 }
4339         }
4340 }
4341
4342 void amdgpu_ras_inst_reset_ras_error_count(struct amdgpu_device *adev,
4343                                            const struct amdgpu_ras_err_status_reg_entry *reg_list,
4344                                            uint32_t reg_list_size,
4345                                            uint32_t instance)
4346 {
4347         uint32_t err_status_lo_offset, err_status_hi_offset;
4348         uint32_t i;
4349
4350         for (i = 0; i < reg_list_size; i++) {
4351                 err_status_lo_offset =
4352                         AMDGPU_RAS_REG_ENTRY_OFFSET(reg_list[i].hwip, instance,
4353                                                     reg_list[i].seg_lo, reg_list[i].reg_lo);
4354                 err_status_hi_offset =
4355                         AMDGPU_RAS_REG_ENTRY_OFFSET(reg_list[i].hwip, instance,
4356                                                     reg_list[i].seg_hi, reg_list[i].reg_hi);
4357                 WREG32(err_status_lo_offset, 0);
4358                 WREG32(err_status_hi_offset, 0);
4359         }
4360 }
4361
4362 int amdgpu_ras_error_data_init(struct ras_err_data *err_data)
4363 {
4364         memset(err_data, 0, sizeof(*err_data));
4365
4366         INIT_LIST_HEAD(&err_data->err_node_list);
4367
4368         return 0;
4369 }
4370
4371 static void amdgpu_ras_error_node_release(struct ras_err_node *err_node)
4372 {
4373         if (!err_node)
4374                 return;
4375
4376         list_del(&err_node->node);
4377         kvfree(err_node);
4378 }
4379
4380 void amdgpu_ras_error_data_fini(struct ras_err_data *err_data)
4381 {
4382         struct ras_err_node *err_node, *tmp;
4383
4384         list_for_each_entry_safe(err_node, tmp, &err_data->err_node_list, node)
4385                 amdgpu_ras_error_node_release(err_node);
4386 }
4387
4388 static struct ras_err_node *amdgpu_ras_error_find_node_by_id(struct ras_err_data *err_data,
4389                                                              struct amdgpu_smuio_mcm_config_info *mcm_info)
4390 {
4391         struct ras_err_node *err_node;
4392         struct amdgpu_smuio_mcm_config_info *ref_id;
4393
4394         if (!err_data || !mcm_info)
4395                 return NULL;
4396
4397         for_each_ras_error(err_node, err_data) {
4398                 ref_id = &err_node->err_info.mcm_info;
4399
4400                 if (mcm_info->socket_id == ref_id->socket_id &&
4401                     mcm_info->die_id == ref_id->die_id)
4402                         return err_node;
4403         }
4404
4405         return NULL;
4406 }
4407
4408 static struct ras_err_node *amdgpu_ras_error_node_new(void)
4409 {
4410         struct ras_err_node *err_node;
4411
4412         err_node = kvzalloc(sizeof(*err_node), GFP_KERNEL);
4413         if (!err_node)
4414                 return NULL;
4415
4416         INIT_LIST_HEAD(&err_node->node);
4417
4418         return err_node;
4419 }
4420
4421 static int ras_err_info_cmp(void *priv, const struct list_head *a, const struct list_head *b)
4422 {
4423         struct ras_err_node *nodea = container_of(a, struct ras_err_node, node);
4424         struct ras_err_node *nodeb = container_of(b, struct ras_err_node, node);
4425         struct amdgpu_smuio_mcm_config_info *infoa = &nodea->err_info.mcm_info;
4426         struct amdgpu_smuio_mcm_config_info *infob = &nodeb->err_info.mcm_info;
4427
4428         if (unlikely(infoa->socket_id != infob->socket_id))
4429                 return infoa->socket_id - infob->socket_id;
4430         else
4431                 return infoa->die_id - infob->die_id;
4432
4433         return 0;
4434 }
4435
4436 static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_data,
4437                                 struct amdgpu_smuio_mcm_config_info *mcm_info)
4438 {
4439         struct ras_err_node *err_node;
4440
4441         err_node = amdgpu_ras_error_find_node_by_id(err_data, mcm_info);
4442         if (err_node)
4443                 return &err_node->err_info;
4444
4445         err_node = amdgpu_ras_error_node_new();
4446         if (!err_node)
4447                 return NULL;
4448
4449         INIT_LIST_HEAD(&err_node->err_info.err_addr_list);
4450
4451         memcpy(&err_node->err_info.mcm_info, mcm_info, sizeof(*mcm_info));
4452
4453         err_data->err_list_count++;
4454         list_add_tail(&err_node->node, &err_data->err_node_list);
4455         list_sort(NULL, &err_data->err_node_list, ras_err_info_cmp);
4456
4457         return &err_node->err_info;
4458 }
4459
/* Intentionally a no-op: this function will be retired. */
void amdgpu_ras_add_mca_err_addr(struct ras_err_info *err_info, struct ras_err_addr *err_addr)
{
}
4465
4466 void amdgpu_ras_del_mca_err_addr(struct ras_err_info *err_info, struct ras_err_addr *mca_err_addr)
4467 {
4468         list_del(&mca_err_addr->node);
4469         kfree(mca_err_addr);
4470 }
4471
4472 int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data,
4473                 struct amdgpu_smuio_mcm_config_info *mcm_info,
4474                 struct ras_err_addr *err_addr, u64 count)
4475 {
4476         struct ras_err_info *err_info;
4477
4478         if (!err_data || !mcm_info)
4479                 return -EINVAL;
4480
4481         if (!count)
4482                 return 0;
4483
4484         err_info = amdgpu_ras_error_get_info(err_data, mcm_info);
4485         if (!err_info)
4486                 return -EINVAL;
4487
4488         if (err_addr && err_addr->err_status)
4489                 amdgpu_ras_add_mca_err_addr(err_info, err_addr);
4490
4491         err_info->ue_count += count;
4492         err_data->ue_count += count;
4493
4494         return 0;
4495 }
4496
4497 int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data,
4498                 struct amdgpu_smuio_mcm_config_info *mcm_info,
4499                 struct ras_err_addr *err_addr, u64 count)
4500 {
4501         struct ras_err_info *err_info;
4502
4503         if (!err_data || !mcm_info)
4504                 return -EINVAL;
4505
4506         if (!count)
4507                 return 0;
4508
4509         err_info = amdgpu_ras_error_get_info(err_data, mcm_info);
4510         if (!err_info)
4511                 return -EINVAL;
4512
4513         err_info->ce_count += count;
4514         err_data->ce_count += count;
4515
4516         return 0;
4517 }
4518
4519 int amdgpu_ras_error_statistic_de_count(struct ras_err_data *err_data,
4520                 struct amdgpu_smuio_mcm_config_info *mcm_info,
4521                 struct ras_err_addr *err_addr, u64 count)
4522 {
4523         struct ras_err_info *err_info;
4524
4525         if (!err_data || !mcm_info)
4526                 return -EINVAL;
4527
4528         if (!count)
4529                 return 0;
4530
4531         err_info = amdgpu_ras_error_get_info(err_data, mcm_info);
4532         if (!err_info)
4533                 return -EINVAL;
4534
4535         if (err_addr && err_addr->err_status)
4536                 amdgpu_ras_add_mca_err_addr(err_info, err_addr);
4537
4538         err_info->de_count += count;
4539         err_data->de_count += count;
4540
4541         return 0;
4542 }
4543
4544 #define mmMP0_SMN_C2PMSG_92     0x1609C
4545 #define mmMP0_SMN_C2PMSG_126    0x160BE
4546 static void amdgpu_ras_boot_time_error_reporting(struct amdgpu_device *adev,
4547                                                  u32 instance)
4548 {
4549         u32 socket_id, aid_id, hbm_id;
4550         u32 fw_status;
4551         u32 boot_error;
4552         u64 reg_addr;
4553
4554         /* The pattern for smn addressing in other SOC could be different from
4555          * the one for aqua_vanjaram. We should revisit the code if the pattern
4556          * is changed. In such case, replace the aqua_vanjaram implementation
4557          * with more common helper */
4558         reg_addr = (mmMP0_SMN_C2PMSG_92 << 2) +
4559                    aqua_vanjaram_encode_ext_smn_addressing(instance);
4560         fw_status = amdgpu_device_indirect_rreg_ext(adev, reg_addr);
4561
4562         reg_addr = (mmMP0_SMN_C2PMSG_126 << 2) +
4563                    aqua_vanjaram_encode_ext_smn_addressing(instance);
4564         boot_error = amdgpu_device_indirect_rreg_ext(adev, reg_addr);
4565
4566         socket_id = AMDGPU_RAS_GPU_ERR_SOCKET_ID(boot_error);
4567         aid_id = AMDGPU_RAS_GPU_ERR_AID_ID(boot_error);
4568         hbm_id = ((1 == AMDGPU_RAS_GPU_ERR_HBM_ID(boot_error)) ? 0 : 1);
4569
4570         if (AMDGPU_RAS_GPU_ERR_MEM_TRAINING(boot_error))
4571                 dev_info(adev->dev,
4572                          "socket: %d, aid: %d, hbm: %d, fw_status: 0x%x, memory training failed\n",
4573                          socket_id, aid_id, hbm_id, fw_status);
4574
4575         if (AMDGPU_RAS_GPU_ERR_FW_LOAD(boot_error))
4576                 dev_info(adev->dev,
4577                          "socket: %d, aid: %d, fw_status: 0x%x, firmware load failed at boot time\n",
4578                          socket_id, aid_id, fw_status);
4579
4580         if (AMDGPU_RAS_GPU_ERR_WAFL_LINK_TRAINING(boot_error))
4581                 dev_info(adev->dev,
4582                          "socket: %d, aid: %d, fw_status: 0x%x, wafl link training failed\n",
4583                          socket_id, aid_id, fw_status);
4584
4585         if (AMDGPU_RAS_GPU_ERR_XGMI_LINK_TRAINING(boot_error))
4586                 dev_info(adev->dev,
4587                          "socket: %d, aid: %d, fw_status: 0x%x, xgmi link training failed\n",
4588                          socket_id, aid_id, fw_status);
4589
4590         if (AMDGPU_RAS_GPU_ERR_USR_CP_LINK_TRAINING(boot_error))
4591                 dev_info(adev->dev,
4592                          "socket: %d, aid: %d, fw_status: 0x%x, usr cp link training failed\n",
4593                          socket_id, aid_id, fw_status);
4594
4595         if (AMDGPU_RAS_GPU_ERR_USR_DP_LINK_TRAINING(boot_error))
4596                 dev_info(adev->dev,
4597                          "socket: %d, aid: %d, fw_status: 0x%x, usr dp link training failed\n",
4598                          socket_id, aid_id, fw_status);
4599
4600         if (AMDGPU_RAS_GPU_ERR_HBM_MEM_TEST(boot_error))
4601                 dev_info(adev->dev,
4602                          "socket: %d, aid: %d, hbm: %d, fw_status: 0x%x, hbm memory test failed\n",
4603                          socket_id, aid_id, hbm_id, fw_status);
4604
4605         if (AMDGPU_RAS_GPU_ERR_HBM_BIST_TEST(boot_error))
4606                 dev_info(adev->dev,
4607                          "socket: %d, aid: %d, hbm: %d, fw_status: 0x%x, hbm bist test failed\n",
4608                          socket_id, aid_id, hbm_id, fw_status);
4609 }
4610
4611 static bool amdgpu_ras_boot_error_detected(struct amdgpu_device *adev,
4612                                            u32 instance)
4613 {
4614         u64 reg_addr;
4615         u32 reg_data;
4616         int retry_loop;
4617
4618         reg_addr = (mmMP0_SMN_C2PMSG_92 << 2) +
4619                    aqua_vanjaram_encode_ext_smn_addressing(instance);
4620
4621         for (retry_loop = 0; retry_loop < AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT; retry_loop++) {
4622                 reg_data = amdgpu_device_indirect_rreg_ext(adev, reg_addr);
4623                 if ((reg_data & AMDGPU_RAS_BOOT_STATUS_MASK) == AMDGPU_RAS_BOOT_STEADY_STATUS)
4624                         return false;
4625                 else
4626                         msleep(1);
4627         }
4628
4629         return true;
4630 }
4631
4632 void amdgpu_ras_query_boot_status(struct amdgpu_device *adev, u32 num_instances)
4633 {
4634         u32 i;
4635
4636         for (i = 0; i < num_instances; i++) {
4637                 if (amdgpu_ras_boot_error_detected(adev, i))
4638                         amdgpu_ras_boot_time_error_reporting(adev, i);
4639         }
4640 }
4641
4642 int amdgpu_ras_reserve_page(struct amdgpu_device *adev, uint64_t pfn)
4643 {
4644         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
4645         struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
4646         uint64_t start = pfn << AMDGPU_GPU_PAGE_SHIFT;
4647         int ret = 0;
4648
4649         mutex_lock(&con->page_rsv_lock);
4650         ret = amdgpu_vram_mgr_query_page_status(mgr, start);
4651         if (ret == -ENOENT)
4652                 ret = amdgpu_vram_mgr_reserve_range(mgr, start, AMDGPU_GPU_PAGE_SIZE);
4653         mutex_unlock(&con->page_rsv_lock);
4654
4655         return ret;
4656 }
4657
4658 void amdgpu_ras_event_log_print(struct amdgpu_device *adev, u64 event_id,
4659                                 const char *fmt, ...)
4660 {
4661         struct va_format vaf;
4662         va_list args;
4663
4664         va_start(args, fmt);
4665         vaf.fmt = fmt;
4666         vaf.va = &args;
4667
4668         if (amdgpu_ras_event_id_is_valid(adev, event_id))
4669                 dev_printk(KERN_INFO, adev->dev, "{%llu}%pV", event_id, &vaf);
4670         else
4671                 dev_printk(KERN_INFO, adev->dev, "%pV", &vaf);
4672
4673         va_end(args);
4674 }