/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/console.h>
#include <linux/slab.h>
#include <linux/iommu.h>
#include <linux/pci.h>
#include <linux/pci-p2pdma.h>
#include <linux/apple-gmux.h>

#include <drm/drm_aperture.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_fb_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/device.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "nv.h"
#include "bif/bif_4_1_d.h"
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
#include "amdgpu_pmu.h"
#include "amdgpu_fru_eeprom.h"
#include "amdgpu_reset.h"
#include "amdgpu_virt.h"
#include "amdgpu_dev_coredump.h"

#include <linux/suspend.h>
#include <drm/task_barrier.h>
#include <linux/pm_runtime.h>

#include <drm/drm_drv.h>

#if IS_ENABLED(CONFIG_X86)
#include <asm/intel-family.h>
#endif

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");

#define AMDGPU_RESUME_MS                2000
#define AMDGPU_MAX_RETRY_LIMIT          2
#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
#define AMDGPU_PCIE_INDEX_FALLBACK (0x38 >> 2)
#define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2)
#define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2)

static const struct drm_driver amdgpu_kms_driver;

const char *amdgpu_asic_name[] = {
        "TAHITI",
        "PITCAIRN",
        "VERDE",
        "OLAND",
        "HAINAN",
        "BONAIRE",
        "KAVERI",
        "KABINI",
        "HAWAII",
        "MULLINS",
        "TOPAZ",
        "TONGA",
        "FIJI",
        "CARRIZO",
        "STONEY",
        "POLARIS10",
        "POLARIS11",
        "POLARIS12",
        "VEGAM",
        "VEGA10",
        "VEGA12",
        "VEGA20",
        "RAVEN",
        "ARCTURUS",
        "RENOIR",
        "ALDEBARAN",
        "NAVI10",
        "CYAN_SKILLFISH",
        "NAVI14",
        "NAVI12",
        "SIENNA_CICHLID",
        "NAVY_FLOUNDER",
        "VANGOGH",
        "DIMGREY_CAVEFISH",
        "BEIGE_GOBY",
        "YELLOW_CARP",
        "IP DISCOVERY",
        "LAST",
};

static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev);

/**
 * DOC: pcie_replay_count
 *
 * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs).
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as a sum of the NAKs generated and NAKs received.
 */

static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
                struct device_attribute *attr, char *buf)
{
        struct drm_device *ddev = dev_get_drvdata(dev);
        struct amdgpu_device *adev = drm_to_adev(ddev);
        uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);

        return sysfs_emit(buf, "%llu\n", cnt);
}

static DEVICE_ATTR(pcie_replay_count, 0444,
                amdgpu_device_get_pcie_replay_count, NULL);
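
/*
 * Example of a userspace consumer (illustrative sketch only, not part of
 * this driver; the PCI device path is hypothetical):
 *
 *   #include <stdio.h>
 *
 *   int main(void)
 *   {
 *           unsigned long long cnt;
 *           FILE *f = fopen("/sys/bus/pci/devices/0000:03:00.0/pcie_replay_count", "r");
 *
 *           if (!f)
 *                   return 1;
 *           if (fscanf(f, "%llu", &cnt) == 1)
 *                   printf("PCIe replays: %llu\n", cnt);
 *           fclose(f);
 *           return 0;
 *   }
 */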

static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
                                          struct bin_attribute *attr, char *buf,
                                          loff_t ppos, size_t count)
{
        struct device *dev = kobj_to_dev(kobj);
        struct drm_device *ddev = dev_get_drvdata(dev);
        struct amdgpu_device *adev = drm_to_adev(ddev);
        ssize_t bytes_read;

        switch (ppos) {
        case AMDGPU_SYS_REG_STATE_XGMI:
                bytes_read = amdgpu_asic_get_reg_state(
                        adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count);
                break;
        case AMDGPU_SYS_REG_STATE_WAFL:
                bytes_read = amdgpu_asic_get_reg_state(
                        adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count);
                break;
        case AMDGPU_SYS_REG_STATE_PCIE:
                bytes_read = amdgpu_asic_get_reg_state(
                        adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count);
                break;
        case AMDGPU_SYS_REG_STATE_USR:
                bytes_read = amdgpu_asic_get_reg_state(
                        adev, AMDGPU_REG_STATE_TYPE_USR, buf, count);
                break;
        case AMDGPU_SYS_REG_STATE_USR_1:
                bytes_read = amdgpu_asic_get_reg_state(
                        adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count);
                break;
        default:
                return -EINVAL;
        }

        return bytes_read;
}

BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL,
         AMDGPU_SYS_REG_STATE_END);

int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev)
{
        int ret;

        if (!amdgpu_asic_get_reg_state_supported(adev))
                return 0;

        ret = sysfs_create_bin_file(&adev->dev->kobj, &bin_attr_reg_state);

        return ret;
}

void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
{
        if (!amdgpu_asic_get_reg_state_supported(adev))
                return;
        sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
}
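
/*
 * Example of a userspace consumer (illustrative sketch only): reg_state is a
 * binary attribute whose read offset selects the register-state type handled
 * in the switch above, so a pread() at offset AMDGPU_SYS_REG_STATE_XGMI
 * returns the XGMI state. The offset constants live in the driver headers,
 * not in a UAPI header, and the device path below is hypothetical:
 *
 *   #include <fcntl.h>
 *   #include <unistd.h>
 *
 *   char buf[4096];
 *   int fd = open("/sys/bus/pci/devices/0000:03:00.0/reg_state", O_RDONLY);
 *   ssize_t n = pread(fd, buf, sizeof(buf), AMDGPU_SYS_REG_STATE_XGMI);
 *   close(fd);
 */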

/**
 * DOC: board_info
 *
 * The amdgpu driver provides a sysfs API for giving board related information.
 * It provides the form factor information in the format
 *
 *   type : form factor
 *
 * Possible form factor values
 *
 * - "cem"              - PCIE CEM card
 * - "oam"              - Open Compute Accelerator Module
 * - "unknown"          - Not known
 *
 */

static ssize_t amdgpu_device_get_board_info(struct device *dev,
                                            struct device_attribute *attr,
                                            char *buf)
{
        struct drm_device *ddev = dev_get_drvdata(dev);
        struct amdgpu_device *adev = drm_to_adev(ddev);
        enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
        const char *pkg;

        if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
                pkg_type = adev->smuio.funcs->get_pkg_type(adev);

        switch (pkg_type) {
        case AMDGPU_PKG_TYPE_CEM:
                pkg = "cem";
                break;
        case AMDGPU_PKG_TYPE_OAM:
                pkg = "oam";
                break;
        default:
                pkg = "unknown";
                break;
        }

        return sysfs_emit(buf, "%s : %s\n", "type", pkg);
}

static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);
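
/*
 * Given the sysfs_emit() format above, a read of board_info yields a single
 * line such as "type : cem" for a PCIe CEM card or "type : oam" for an OAM
 * package.
 */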

static struct attribute *amdgpu_board_attrs[] = {
        &dev_attr_board_info.attr,
        NULL,
};

static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
                                             struct attribute *attr, int n)
{
        struct device *dev = kobj_to_dev(kobj);
        struct drm_device *ddev = dev_get_drvdata(dev);
        struct amdgpu_device *adev = drm_to_adev(ddev);

        if (adev->flags & AMD_IS_APU)
                return 0;

        return attr->mode;
}

static const struct attribute_group amdgpu_board_attrs_group = {
        .attrs = amdgpu_board_attrs,
        .is_visible = amdgpu_board_attrs_is_visible
};

static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);


/**
 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with ATPX power control,
 * otherwise return false.
 */
bool amdgpu_device_supports_px(struct drm_device *dev)
{
        struct amdgpu_device *adev = drm_to_adev(dev);

        if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
                return true;
        return false;
}

/**
 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with ACPI power control,
 * otherwise return false.
 */
bool amdgpu_device_supports_boco(struct drm_device *dev)
{
        struct amdgpu_device *adev = drm_to_adev(dev);

        if (adev->has_pr3 ||
            ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
                return true;
        return false;
}

/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @dev: drm_device pointer
 *
 * Return:
 * 1 if the device supports BACO;
 * 3 if the device supports MACO (which only works if BACO is supported);
 * otherwise return 0.
 */
int amdgpu_device_supports_baco(struct drm_device *dev)
{
        struct amdgpu_device *adev = drm_to_adev(dev);

        return amdgpu_asic_supports_baco(adev);
}

void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev)
{
        struct drm_device *dev;
        int bamaco_support;

        dev = adev_to_drm(adev);

        adev->pm.rpm_mode = AMDGPU_RUNPM_NONE;
        bamaco_support = amdgpu_device_supports_baco(dev);

        switch (amdgpu_runtime_pm) {
        case 2:
                if (bamaco_support & MACO_SUPPORT) {
                        adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
                        dev_info(adev->dev, "Forcing BAMACO for runtime pm\n");
                } else if (bamaco_support == BACO_SUPPORT) {
                        adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
                        dev_info(adev->dev, "Requested mode BAMACO not available, falling back to BACO\n");
                }
                break;
        case 1:
                if (bamaco_support & BACO_SUPPORT) {
                        adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
                        dev_info(adev->dev, "Forcing BACO for runtime pm\n");
                }
                break;
        case -1:
        case -2:
                if (amdgpu_device_supports_px(dev)) { /* enable PX as runtime mode */
                        adev->pm.rpm_mode = AMDGPU_RUNPM_PX;
                        dev_info(adev->dev, "Using ATPX for runtime pm\n");
                } else if (amdgpu_device_supports_boco(dev)) { /* enable boco as runtime mode */
                        adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO;
                        dev_info(adev->dev, "Using BOCO for runtime pm\n");
                } else {
                        if (!bamaco_support)
                                goto no_runtime_pm;

                        switch (adev->asic_type) {
                        case CHIP_VEGA20:
                        case CHIP_ARCTURUS:
                                /* BACO is not supported on vega20 and arcturus */
                                break;
                        case CHIP_VEGA10:
                                /* enable BACO as runpm mode if noretry=0 */
                                if (!adev->gmc.noretry)
                                        adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
                                break;
                        default:
                                /* enable BACO as runpm mode on CI+ */
                                adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
                                break;
                        }

                        if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) {
                                if (bamaco_support & MACO_SUPPORT) {
                                        adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
                                        dev_info(adev->dev, "Using BAMACO for runtime pm\n");
                                } else {
                                        dev_info(adev->dev, "Using BACO for runtime pm\n");
                                }
                        }
                }
                break;
        case 0:
                dev_info(adev->dev, "runtime pm is manually disabled\n");
                break;
        default:
                break;
        }

no_runtime_pm:
        if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE)
                dev_info(adev->dev, "Runtime PM not available\n");
}

/**
 * amdgpu_device_supports_smart_shift - Is the device a dGPU with
 * smart shift support
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with Smart Shift support,
 * otherwise returns false.
 */
bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
{
        return (amdgpu_device_supports_boco(dev) &&
                amdgpu_acpi_is_power_shift_control_supported());
}

/*
 * VRAM access helper functions
 */

/**
 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size; the buffer at @buf must be at least @size bytes
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
                             void *buf, size_t size, bool write)
{
        unsigned long flags;
        uint32_t hi = ~0, tmp = 0;
        uint32_t *data = buf;
        uint64_t last;
        int idx;

        if (!drm_dev_enter(adev_to_drm(adev), &idx))
                return;

        BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));

        spin_lock_irqsave(&adev->mmio_idx_lock, flags);
        for (last = pos + size; pos < last; pos += 4) {
                tmp = pos >> 31;

                WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
                if (tmp != hi) {
                        WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
                        hi = tmp;
                }
                if (write)
                        WREG32_NO_KIQ(mmMM_DATA, *data++);
                else
                        *data++ = RREG32_NO_KIQ(mmMM_DATA);
        }

        spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
        drm_dev_exit(idx);
}

/**
 * amdgpu_device_aper_access - access vram via the vram aperture
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size; the buffer at @buf must be at least @size bytes
 * @write: true - write to vram, otherwise - read from vram
 *
 * Returns the number of bytes transferred.
 */
size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
                                 void *buf, size_t size, bool write)
{
#ifdef CONFIG_64BIT
        void __iomem *addr;
        size_t count = 0;
        uint64_t last;

        if (!adev->mman.aper_base_kaddr)
                return 0;

        last = min(pos + size, adev->gmc.visible_vram_size);
        if (last > pos) {
                addr = adev->mman.aper_base_kaddr + pos;
                count = last - pos;

                if (write) {
                        memcpy_toio(addr, buf, count);
                        /* Make sure the HDP write cache flush happens without any reordering
                         * after the system memory contents are sent over PCIe to the device
                         */
                        mb();
                        amdgpu_device_flush_hdp(adev, NULL);
                } else {
                        amdgpu_device_invalidate_hdp(adev, NULL);
                        /* Make sure the HDP read cache is invalidated before issuing a read
                         * to the PCIe device
                         */
                        mb();
                        memcpy_fromio(buf, addr, count);
                }

        }

        return count;
#else
        return 0;
#endif
}

/**
 * amdgpu_device_vram_access - read/write a buffer in vram
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size; the buffer at @buf must be at least @size bytes
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
                               void *buf, size_t size, bool write)
{
        size_t count;

        /* try using the vram aperture to access vram first */
        count = amdgpu_device_aper_access(adev, pos, buf, size, write);
        size -= count;
        if (size) {
                /* use MM_INDEX/MM_DATA to access the rest of vram */
                pos += count;
                buf += count;
                amdgpu_device_mm_access(adev, pos, buf, size, write);
        }
}
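
/*
 * Example caller (sketch): reading and then writing back a dword at a VRAM
 * offset with this helper. Note that the MM fallback path BUG()s on
 * unaligned accesses, so @pos and @size should be 4-byte aligned.
 *
 *   uint32_t val;
 *
 *   amdgpu_device_vram_access(adev, pos, &val, sizeof(val), false); // read
 *   val |= 0x1;
 *   amdgpu_device_vram_access(adev, pos, &val, sizeof(val), true);  // write
 */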

/*
 * register access helper functions.
 */

/* Check if hw access should be skipped because of hotplug or device error */
bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
{
        if (adev->no_hw_access)
                return true;

#ifdef CONFIG_LOCKDEP
        /*
         * This is a bit complicated to understand, so worth a comment. What we assert
         * here is that the GPU reset is not running on another thread in parallel.
         *
         * For this we trylock the read side of the reset semaphore; if that succeeds
         * we know that the reset is not running in parallel.
         *
         * If the trylock fails we assert that we are either already holding the read
         * side of the lock or are the reset thread itself and hold the write side of
         * the lock.
         */
        if (in_task()) {
                if (down_read_trylock(&adev->reset_domain->sem))
                        up_read(&adev->reset_domain->sem);
                else
                        lockdep_assert_held(&adev->reset_domain->sem);
        }
#endif
        return false;
}

/**
 * amdgpu_device_rreg - read a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
                            uint32_t reg, uint32_t acc_flags)
{
        uint32_t ret;

        if (amdgpu_device_skip_hw_access(adev))
                return 0;

        if ((reg * 4) < adev->rmmio_size) {
                if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
                    amdgpu_sriov_runtime(adev) &&
                    down_read_trylock(&adev->reset_domain->sem)) {
                        ret = amdgpu_kiq_rreg(adev, reg, 0);
                        up_read(&adev->reset_domain->sem);
                } else {
                        ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
                }
        } else {
                ret = adev->pcie_rreg(adev, reg * 4);
        }

        trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);

        return ret;
}
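
/*
 * Callers normally go through the wrapper macros from amdgpu.h rather than
 * calling this directly, e.g.:
 *
 *   val = RREG32(reg);         // amdgpu_device_rreg(adev, (reg), 0)
 *   val = RREG32_NO_KIQ(reg);  // passes AMDGPU_REGS_NO_KIQ to skip the KIQ path
 */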

/*
 * MMIO register read helper functions (byte granularity)
 * @offset: byte offset from MMIO start
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
{
        if (amdgpu_device_skip_hw_access(adev))
                return 0;

        if (offset < adev->rmmio_size)
                return (readb(adev->rmmio + offset));
        BUG();
}


/**
 * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 * @xcc_id: xcc accelerated compute core id
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
                                uint32_t reg, uint32_t acc_flags,
                                uint32_t xcc_id)
{
        uint32_t ret, rlcg_flag;

        if (amdgpu_device_skip_hw_access(adev))
                return 0;

        if ((reg * 4) < adev->rmmio_size) {
                if (amdgpu_sriov_vf(adev) &&
                    !amdgpu_sriov_runtime(adev) &&
                    adev->gfx.rlc.rlcg_reg_access_supported &&
                    amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
                                                         GC_HWIP, false,
                                                         &rlcg_flag)) {
                        ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, GET_INST(GC, xcc_id));
                } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
                    amdgpu_sriov_runtime(adev) &&
                    down_read_trylock(&adev->reset_domain->sem)) {
                        ret = amdgpu_kiq_rreg(adev, reg, xcc_id);
                        up_read(&adev->reset_domain->sem);
                } else {
                        ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
                }
        } else {
                ret = adev->pcie_rreg(adev, reg * 4);
        }

        return ret;
}

/*
 * MMIO register write helper functions (byte granularity)
 * @offset: byte offset from MMIO start
 * @value: the value to be written to the register
 */

/**
 * amdgpu_mm_wreg8 - write a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
{
        if (amdgpu_device_skip_hw_access(adev))
                return;

        if (offset < adev->rmmio_size)
                writeb(value, adev->rmmio + offset);
        else
                BUG();
}

/**
 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_device_wreg(struct amdgpu_device *adev,
                        uint32_t reg, uint32_t v,
                        uint32_t acc_flags)
{
        if (amdgpu_device_skip_hw_access(adev))
                return;

        if ((reg * 4) < adev->rmmio_size) {
                if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
                    amdgpu_sriov_runtime(adev) &&
                    down_read_trylock(&adev->reset_domain->sem)) {
                        amdgpu_kiq_wreg(adev, reg, v, 0);
                        up_read(&adev->reset_domain->sem);
                } else {
                        writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
                }
        } else {
                adev->pcie_wreg(adev, reg * 4, v);
        }

        trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
}

/**
 * amdgpu_mm_wreg_mmio_rlc - write a register either with direct/indirect mmio or with RLC path if in range
 *
 * @adev: amdgpu_device pointer
 * @reg: mmio/rlc register
 * @v: value to write
 * @xcc_id: xcc accelerated compute core id
 *
 * This function is invoked only for debugfs register access.
 */
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
                             uint32_t reg, uint32_t v,
                             uint32_t xcc_id)
{
        if (amdgpu_device_skip_hw_access(adev))
                return;

        if (amdgpu_sriov_fullaccess(adev) &&
            adev->gfx.rlc.funcs &&
            adev->gfx.rlc.funcs->is_rlcg_access_range) {
                if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
                        return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
        } else if ((reg * 4) >= adev->rmmio_size) {
                adev->pcie_wreg(adev, reg * 4, v);
        } else {
                writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
        }
}

/**
 * amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 * @xcc_id: xcc accelerated compute core id
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
                        uint32_t reg, uint32_t v,
                        uint32_t acc_flags, uint32_t xcc_id)
{
        uint32_t rlcg_flag;

        if (amdgpu_device_skip_hw_access(adev))
                return;

        if ((reg * 4) < adev->rmmio_size) {
                if (amdgpu_sriov_vf(adev) &&
                    !amdgpu_sriov_runtime(adev) &&
                    adev->gfx.rlc.rlcg_reg_access_supported &&
                    amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
                                                         GC_HWIP, true,
                                                         &rlcg_flag)) {
                        amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, GET_INST(GC, xcc_id));
                } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
                    amdgpu_sriov_runtime(adev) &&
                    down_read_trylock(&adev->reset_domain->sem)) {
                        amdgpu_kiq_wreg(adev, reg, v, xcc_id);
                        up_read(&adev->reset_domain->sem);
                } else {
                        writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
                }
        } else {
                adev->pcie_wreg(adev, reg * 4, v);
        }
}

/**
 * amdgpu_device_indirect_rreg - read an indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register address to read from
 *
 * Returns the value of indirect register @reg_addr
 */
u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
                                u32 reg_addr)
{
        unsigned long flags, pcie_index, pcie_data;
        void __iomem *pcie_index_offset;
        void __iomem *pcie_data_offset;
        u32 r;

        pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
        pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

        spin_lock_irqsave(&adev->pcie_idx_lock, flags);
        pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
        pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

        writel(reg_addr, pcie_index_offset);
        readl(pcie_index_offset);
        r = readl(pcie_data_offset);
        spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

        return r;
}

u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
                                    u64 reg_addr)
{
        unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
        u32 r;
        void __iomem *pcie_index_offset;
        void __iomem *pcie_index_hi_offset;
        void __iomem *pcie_data_offset;

        if (unlikely(!adev->nbio.funcs)) {
                pcie_index = AMDGPU_PCIE_INDEX_FALLBACK;
                pcie_data = AMDGPU_PCIE_DATA_FALLBACK;
        } else {
                pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
                pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
        }

        if (reg_addr >> 32) {
                if (unlikely(!adev->nbio.funcs))
                        pcie_index_hi = AMDGPU_PCIE_INDEX_HI_FALLBACK;
                else
                        pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
        } else {
                pcie_index_hi = 0;
        }

        spin_lock_irqsave(&adev->pcie_idx_lock, flags);
        pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
        pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
        if (pcie_index_hi != 0)
                pcie_index_hi_offset = (void __iomem *)adev->rmmio +
                                pcie_index_hi * 4;

        writel(reg_addr, pcie_index_offset);
        readl(pcie_index_offset);
        if (pcie_index_hi != 0) {
                writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
                readl(pcie_index_hi_offset);
        }
        r = readl(pcie_data_offset);

        /* clear the high bits */
        if (pcie_index_hi != 0) {
                writel(0, pcie_index_hi_offset);
                readl(pcie_index_hi_offset);
        }

        spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

        return r;
}

/**
 * amdgpu_device_indirect_rreg64 - read a 64-bit indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register address to read from
 *
 * Returns the value of indirect register @reg_addr
 */
u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
                                  u32 reg_addr)
{
        unsigned long flags, pcie_index, pcie_data;
        void __iomem *pcie_index_offset;
        void __iomem *pcie_data_offset;
        u64 r;

        pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
        pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

        spin_lock_irqsave(&adev->pcie_idx_lock, flags);
        pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
        pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

        /* read low 32 bits */
        writel(reg_addr, pcie_index_offset);
        readl(pcie_index_offset);
        r = readl(pcie_data_offset);
        /* read high 32 bits */
        writel(reg_addr + 4, pcie_index_offset);
        readl(pcie_index_offset);
        r |= ((u64)readl(pcie_data_offset) << 32);
        spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

        return r;
}

u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
                                  u64 reg_addr)
{
        unsigned long flags, pcie_index, pcie_data;
        unsigned long pcie_index_hi = 0;
        void __iomem *pcie_index_offset;
        void __iomem *pcie_index_hi_offset;
        void __iomem *pcie_data_offset;
        u64 r;

        pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
        pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
        if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
                pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);

        spin_lock_irqsave(&adev->pcie_idx_lock, flags);
        pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
        pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
        if (pcie_index_hi != 0)
                pcie_index_hi_offset = (void __iomem *)adev->rmmio +
                        pcie_index_hi * 4;

        /* read low 32 bits */
        writel(reg_addr, pcie_index_offset);
        readl(pcie_index_offset);
        if (pcie_index_hi != 0) {
                writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
                readl(pcie_index_hi_offset);
        }
        r = readl(pcie_data_offset);
        /* read high 32 bits */
        writel(reg_addr + 4, pcie_index_offset);
        readl(pcie_index_offset);
        if (pcie_index_hi != 0) {
                writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
                readl(pcie_index_hi_offset);
        }
        r |= ((u64)readl(pcie_data_offset) << 32);

        /* clear the high bits */
        if (pcie_index_hi != 0) {
                writel(0, pcie_index_hi_offset);
                readl(pcie_index_hi_offset);
        }

        spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

        return r;
}

/**
 * amdgpu_device_indirect_wreg - write to an indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 *
 */
void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
                                 u32 reg_addr, u32 reg_data)
{
        unsigned long flags, pcie_index, pcie_data;
        void __iomem *pcie_index_offset;
        void __iomem *pcie_data_offset;

        pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
        pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

        spin_lock_irqsave(&adev->pcie_idx_lock, flags);
        pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
        pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

        writel(reg_addr, pcie_index_offset);
        readl(pcie_index_offset);
        writel(reg_data, pcie_data_offset);
        readl(pcie_data_offset);
        spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
                                     u64 reg_addr, u32 reg_data)
{
        unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
        void __iomem *pcie_index_offset;
        void __iomem *pcie_index_hi_offset;
        void __iomem *pcie_data_offset;

        pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
        pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
        if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
                pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
        else
                pcie_index_hi = 0;

        spin_lock_irqsave(&adev->pcie_idx_lock, flags);
        pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
        pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
        if (pcie_index_hi != 0)
                pcie_index_hi_offset = (void __iomem *)adev->rmmio +
                                pcie_index_hi * 4;

        writel(reg_addr, pcie_index_offset);
        readl(pcie_index_offset);
        if (pcie_index_hi != 0) {
                writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
                readl(pcie_index_hi_offset);
        }
        writel(reg_data, pcie_data_offset);
        readl(pcie_data_offset);

        /* clear the high bits */
        if (pcie_index_hi != 0) {
                writel(0, pcie_index_hi_offset);
                readl(pcie_index_hi_offset);
        }

        spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

/**
 * amdgpu_device_indirect_wreg64 - write a 64-bit value to an indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 *
 */
void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
                                   u32 reg_addr, u64 reg_data)
{
        unsigned long flags, pcie_index, pcie_data;
        void __iomem *pcie_index_offset;
        void __iomem *pcie_data_offset;

        pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
        pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

        spin_lock_irqsave(&adev->pcie_idx_lock, flags);
        pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
        pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

        /* write low 32 bits */
        writel(reg_addr, pcie_index_offset);
        readl(pcie_index_offset);
        writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
        readl(pcie_data_offset);
        /* write high 32 bits */
        writel(reg_addr + 4, pcie_index_offset);
        readl(pcie_index_offset);
        writel((u32)(reg_data >> 32), pcie_data_offset);
        readl(pcie_data_offset);
        spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
                                   u64 reg_addr, u64 reg_data)
{
        unsigned long flags, pcie_index, pcie_data;
        unsigned long pcie_index_hi = 0;
        void __iomem *pcie_index_offset;
        void __iomem *pcie_index_hi_offset;
        void __iomem *pcie_data_offset;

        pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
        pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
        if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
                pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);

        spin_lock_irqsave(&adev->pcie_idx_lock, flags);
        pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
        pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
        if (pcie_index_hi != 0)
                pcie_index_hi_offset = (void __iomem *)adev->rmmio +
                                pcie_index_hi * 4;

        /* write low 32 bits */
        writel(reg_addr, pcie_index_offset);
        readl(pcie_index_offset);
        if (pcie_index_hi != 0) {
                writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
                readl(pcie_index_hi_offset);
        }
        writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
        readl(pcie_data_offset);
        /* write high 32 bits */
        writel(reg_addr + 4, pcie_index_offset);
        readl(pcie_index_offset);
        if (pcie_index_hi != 0) {
                writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
                readl(pcie_index_hi_offset);
        }
        writel((u32)(reg_data >> 32), pcie_data_offset);
        readl(pcie_data_offset);

        /* clear the high bits */
        if (pcie_index_hi != 0) {
                writel(0, pcie_index_hi_offset);
                readl(pcie_index_hi_offset);
        }

        spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

/**
 * amdgpu_device_get_rev_id - query device rev_id
 *
 * @adev: amdgpu_device pointer
 *
 * Returns the device rev_id.
 */
u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
{
        return adev->nbio.funcs->get_rev_id(adev);
}

/**
 * amdgpu_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 *
 * Dummy register read function.  Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
        DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
        BUG();
        return 0;
}

static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
{
        DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
        BUG();
        return 0;
}

/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function.  Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
        DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
                  reg, v);
        BUG();
}

static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
{
        DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
                  reg, v);
        BUG();
}

/**
 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 *
 * Dummy register read function.  Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
{
        DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
        BUG();
        return 0;
}

static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
{
        DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
        BUG();
        return 0;
}

/**
 * amdgpu_invalid_wreg64 - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function.  Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
{
        DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
                  reg, v);
        BUG();
}

static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
{
        DRM_ERROR("Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
                  reg, v);
        BUG();
}

/**
 * amdgpu_block_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu_device pointer
 * @block: offset of instance
 * @reg: offset of register
 *
 * Dummy register read function.  Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
                                          uint32_t block, uint32_t reg)
{
        DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
                  reg, block);
        BUG();
        return 0;
}

/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function.  Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
                                      uint32_t block,
                                      uint32_t reg, uint32_t v)
{
        DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
                  reg, block, v);
        BUG();
}

/**
 * amdgpu_device_asic_init - Wrapper for atom asic_init
 *
 * @adev: amdgpu_device pointer
 *
 * Does any asic specific work and then calls atom asic init.
 */
static int amdgpu_device_asic_init(struct amdgpu_device *adev)
{
        int ret;

        amdgpu_asic_pre_asic_init(adev);

        if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
            amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
            amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
                amdgpu_psp_wait_for_bootloader(adev);
                ret = amdgpu_atomfirmware_asic_init(adev, true);
                return ret;
        } else {
                return amdgpu_atom_asic_init(adev->mode_info.atom_context);
        }
}

/**
 * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
 *
 * @adev: amdgpu_device pointer
 *
 * Allocates a scratch page of VRAM for use by various things in the
 * driver.
 */
static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
{
        return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
                                       AMDGPU_GEM_DOMAIN_VRAM |
                                       AMDGPU_GEM_DOMAIN_GTT,
                                       &adev->mem_scratch.robj,
                                       &adev->mem_scratch.gpu_addr,
                                       (void **)&adev->mem_scratch.ptr);
}

/**
 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
 *
 * @adev: amdgpu_device pointer
 *
 * Frees the VRAM scratch page.
 */
static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
{
        amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
}

/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with AND and OR masks.
 * This is a helper for setting golden registers.
 */
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
                                             const u32 *registers,
                                             const u32 array_size)
{
        u32 tmp, reg, and_mask, or_mask;
        int i;

        if (array_size % 3)
                return;

        for (i = 0; i < array_size; i += 3) {
                reg = registers[i + 0];
                and_mask = registers[i + 1];
                or_mask = registers[i + 2];

                if (and_mask == 0xffffffff) {
                        tmp = or_mask;
                } else {
                        tmp = RREG32(reg);
                        tmp &= ~and_mask;
                        if (adev->family >= AMDGPU_FAMILY_AI)
                                tmp |= (or_mask & and_mask);
                        else
                                tmp |= or_mask;
                }
                WREG32(reg, tmp);
        }
}
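
/*
 * Example (sketch; mmEXAMPLE_REG, mmOTHER_REG and the values are made up):
 * the array is consumed as {offset, and_mask, or_mask} triples, and an
 * and_mask of 0xffffffff writes or_mask verbatim:
 *
 *   static const u32 example_golden_settings[] = {
 *           mmEXAMPLE_REG, 0x0000ff00, 0x00001200,
 *           mmOTHER_REG,   0xffffffff, 0x00000001,
 *   };
 *
 *   amdgpu_device_program_register_sequence(adev, example_golden_settings,
 *                                           ARRAY_SIZE(example_golden_settings));
 */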

/**
 * amdgpu_device_pci_config_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using the pci config reset sequence.
 * Only applicable to asics prior to vega10.
 */
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
{
        pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
}

/**
 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
 */
int amdgpu_device_pci_reset(struct amdgpu_device *adev)
{
        return pci_reset_function(adev->pdev);
}

/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */

/**
 * amdgpu_device_wb_fini - Disable Writeback and free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Disables Writeback and frees the Writeback memory (all asics).
 * Used at driver shutdown.
 */
static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
{
        if (adev->wb.wb_obj) {
                amdgpu_bo_free_kernel(&adev->wb.wb_obj,
                                      &adev->wb.gpu_addr,
                                      (void **)&adev->wb.wb);
                adev->wb.wb_obj = NULL;
        }
}

/**
 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes writeback and allocates writeback memory (all asics).
 * Used at driver startup.
 * Returns 0 on success or a negative error code on failure.
 */
static int amdgpu_device_wb_init(struct amdgpu_device *adev)
{
        int r;

        if (adev->wb.wb_obj == NULL) {
                /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256-bit slots */
                r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
                                            PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
                                            &adev->wb.wb_obj, &adev->wb.gpu_addr,
                                            (void **)&adev->wb.wb);
                if (r) {
                        dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
                        return r;
                }

                adev->wb.num_wb = AMDGPU_MAX_WB;
                memset(&adev->wb.used, 0, sizeof(adev->wb.used));

                /* clear wb memory */
                memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
        }

        return 0;
}

/**
 * amdgpu_device_wb_get - Allocate a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Allocate a wb slot for use by the driver (all asics).
 * Returns 0 on success or -EINVAL on failure.
 */
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
{
        unsigned long flags, offset;

        spin_lock_irqsave(&adev->wb.lock, flags);
        offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
        if (offset < adev->wb.num_wb) {
                __set_bit(offset, adev->wb.used);
                spin_unlock_irqrestore(&adev->wb.lock, flags);
                *wb = offset << 3; /* convert to dw offset */
                return 0;
        } else {
                spin_unlock_irqrestore(&adev->wb.lock, flags);
                return -EINVAL;
        }
}

/**
 * amdgpu_device_wb_free - Free a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Free a wb slot allocated for use by the driver (all asics)
 */
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
{
        unsigned long flags;

        wb >>= 3;
        spin_lock_irqsave(&adev->wb.lock, flags);
        if (wb < adev->wb.num_wb)
                __clear_bit(wb, adev->wb.used);
        spin_unlock_irqrestore(&adev->wb.lock, flags);
}
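
/*
 * Typical usage (sketch): a caller allocates a slot, derives the CPU and GPU
 * addresses from the returned dword offset, and frees the slot when done:
 *
 *   u32 wb;
 *
 *   if (!amdgpu_device_wb_get(adev, &wb)) {
 *           volatile u32 *cpu_ptr = &adev->wb.wb[wb];
 *           u64 gpu_addr = adev->wb.gpu_addr + wb * 4;
 *
 *           // ... let the GPU write status to gpu_addr, poll *cpu_ptr ...
 *
 *           amdgpu_device_wb_free(adev, wb);
 *   }
 */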
1519
1520 /**
1521  * amdgpu_device_resize_fb_bar - try to resize FB BAR
1522  *
1523  * @adev: amdgpu_device pointer
1524  *
1525  * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1526  * to fail, but if any of the BARs is not accessible after the size we abort
1527  * driver loading by returning -ENODEV.
1528  */
1529 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1530 {
1531         int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
1532         struct pci_bus *root;
1533         struct resource *res;
1534         unsigned int i;
1535         u16 cmd;
1536         int r;
1537
1538         if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1539                 return 0;
1540
1541         /* Bypass for VF */
1542         if (amdgpu_sriov_vf(adev))
1543                 return 0;
1544
1545         /* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */
1546         if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR))
1547                 DRM_WARN("System can't access extended configuration space, please check!\n");
1548
1549         /* skip if the bios has already enabled large BAR */
1550         if (adev->gmc.real_vram_size &&
1551             (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1552                 return 0;
1553
1554         /* Check if the root BUS has 64-bit memory resources */
1555         root = adev->pdev->bus;
1556         while (root->parent)
1557                 root = root->parent;
1558
1559         pci_bus_for_each_resource(root, res, i) {
1560                 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
1561                     res->start > 0x100000000ull)
1562                         break;
1563         }
1564
1565         /* Trying to resize is pointless without a root hub window above 4GB */
1566         if (!res)
1567                 return 0;
1568
1569         /* Limit the BAR size to what is available */
1570         rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1571                         rbar_size);
1572
1573         /* Disable memory decoding while we change the BAR addresses and size */
1574         pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1575         pci_write_config_word(adev->pdev, PCI_COMMAND,
1576                               cmd & ~PCI_COMMAND_MEMORY);
1577
1578         /* Free the VRAM and doorbell BAR, we most likely need to move both. */
1579         amdgpu_doorbell_fini(adev);
1580         if (adev->asic_type >= CHIP_BONAIRE)
1581                 pci_release_resource(adev->pdev, 2);
1582
1583         pci_release_resource(adev->pdev, 0);
1584
1585         r = pci_resize_resource(adev->pdev, 0, rbar_size);
1586         if (r == -ENOSPC)
1587                 DRM_INFO("Not enough PCI address space for a large BAR.");
1588         else if (r && r != -ENOTSUPP)
1589                 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1590
1591         pci_assign_unassigned_bus_resources(adev->pdev->bus);
1592
1593         /* When the doorbell or fb BAR isn't available we have no chance of
1594          * using the device.
1595          */
1596         r = amdgpu_doorbell_init(adev);
1597         if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1598                 return -ENODEV;
1599
1600         pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1601
1602         return 0;
1603 }
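
/*
 * Editor's note (illustrative): the Resizable BAR capability encodes sizes
 * as powers of two, size = 1MB << n (n = 0 for 1MB, 8 for 256MB, 13 for
 * 8GB, ...).  pci_rebar_get_possible_sizes() returns a bitmask of the
 * encodings the device supports, so the clamp above works like:
 *
 *      u32 sizes = pci_rebar_get_possible_sizes(adev->pdev, 0);
 *      // e.g. sizes == 0x3f00 means 256MB..8GB; fls(0x3f00) - 1 == 13
 *      rbar_size = min(fls(sizes) - 1, rbar_size);
 */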
1604
1605 static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
1606 {
1607         if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
1608                 return false;
1609
1610         return true;
1611 }
1612
1613 /*
1614  * GPU helper functions.
1615  */
1616 /**
1617  * amdgpu_device_need_post - check if the hw need post or not
1618  *
1619  * @adev: amdgpu_device pointer
1620  *
1621  * Check if the asic has been initialized (all asics) at driver startup,
1622  * or if post is needed because a hw reset was performed.
1623  * Returns true if post is needed, false if not.
1624  */
1625 bool amdgpu_device_need_post(struct amdgpu_device *adev)
1626 {
1627         uint32_t reg;
1628
1629         if (amdgpu_sriov_vf(adev))
1630                 return false;
1631
1632         if (!amdgpu_device_read_bios(adev))
1633                 return false;
1634
1635         if (amdgpu_passthrough(adev)) {
1636                 /* for FIJI: in the whole-GPU pass-through virtualization case, after a VM
1637                  * reboot some old SMC firmware still needs the driver to do a vPost,
1638                  * otherwise the GPU hangs. SMC firmware versions above 22.15 don't have
1639                  * this flaw, so we force a vPost for SMC versions below 22.15.
1640                  */
1641                 if (adev->asic_type == CHIP_FIJI) {
1642                         int err;
1643                         uint32_t fw_ver;
1644
1645                         err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1646                         /* force vPost if an error occurred */
1647                         if (err)
1648                                 return true;
1649
1650                         fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1651                         release_firmware(adev->pm.fw);
1652                         if (fw_ver < 0x00160e00)
1653                                 return true;
1654                 }
1655         }
1656
1657         /* Don't post if we need to reset the whole hive on init */
1658         if (adev->gmc.xgmi.pending_reset)
1659                 return false;
1660
1661         if (adev->has_hw_reset) {
1662                 adev->has_hw_reset = false;
1663                 return true;
1664         }
1665
1666         /* bios scratch used on CIK+ */
1667         if (adev->asic_type >= CHIP_BONAIRE)
1668                 return amdgpu_atombios_scratch_need_asic_init(adev);
1669
1670         /* check MEM_SIZE for older asics */
1671         reg = amdgpu_asic_get_config_memsize(adev);
1672
1673         if ((reg != 0) && (reg != 0xffffffff))
1674                 return false;
1675
1676         return true;
1677 }
1678
1679 /*
1680  * Check whether seamless boot is supported.
1681  *
1682  * So far we only support seamless boot on DCE 3.0 or later.
1683  * If users report that it works on older ASICs as well, we may
1684  * loosen this.
1685  */
1686 bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1687 {
1688         switch (amdgpu_seamless) {
1689         case -1:
1690                 break;
1691         case 1:
1692                 return true;
1693         case 0:
1694                 return false;
1695         default:
1696                 DRM_ERROR("Invalid value for amdgpu.seamless: %d\n",
1697                           amdgpu_seamless);
1698                 return false;
1699         }
1700
1701         if (!(adev->flags & AMD_IS_APU))
1702                 return false;
1703
1704         if (adev->mman.keep_stolen_vga_memory)
1705                 return false;
1706
1707         return amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0);
1708 }
1709
1710 /*
1711  * Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids
1712  * don't support dynamic speed switching. Until we have confirmation from Intel
1713  * that a specific host supports it, it's safer to keep it disabled for all.
1714  *
1715  * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1716  * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1717  */
1718 static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev)
1719 {
1720 #if IS_ENABLED(CONFIG_X86)
1721         struct cpuinfo_x86 *c = &cpu_data(0);
1722
1723         /* eGPU change speeds based on USB4 fabric conditions */
1724         if (dev_is_removable(adev->dev))
1725                 return true;
1726
1727         if (c->x86_vendor == X86_VENDOR_INTEL)
1728                 return false;
1729 #endif
1730         return true;
1731 }
1732
1733 /**
1734  * amdgpu_device_should_use_aspm - check if the device should program ASPM
1735  *
1736  * @adev: amdgpu_device pointer
1737  *
1738  * Confirm whether the module parameter and pcie bridge agree that ASPM should
1739  * be set for this device.
1740  *
1741  * Returns true if it should be used or false if not.
1742  */
1743 bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1744 {
1745         switch (amdgpu_aspm) {
1746         case -1:
1747                 break;
1748         case 0:
1749                 return false;
1750         case 1:
1751                 return true;
1752         default:
1753                 return false;
1754         }
1755         if (adev->flags & AMD_IS_APU)
1756                 return false;
1757         if (!(adev->pm.pp_feature & PP_PCIE_DPM_MASK))
1758                 return false;
1759         return pcie_aspm_enabled(adev->pdev);
1760 }
1761
1762 /* if we get transitioned to only one device, take VGA back */
1763 /**
1764  * amdgpu_device_vga_set_decode - enable/disable vga decode
1765  *
1766  * @pdev: PCI device pointer
1767  * @state: enable/disable vga decode
1768  *
1769  * Enable/disable vga decode (all asics).
1770  * Returns VGA resource flags.
1771  */
1772 static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1773                 bool state)
1774 {
1775         struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
1776
1777         amdgpu_asic_set_vga_state(adev, state);
1778         if (state)
1779                 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1780                        VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1781         else
1782                 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1783 }
1784
1785 /**
1786  * amdgpu_device_check_block_size - validate the vm block size
1787  *
1788  * @adev: amdgpu_device pointer
1789  *
1790  * Validates the vm block size specified via module parameter.
1791  * The vm block size defines the number of bits in the page table versus the
1792  * page directory. A page is 4KB, so we have 12 bits of offset, a minimum of
1793  * 9 bits in the page table, and the remaining bits in the page directory.
1794  */
1795 static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1796 {
1797         /* defines the number of bits in the page table versus the page directory;
1798          * a page is 4KB so we have 12 bits of offset, a minimum of 9 bits in the
1799          * page table and the remaining bits in the page directory
1800          */
1801         if (amdgpu_vm_block_size == -1)
1802                 return;
1803
1804         if (amdgpu_vm_block_size < 9) {
1805                 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1806                          amdgpu_vm_block_size);
1807                 amdgpu_vm_block_size = -1;
1808         }
1809 }
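
/*
 * Editor's worked example: with 4KB pages the low 12 address bits are the
 * in-page offset, so a vm_block_size of 9 makes each page-directory entry
 * cover 2^(9 + 12) = 2MB of address space through one page table.  Values
 * below 9 would leave too few page-table bits, hence the warning above.
 */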
1810
1811 /**
1812  * amdgpu_device_check_vm_size - validate the vm size
1813  *
1814  * @adev: amdgpu_device pointer
1815  *
1816  * Validates the vm size in GB specified via module parameter.
1817  * The VM size is the size of the GPU virtual memory space in GB.
1818  */
1819 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1820 {
1821         /* no need to check the default value */
1822         if (amdgpu_vm_size == -1)
1823                 return;
1824
1825         if (amdgpu_vm_size < 1) {
1826                 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1827                          amdgpu_vm_size);
1828                 amdgpu_vm_size = -1;
1829         }
1830 }
1831
1832 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1833 {
1834         struct sysinfo si;
1835         bool is_os_64 = (sizeof(void *) == 8);
1836         uint64_t total_memory;
1837         uint64_t dram_size_seven_GB = 0x1B8000000;
1838         uint64_t dram_size_three_GB = 0xB8000000;
1839
1840         if (amdgpu_smu_memory_pool_size == 0)
1841                 return;
1842
1843         if (!is_os_64) {
1844                 DRM_WARN("Not 64-bit OS, feature not supported\n");
1845                 goto def_value;
1846         }
1847         si_meminfo(&si);
1848         total_memory = (uint64_t)si.totalram * si.mem_unit;
1849
1850         if ((amdgpu_smu_memory_pool_size == 1) ||
1851                 (amdgpu_smu_memory_pool_size == 2)) {
1852                 if (total_memory < dram_size_three_GB)
1853                         goto def_value1;
1854         } else if ((amdgpu_smu_memory_pool_size == 4) ||
1855                 (amdgpu_smu_memory_pool_size == 8)) {
1856                 if (total_memory < dram_size_seven_GB)
1857                         goto def_value1;
1858         } else {
1859                 DRM_WARN("Smu memory pool size not supported\n");
1860                 goto def_value;
1861         }
1862         adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1863
1864         return;
1865
1866 def_value1:
1867         DRM_WARN("Not enough system memory\n");
1868 def_value:
1869         adev->pm.smu_prv_buffer_size = 0;
1870 }
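
/*
 * Editor's note: amdgpu_smu_memory_pool_size is expressed in units of
 * 256MB, so the "<< 28" above converts it to bytes:
 *
 *      amdgpu_smu_memory_pool_size = 1  ->  1 << 28 = 256MB
 *      amdgpu_smu_memory_pool_size = 8  ->  8 << 28 = 2GB
 *
 * The ~3GB (0xB8000000) and ~7GB (0x1B8000000) thresholds make sure the
 * system keeps enough memory for itself after the pool is carved out.
 */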
1871
1872 static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1873 {
1874         if (!(adev->flags & AMD_IS_APU) ||
1875             adev->asic_type < CHIP_RAVEN)
1876                 return 0;
1877
1878         switch (adev->asic_type) {
1879         case CHIP_RAVEN:
1880                 if (adev->pdev->device == 0x15dd)
1881                         adev->apu_flags |= AMD_APU_IS_RAVEN;
1882                 if (adev->pdev->device == 0x15d8)
1883                         adev->apu_flags |= AMD_APU_IS_PICASSO;
1884                 break;
1885         case CHIP_RENOIR:
1886                 if ((adev->pdev->device == 0x1636) ||
1887                     (adev->pdev->device == 0x164c))
1888                         adev->apu_flags |= AMD_APU_IS_RENOIR;
1889                 else
1890                         adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1891                 break;
1892         case CHIP_VANGOGH:
1893                 adev->apu_flags |= AMD_APU_IS_VANGOGH;
1894                 break;
1895         case CHIP_YELLOW_CARP:
1896                 break;
1897         case CHIP_CYAN_SKILLFISH:
1898                 if ((adev->pdev->device == 0x13FE) ||
1899                     (adev->pdev->device == 0x143F))
1900                         adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1901                 break;
1902         default:
1903                 break;
1904         }
1905
1906         return 0;
1907 }
1908
1909 /**
1910  * amdgpu_device_check_arguments - validate module params
1911  *
1912  * @adev: amdgpu_device pointer
1913  *
1914  * Validates certain module parameters and updates
1915  * the associated values used by the driver (all asics).
1916  */
1917 static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
1918 {
1919         if (amdgpu_sched_jobs < 4) {
1920                 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1921                          amdgpu_sched_jobs);
1922                 amdgpu_sched_jobs = 4;
1923         } else if (!is_power_of_2(amdgpu_sched_jobs)) {
1924                 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1925                          amdgpu_sched_jobs);
1926                 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1927         }
1928
1929         if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1930                 /* gart size must be greater or equal to 32M */
1931                 dev_warn(adev->dev, "gart size (%d) too small\n",
1932                          amdgpu_gart_size);
1933                 amdgpu_gart_size = -1;
1934         }
1935
1936         if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1937                 /* gtt size must be greater or equal to 32M */
1938                 dev_warn(adev->dev, "gtt size (%d) too small\n",
1939                                  amdgpu_gtt_size);
1940                 amdgpu_gtt_size = -1;
1941         }
1942
1943         /* valid range is between 4 and 9 inclusive */
1944         if (amdgpu_vm_fragment_size != -1 &&
1945             (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1946                 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1947                 amdgpu_vm_fragment_size = -1;
1948         }
1949
1950         if (amdgpu_sched_hw_submission < 2) {
1951                 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1952                          amdgpu_sched_hw_submission);
1953                 amdgpu_sched_hw_submission = 2;
1954         } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1955                 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1956                          amdgpu_sched_hw_submission);
1957                 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1958         }
1959
1960         if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1961                 dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1962                 amdgpu_reset_method = -1;
1963         }
1964
1965         amdgpu_device_check_smu_prv_buffer_size(adev);
1966
1967         amdgpu_device_check_vm_size(adev);
1968
1969         amdgpu_device_check_block_size(adev);
1970
1971         adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1972
1973         return 0;
1974 }
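
/*
 * Editor's example: out-of-spec module parameters are snapped to the
 * nearest valid value instead of failing the probe, e.g.:
 *
 *      amdgpu.sched_jobs=5           ->  rounded up to 8
 *      amdgpu.sched_hw_submission=3  ->  rounded up to 4
 *      amdgpu.gart_size=16           ->  reset to -1 (auto)
 */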
1975
1976 /**
1977  * amdgpu_switcheroo_set_state - set switcheroo state
1978  *
1979  * @pdev: pci dev pointer
1980  * @state: vga_switcheroo state
1981  *
1982  * Callback for the switcheroo driver.  Suspends or resumes
1983  * the asics before or after it is powered up using ACPI methods.
1984  */
1985 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1986                                         enum vga_switcheroo_state state)
1987 {
1988         struct drm_device *dev = pci_get_drvdata(pdev);
1989         int r;
1990
1991         if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
1992                 return;
1993
1994         if (state == VGA_SWITCHEROO_ON) {
1995                 pr_info("switched on\n");
1996                 /* don't suspend or resume card normally */
1997                 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1998
1999                 pci_set_power_state(pdev, PCI_D0);
2000                 amdgpu_device_load_pci_state(pdev);
2001                 r = pci_enable_device(pdev);
2002                 if (r)
2003                         DRM_WARN("pci_enable_device failed (%d)\n", r);
2004                 amdgpu_device_resume(dev, true);
2005
2006                 dev->switch_power_state = DRM_SWITCH_POWER_ON;
2007         } else {
2008                 pr_info("switched off\n");
2009                 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
2010                 amdgpu_device_prepare(dev);
2011                 amdgpu_device_suspend(dev, true);
2012                 amdgpu_device_cache_pci_state(pdev);
2013                 /* Shut down the device */
2014                 pci_disable_device(pdev);
2015                 pci_set_power_state(pdev, PCI_D3cold);
2016                 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
2017         }
2018 }
2019
2020 /**
2021  * amdgpu_switcheroo_can_switch - see if switcheroo state can change
2022  *
2023  * @pdev: pci dev pointer
2024  *
2025  * Callback for the switcheroo driver.  Checks if the switcheroo
2026  * state can be changed.
2027  * Returns true if the state can be changed, false if not.
2028  */
2029 static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
2030 {
2031         struct drm_device *dev = pci_get_drvdata(pdev);
2032
2033        /*
2034         * FIXME: open_count is protected by drm_global_mutex but that would lead to
2035         * locking inversion with the driver load path. And the access here is
2036         * completely racy anyway. So don't bother with locking for now.
2037         */
2038         return atomic_read(&dev->open_count) == 0;
2039 }
2040
2041 static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
2042         .set_gpu_state = amdgpu_switcheroo_set_state,
2043         .reprobe = NULL,
2044         .can_switch = amdgpu_switcheroo_can_switch,
2045 };
2046
2047 /**
2048  * amdgpu_device_ip_set_clockgating_state - set the CG state
2049  *
2050  * @dev: amdgpu_device pointer
2051  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2052  * @state: clockgating state (gate or ungate)
2053  *
2054  * Sets the requested clockgating state for all instances of
2055  * the hardware IP specified.
2056  * Returns the error code from the last instance.
2057  */
2058 int amdgpu_device_ip_set_clockgating_state(void *dev,
2059                                            enum amd_ip_block_type block_type,
2060                                            enum amd_clockgating_state state)
2061 {
2062         struct amdgpu_device *adev = dev;
2063         int i, r = 0;
2064
2065         for (i = 0; i < adev->num_ip_blocks; i++) {
2066                 if (!adev->ip_blocks[i].status.valid)
2067                         continue;
2068                 if (adev->ip_blocks[i].version->type != block_type)
2069                         continue;
2070                 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
2071                         continue;
2072                 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
2073                         (void *)adev, state);
2074                 if (r)
2075                         DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
2076                                   adev->ip_blocks[i].version->funcs->name, r);
2077         }
2078         return r;
2079 }
2080
2081 /**
2082  * amdgpu_device_ip_set_powergating_state - set the PG state
2083  *
2084  * @dev: amdgpu_device pointer
2085  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2086  * @state: powergating state (gate or ungate)
2087  *
2088  * Sets the requested powergating state for all instances of
2089  * the hardware IP specified.
2090  * Returns the error code from the last instance.
2091  */
2092 int amdgpu_device_ip_set_powergating_state(void *dev,
2093                                            enum amd_ip_block_type block_type,
2094                                            enum amd_powergating_state state)
2095 {
2096         struct amdgpu_device *adev = dev;
2097         int i, r = 0;
2098
2099         for (i = 0; i < adev->num_ip_blocks; i++) {
2100                 if (!adev->ip_blocks[i].status.valid)
2101                         continue;
2102                 if (adev->ip_blocks[i].version->type != block_type)
2103                         continue;
2104                 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
2105                         continue;
2106                 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
2107                         (void *)adev, state);
2108                 if (r)
2109                         DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
2110                                   adev->ip_blocks[i].version->funcs->name, r);
2111         }
2112         return r;
2113 }
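
/*
 * Editor's sketch (illustrative): IP code gates or ungates a whole block
 * type through these helpers, e.g. ungating UVD before submitting work:
 *
 *      r = amdgpu_device_ip_set_clockgating_state(adev,
 *                      AMD_IP_BLOCK_TYPE_UVD, AMD_CG_STATE_UNGATE);
 *      r = amdgpu_device_ip_set_powergating_state(adev,
 *                      AMD_IP_BLOCK_TYPE_UVD, AMD_PG_STATE_UNGATE);
 *
 * Per the kernel-doc above, only the error from the last matching
 * instance is returned.
 */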
2114
2115 /**
2116  * amdgpu_device_ip_get_clockgating_state - get the CG state
2117  *
2118  * @adev: amdgpu_device pointer
2119  * @flags: clockgating feature flags
2120  *
2121  * Walks the list of IPs on the device and updates the clockgating
2122  * flags for each IP.
2123  * Updates @flags with the feature flags for each hardware IP where
2124  * clockgating is enabled.
2125  */
2126 void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
2127                                             u64 *flags)
2128 {
2129         int i;
2130
2131         for (i = 0; i < adev->num_ip_blocks; i++) {
2132                 if (!adev->ip_blocks[i].status.valid)
2133                         continue;
2134                 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
2135                         adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
2136         }
2137 }
2138
2139 /**
2140  * amdgpu_device_ip_wait_for_idle - wait for idle
2141  *
2142  * @adev: amdgpu_device pointer
2143  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2144  *
2145  * Waits for the requested hardware IP to be idle.
2146  * Returns 0 for success or a negative error code on failure.
2147  */
2148 int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
2149                                    enum amd_ip_block_type block_type)
2150 {
2151         int i, r;
2152
2153         for (i = 0; i < adev->num_ip_blocks; i++) {
2154                 if (!adev->ip_blocks[i].status.valid)
2155                         continue;
2156                 if (adev->ip_blocks[i].version->type == block_type) {
2157                         r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
2158                         if (r)
2159                                 return r;
2160                         break;
2161                 }
2162         }
2163         return 0;
2164
2165 }
2166
2167 /**
2168  * amdgpu_device_ip_is_idle - is the hardware IP idle
2169  *
2170  * @adev: amdgpu_device pointer
2171  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2172  *
2173  * Check if the hardware IP is idle or not.
2174  * Returns true if the IP is idle, false if not.
2175  */
2176 bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
2177                               enum amd_ip_block_type block_type)
2178 {
2179         int i;
2180
2181         for (i = 0; i < adev->num_ip_blocks; i++) {
2182                 if (!adev->ip_blocks[i].status.valid)
2183                         continue;
2184                 if (adev->ip_blocks[i].version->type == block_type)
2185                         return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
2186         }
2187         return true;
2188
2189 }
2190
2191 /**
2192  * amdgpu_device_ip_get_ip_block - get a hw IP pointer
2193  *
2194  * @adev: amdgpu_device pointer
2195  * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
2196  *
2197  * Returns a pointer to the hardware IP block structure
2198  * if it exists for the asic, otherwise NULL.
2199  */
2200 struct amdgpu_ip_block *
2201 amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
2202                               enum amd_ip_block_type type)
2203 {
2204         int i;
2205
2206         for (i = 0; i < adev->num_ip_blocks; i++)
2207                 if (adev->ip_blocks[i].version->type == type)
2208                         return &adev->ip_blocks[i];
2209
2210         return NULL;
2211 }
2212
2213 /**
2214  * amdgpu_device_ip_block_version_cmp
2215  *
2216  * @adev: amdgpu_device pointer
2217  * @type: enum amd_ip_block_type
2218  * @major: major version
2219  * @minor: minor version
2220  *
2221  * Returns 0 if the IP block version is equal to or greater than major.minor,
2222  * or 1 if it is smaller or the ip_block doesn't exist.
2223  */
2224 int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
2225                                        enum amd_ip_block_type type,
2226                                        u32 major, u32 minor)
2227 {
2228         struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
2229
2230         if (ip_block && ((ip_block->version->major > major) ||
2231                         ((ip_block->version->major == major) &&
2232                         (ip_block->version->minor >= minor))))
2233                 return 0;
2234
2235         return 1;
2236 }
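
/*
 * Editor's example: the "0 means recent enough" convention reads naturally
 * at call sites, e.g. (illustrative):
 *
 *      if (amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_SMC,
 *                                             7, 0) == 0) {
 *              // SMU IP block is version 7.0 or newer
 *      }
 */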
2237
2238 /**
2239  * amdgpu_device_ip_block_add
2240  *
2241  * @adev: amdgpu_device pointer
2242  * @ip_block_version: pointer to the IP to add
2243  *
2244  * Adds the IP block driver information to the collection of IPs
2245  * on the asic.
2246  */
2247 int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
2248                                const struct amdgpu_ip_block_version *ip_block_version)
2249 {
2250         if (!ip_block_version)
2251                 return -EINVAL;
2252
2253         switch (ip_block_version->type) {
2254         case AMD_IP_BLOCK_TYPE_VCN:
2255                 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
2256                         return 0;
2257                 break;
2258         case AMD_IP_BLOCK_TYPE_JPEG:
2259                 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
2260                         return 0;
2261                 break;
2262         default:
2263                 break;
2264         }
2265
2266         DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
2267                   ip_block_version->funcs->name);
2268
2269         adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
2270
2271         return 0;
2272 }
2273
2274 /**
2275  * amdgpu_device_enable_virtual_display - enable virtual display feature
2276  *
2277  * @adev: amdgpu_device pointer
2278  *
2279  * Enables the virtual display feature if the user has enabled it via
2280  * the module parameter virtual_display.  This feature provides virtual
2281  * display hardware on headless boards or in virtualized environments.
2282  * This function parses and validates the configuration string specified by
2283  * the user and configures the virtual display settings (number of
2284  * virtual connectors, crtcs, etc.) specified.
2285  */
2286 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
2287 {
2288         adev->enable_virtual_display = false;
2289
2290         if (amdgpu_virtual_display) {
2291                 const char *pci_address_name = pci_name(adev->pdev);
2292                 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
2293
2294                 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
2295                 pciaddstr_tmp = pciaddstr;
2296                 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
2297                         pciaddname = strsep(&pciaddname_tmp, ",");
2298                         if (!strcmp("all", pciaddname)
2299                             || !strcmp(pci_address_name, pciaddname)) {
2300                                 long num_crtc;
2301                                 int res = -1;
2302
2303                                 adev->enable_virtual_display = true;
2304
2305                                 if (pciaddname_tmp)
2306                                         res = kstrtol(pciaddname_tmp, 10,
2307                                                       &num_crtc);
2308
2309                                 if (!res) {
2310                                         if (num_crtc < 1)
2311                                                 num_crtc = 1;
2312                                         if (num_crtc > 6)
2313                                                 num_crtc = 6;
2314                                         adev->mode_info.num_crtc = num_crtc;
2315                                 } else {
2316                                         adev->mode_info.num_crtc = 1;
2317                                 }
2318                                 break;
2319                         }
2320                 }
2321
2322                 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
2323                          amdgpu_virtual_display, pci_address_name,
2324                          adev->enable_virtual_display, adev->mode_info.num_crtc);
2325
2326                 kfree(pciaddstr);
2327         }
2328 }
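
/*
 * Editor's note: the accepted format is a ';'-separated list of
 * "<pci-address>[,<num_crtc>]" entries, e.g. on the kernel command line:
 *
 *      amdgpu.virtual_display=0000:01:00.0,2
 *      amdgpu.virtual_display=all,4
 *
 * num_crtc is clamped to the range [1, 6] and defaults to 1 when omitted
 * or unparsable, matching the kstrtol() handling above.
 */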
2329
2330 void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
2331 {
2332         if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
2333                 adev->mode_info.num_crtc = 1;
2334                 adev->enable_virtual_display = true;
2335                 DRM_INFO("virtual_display:%d, num_crtc:%d\n",
2336                          adev->enable_virtual_display, adev->mode_info.num_crtc);
2337         }
2338 }
2339
2340 /**
2341  * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
2342  *
2343  * @adev: amdgpu_device pointer
2344  *
2345  * Parses the asic configuration parameters specified in the gpu info
2346  * firmware and makes them available to the driver for use in configuring
2347  * the asic.
2348  * Returns 0 on success, -EINVAL on failure.
2349  */
2350 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
2351 {
2352         const char *chip_name;
2353         char fw_name[40];
2354         int err;
2355         const struct gpu_info_firmware_header_v1_0 *hdr;
2356
2357         adev->firmware.gpu_info_fw = NULL;
2358
2359         if (adev->mman.discovery_bin)
2360                 return 0;
2361
2362         switch (adev->asic_type) {
2363         default:
2364                 return 0;
2365         case CHIP_VEGA10:
2366                 chip_name = "vega10";
2367                 break;
2368         case CHIP_VEGA12:
2369                 chip_name = "vega12";
2370                 break;
2371         case CHIP_RAVEN:
2372                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2373                         chip_name = "raven2";
2374                 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
2375                         chip_name = "picasso";
2376                 else
2377                         chip_name = "raven";
2378                 break;
2379         case CHIP_ARCTURUS:
2380                 chip_name = "arcturus";
2381                 break;
2382         case CHIP_NAVI12:
2383                 chip_name = "navi12";
2384                 break;
2385         }
2386
2387         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
2388         err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
2389         if (err) {
2390                 dev_err(adev->dev,
2391                         "Failed to get gpu_info firmware \"%s\"\n",
2392                         fw_name);
2393                 goto out;
2394         }
2395
2396         hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
2397         amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2398
2399         switch (hdr->version_major) {
2400         case 1:
2401         {
2402                 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
2403                         (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
2404                                                                 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2405
2406                 /*
2407                  * Should be dropped when DAL no longer needs it.
2408                  */
2409                 if (adev->asic_type == CHIP_NAVI12)
2410                         goto parse_soc_bounding_box;
2411
2412                 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2413                 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2414                 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2415                 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
2416                 adev->gfx.config.max_texture_channel_caches =
2417                         le32_to_cpu(gpu_info_fw->gc_num_tccs);
2418                 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2419                 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2420                 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2421                 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
2422                 adev->gfx.config.double_offchip_lds_buf =
2423                         le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2424                 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
2425                 adev->gfx.cu_info.max_waves_per_simd =
2426                         le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2427                 adev->gfx.cu_info.max_scratch_slots_per_cu =
2428                         le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2429                 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
2430                 if (hdr->version_minor >= 1) {
2431                         const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2432                                 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2433                                                                         le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2434                         adev->gfx.config.num_sc_per_sh =
2435                                 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2436                         adev->gfx.config.num_packer_per_sc =
2437                                 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2438                 }
2439
2440 parse_soc_bounding_box:
2441                 /*
2442                  * soc bounding box info is not integrated in the discovery table,
2443                  * so we always need to parse it from the gpu info firmware when needed.
2444                  */
2445                 if (hdr->version_minor == 2) {
2446                         const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2447                                 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2448                                                                         le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2449                         adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2450                 }
2451                 break;
2452         }
2453         default:
2454                 dev_err(adev->dev,
2455                         "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2456                 err = -EINVAL;
2457                 goto out;
2458         }
2459 out:
2460         return err;
2461 }
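
/*
 * Editor's sketch: versioned firmware images carry their payload at a byte
 * offset recorded in the common header, which is why each branch above
 * recasts the same base pointer (all header fields are little-endian;
 * "fw" is shorthand for adev->firmware.gpu_info_fw):
 *
 *      hdr     = (const struct gpu_info_firmware_header_v1_0 *)fw->data;
 *      payload = fw->data +
 *                le32_to_cpu(hdr->header.ucode_array_offset_bytes);
 */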
2462
2463 /**
2464  * amdgpu_device_ip_early_init - run early init for hardware IPs
2465  *
2466  * @adev: amdgpu_device pointer
2467  *
2468  * Early initialization pass for hardware IPs.  The hardware IPs that make
2469  * up each asic are discovered and each IP's early_init callback is run.  This
2470  * is the first stage in initializing the asic.
2471  * Returns 0 on success, negative error code on failure.
2472  */
2473 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
2474 {
2475         struct pci_dev *parent;
2476         int i, r;
2477         bool total;
2478
2479         amdgpu_device_enable_virtual_display(adev);
2480
2481         if (amdgpu_sriov_vf(adev)) {
2482                 r = amdgpu_virt_request_full_gpu(adev, true);
2483                 if (r)
2484                         return r;
2485         }
2486
2487         switch (adev->asic_type) {
2488 #ifdef CONFIG_DRM_AMDGPU_SI
2489         case CHIP_VERDE:
2490         case CHIP_TAHITI:
2491         case CHIP_PITCAIRN:
2492         case CHIP_OLAND:
2493         case CHIP_HAINAN:
2494                 adev->family = AMDGPU_FAMILY_SI;
2495                 r = si_set_ip_blocks(adev);
2496                 if (r)
2497                         return r;
2498                 break;
2499 #endif
2500 #ifdef CONFIG_DRM_AMDGPU_CIK
2501         case CHIP_BONAIRE:
2502         case CHIP_HAWAII:
2503         case CHIP_KAVERI:
2504         case CHIP_KABINI:
2505         case CHIP_MULLINS:
2506                 if (adev->flags & AMD_IS_APU)
2507                         adev->family = AMDGPU_FAMILY_KV;
2508                 else
2509                         adev->family = AMDGPU_FAMILY_CI;
2510
2511                 r = cik_set_ip_blocks(adev);
2512                 if (r)
2513                         return r;
2514                 break;
2515 #endif
2516         case CHIP_TOPAZ:
2517         case CHIP_TONGA:
2518         case CHIP_FIJI:
2519         case CHIP_POLARIS10:
2520         case CHIP_POLARIS11:
2521         case CHIP_POLARIS12:
2522         case CHIP_VEGAM:
2523         case CHIP_CARRIZO:
2524         case CHIP_STONEY:
2525                 if (adev->flags & AMD_IS_APU)
2526                         adev->family = AMDGPU_FAMILY_CZ;
2527                 else
2528                         adev->family = AMDGPU_FAMILY_VI;
2529
2530                 r = vi_set_ip_blocks(adev);
2531                 if (r)
2532                         return r;
2533                 break;
2534         default:
2535                 r = amdgpu_discovery_set_ip_blocks(adev);
2536                 if (r)
2537                         return r;
2538                 break;
2539         }
2540
2541         if (amdgpu_has_atpx() &&
2542             (amdgpu_is_atpx_hybrid() ||
2543              amdgpu_has_atpx_dgpu_power_cntl()) &&
2544             ((adev->flags & AMD_IS_APU) == 0) &&
2545             !dev_is_removable(&adev->pdev->dev))
2546                 adev->flags |= AMD_IS_PX;
2547
2548         if (!(adev->flags & AMD_IS_APU)) {
2549                 parent = pcie_find_root_port(adev->pdev);
2550                 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2551         }
2552
2553
2554         adev->pm.pp_feature = amdgpu_pp_feature_mask;
2555         if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
2556                 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
2557         if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2558                 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
2559         if (!amdgpu_device_pcie_dynamic_switching_supported(adev))
2560                 adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
2561
2562         total = true;
2563         for (i = 0; i < adev->num_ip_blocks; i++) {
2564                 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
2565                         DRM_WARN("disabled ip block: %d <%s>\n",
2566                                   i, adev->ip_blocks[i].version->funcs->name);
2567                         adev->ip_blocks[i].status.valid = false;
2568                 } else {
2569                         if (adev->ip_blocks[i].version->funcs->early_init) {
2570                                 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2571                                 if (r == -ENOENT) {
2572                                         adev->ip_blocks[i].status.valid = false;
2573                                 } else if (r) {
2574                                         DRM_ERROR("early_init of IP block <%s> failed %d\n",
2575                                                   adev->ip_blocks[i].version->funcs->name, r);
2576                                         total = false;
2577                                 } else {
2578                                         adev->ip_blocks[i].status.valid = true;
2579                                 }
2580                         } else {
2581                                 adev->ip_blocks[i].status.valid = true;
2582                         }
2583                 }
2584                 /* get the vbios after the asic_funcs are set up */
2585                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2586                         r = amdgpu_device_parse_gpu_info_fw(adev);
2587                         if (r)
2588                                 return r;
2589
2590                         /* Read BIOS */
2591                         if (amdgpu_device_read_bios(adev)) {
2592                                 if (!amdgpu_get_bios(adev))
2593                                         return -EINVAL;
2594
2595                                 r = amdgpu_atombios_init(adev);
2596                                 if (r) {
2597                                         dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2598                                         amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2599                                         return r;
2600                                 }
2601                         }
2602
2603                         /* get pf2vf msg info at its earliest time */
2604                         if (amdgpu_sriov_vf(adev))
2605                                 amdgpu_virt_init_data_exchange(adev);
2606
2607                 }
2608         }
2609         if (!total)
2610                 return -ENODEV;
2611
2612         amdgpu_amdkfd_device_probe(adev);
2613         adev->cg_flags &= amdgpu_cg_mask;
2614         adev->pg_flags &= amdgpu_pg_mask;
2615
2616         return 0;
2617 }
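
/*
 * Editor's note: bit i of amdgpu.ip_block_mask gates ip_blocks[i] in the
 * walk above, e.g.:
 *
 *      amdgpu.ip_block_mask=0xfffffffd   // clear bit 1: disable block 1
 *
 * Which index maps to which IP depends on the asic's block list; see the
 * "add ip block number %d" line logged at probe time.
 */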
2618
2619 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2620 {
2621         int i, r;
2622
2623         for (i = 0; i < adev->num_ip_blocks; i++) {
2624                 if (!adev->ip_blocks[i].status.sw)
2625                         continue;
2626                 if (adev->ip_blocks[i].status.hw)
2627                         continue;
2628                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2629                     (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
2630                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2631                         r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2632                         if (r) {
2633                                 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2634                                           adev->ip_blocks[i].version->funcs->name, r);
2635                                 return r;
2636                         }
2637                         adev->ip_blocks[i].status.hw = true;
2638                 }
2639         }
2640
2641         return 0;
2642 }
2643
2644 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2645 {
2646         int i, r;
2647
2648         for (i = 0; i < adev->num_ip_blocks; i++) {
2649                 if (!adev->ip_blocks[i].status.sw)
2650                         continue;
2651                 if (adev->ip_blocks[i].status.hw)
2652                         continue;
2653                 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2654                 if (r) {
2655                         DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2656                                   adev->ip_blocks[i].version->funcs->name, r);
2657                         return r;
2658                 }
2659                 adev->ip_blocks[i].status.hw = true;
2660         }
2661
2662         return 0;
2663 }
2664
2665 static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2666 {
2667         int r = 0;
2668         int i;
2669         uint32_t smu_version;
2670
2671         if (adev->asic_type >= CHIP_VEGA10) {
2672                 for (i = 0; i < adev->num_ip_blocks; i++) {
2673                         if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2674                                 continue;
2675
2676                         if (!adev->ip_blocks[i].status.sw)
2677                                 continue;
2678
2679                         /* no need to do the fw loading again if already done */
2680                         if (adev->ip_blocks[i].status.hw)
2681                                 break;
2682
2683                         if (amdgpu_in_reset(adev) || adev->in_suspend) {
2684                                 r = adev->ip_blocks[i].version->funcs->resume(adev);
2685                                 if (r) {
2686                                         DRM_ERROR("resume of IP block <%s> failed %d\n",
2687                                                           adev->ip_blocks[i].version->funcs->name, r);
2688                                         return r;
2689                                 }
2690                         } else {
2691                                 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2692                                 if (r) {
2693                                         DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2694                                                           adev->ip_blocks[i].version->funcs->name, r);
2695                                         return r;
2696                                 }
2697                         }
2698
2699                         adev->ip_blocks[i].status.hw = true;
2700                         break;
2701                 }
2702         }
2703
2704         if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2705                 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
2706
2707         return r;
2708 }
2709
2710 static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2711 {
2712         long timeout;
2713         int r, i;
2714
2715         for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2716                 struct amdgpu_ring *ring = adev->rings[i];
2717
2718                 /* No need to set up the GPU scheduler for rings that don't need it */
2719                 if (!ring || ring->no_scheduler)
2720                         continue;
2721
2722                 switch (ring->funcs->type) {
2723                 case AMDGPU_RING_TYPE_GFX:
2724                         timeout = adev->gfx_timeout;
2725                         break;
2726                 case AMDGPU_RING_TYPE_COMPUTE:
2727                         timeout = adev->compute_timeout;
2728                         break;
2729                 case AMDGPU_RING_TYPE_SDMA:
2730                         timeout = adev->sdma_timeout;
2731                         break;
2732                 default:
2733                         timeout = adev->video_timeout;
2734                         break;
2735                 }
2736
2737                 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL,
2738                                    DRM_SCHED_PRIORITY_COUNT,
2739                                    ring->num_hw_submission, 0,
2740                                    timeout, adev->reset_domain->wq,
2741                                    ring->sched_score, ring->name,
2742                                    adev->dev);
2743                 if (r) {
2744                         DRM_ERROR("Failed to create scheduler on ring %s.\n",
2745                                   ring->name);
2746                         return r;
2747                 }
2748                 r = amdgpu_uvd_entity_init(adev, ring);
2749                 if (r) {
2750                         DRM_ERROR("Failed to create UVD scheduling entity on ring %s.\n",
2751                                   ring->name);
2752                         return r;
2753                 }
2754                 r = amdgpu_vce_entity_init(adev, ring);
2755                 if (r) {
2756                         DRM_ERROR("Failed to create VCE scheduling entity on ring %s.\n",
2757                                   ring->name);
2758                         return r;
2759                 }
2760         }
2761
2762         amdgpu_xcp_update_partition_sched_list(adev);
2763
2764         return 0;
2765 }
2766
2767
2768 /**
2769  * amdgpu_device_ip_init - run init for hardware IPs
2770  *
2771  * @adev: amdgpu_device pointer
2772  *
2773  * Main initialization pass for hardware IPs.  The list of all the hardware
2774  * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2775  * are run.  sw_init initializes the software state associated with each IP
2776  * and hw_init initializes the hardware associated with each IP.
2777  * Returns 0 on success, negative error code on failure.
2778  */
2779 static int amdgpu_device_ip_init(struct amdgpu_device *adev)
2780 {
2781         int i, r;
2782
2783         r = amdgpu_ras_init(adev);
2784         if (r)
2785                 return r;
2786
2787         for (i = 0; i < adev->num_ip_blocks; i++) {
2788                 if (!adev->ip_blocks[i].status.valid)
2789                         continue;
2790                 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2791                 if (r) {
2792                         DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2793                                   adev->ip_blocks[i].version->funcs->name, r);
2794                         goto init_failed;
2795                 }
2796                 adev->ip_blocks[i].status.sw = true;
2797
2798                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2799                         /* need to do common hw init early so everything is set up for gmc */
2800                         r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2801                         if (r) {
2802                                 DRM_ERROR("hw_init %d failed %d\n", i, r);
2803                                 goto init_failed;
2804                         }
2805                         adev->ip_blocks[i].status.hw = true;
2806                 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2807                         /* need to do gmc hw init early so we can allocate gpu mem */
2808                         /* Try to reserve bad pages early */
2809                         if (amdgpu_sriov_vf(adev))
2810                                 amdgpu_virt_exchange_data(adev);
2811
2812                         r = amdgpu_device_mem_scratch_init(adev);
2813                         if (r) {
2814                                 DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
2815                                 goto init_failed;
2816                         }
2817                         r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2818                         if (r) {
2819                                 DRM_ERROR("hw_init %d failed %d\n", i, r);
2820                                 goto init_failed;
2821                         }
2822                         r = amdgpu_device_wb_init(adev);
2823                         if (r) {
2824                                 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
2825                                 goto init_failed;
2826                         }
2827                         adev->ip_blocks[i].status.hw = true;
2828
2829                         /* right after GMC hw init, we create CSA */
2830                         if (adev->gfx.mcbp) {
2831                                 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2832                                                                AMDGPU_GEM_DOMAIN_VRAM |
2833                                                                AMDGPU_GEM_DOMAIN_GTT,
2834                                                                AMDGPU_CSA_SIZE);
2835                                 if (r) {
2836                                         DRM_ERROR("allocate CSA failed %d\n", r);
2837                                         goto init_failed;
2838                                 }
2839                         }
2840
2841                         r = amdgpu_seq64_init(adev);
2842                         if (r) {
2843                                 DRM_ERROR("allocate seq64 failed %d\n", r);
2844                                 goto init_failed;
2845                         }
2846                 }
2847         }
2848
2849         if (amdgpu_sriov_vf(adev))
2850                 amdgpu_virt_init_data_exchange(adev);
2851
2852         r = amdgpu_ib_pool_init(adev);
2853         if (r) {
2854                 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2855                 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2856                 goto init_failed;
2857         }
2858
2859         r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2860         if (r)
2861                 goto init_failed;
2862
2863         r = amdgpu_device_ip_hw_init_phase1(adev);
2864         if (r)
2865                 goto init_failed;
2866
2867         r = amdgpu_device_fw_loading(adev);
2868         if (r)
2869                 goto init_failed;
2870
2871         r = amdgpu_device_ip_hw_init_phase2(adev);
2872         if (r)
2873                 goto init_failed;
2874
2875         /*
2876          * Retired pages will be loaded from eeprom and reserved here; this
2877          * must be called after amdgpu_device_ip_hw_init_phase2 since for
2878          * some ASICs the RAS EEPROM code relies on the SMU being fully
2879          * functional for I2C communication, which is only true at this point.
2880          *
2881          * amdgpu_ras_recovery_init may fail, but the caller only cares about
2882          * failures caused by a bad gpu situation and stops the amdgpu init
2883          * process accordingly. For other failure cases it still releases all
2884          * the resources and prints an error message rather than returning a
2885          * negative value to the upper level.
2886          *
2887          * Note: theoretically this should be called before all vram
2888          * allocations, to protect retired pages from being reused.
2889          */
2890         r = amdgpu_ras_recovery_init(adev);
2891         if (r)
2892                 goto init_failed;
2893
2894         /*
2895          * In case of XGMI, grab an extra reference on the reset domain for this device.
2896          */
2897         if (adev->gmc.xgmi.num_physical_nodes > 1) {
2898                 if (amdgpu_xgmi_add_device(adev) == 0) {
2899                         if (!amdgpu_sriov_vf(adev)) {
2900                                 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2901
2902                                 if (WARN_ON(!hive)) {
2903                                         r = -ENOENT;
2904                                         goto init_failed;
2905                                 }
2906
2907                                 if (!hive->reset_domain ||
2908                                     !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2909                                         r = -ENOENT;
2910                                         amdgpu_put_xgmi_hive(hive);
2911                                         goto init_failed;
2912                                 }
2913
2914                                 /* Drop the early temporary reset domain we created for device */
2915                                 amdgpu_reset_put_reset_domain(adev->reset_domain);
2916                                 adev->reset_domain = hive->reset_domain;
2917                                 amdgpu_put_xgmi_hive(hive);
2918                         }
2919                 }
2920         }
2921
2922         r = amdgpu_device_init_schedulers(adev);
2923         if (r)
2924                 goto init_failed;
2925
2926         if (adev->mman.buffer_funcs_ring->sched.ready)
2927                 amdgpu_ttm_set_buffer_funcs_status(adev, true);
2928
2929         /* Don't init kfd if the whole hive needs to be reset during init */
2930         if (!adev->gmc.xgmi.pending_reset) {
2931                 kgd2kfd_init_zone_device(adev);
2932                 amdgpu_amdkfd_device_init(adev);
2933         }
2934
2935         amdgpu_fru_get_product_info(adev);
2936
2937 init_failed:
2938
2939         return r;
2940 }
2941
2942 /**
2943  * amdgpu_device_fill_reset_magic - cache reset magic from the gart pointer
2944  *
2945  * @adev: amdgpu_device pointer
2946  *
2947  * Caches a reset magic value from the gart pointer in VRAM.  The driver calls
2948  * this function before a GPU reset.  If the value is retained after the
2949  * GPU reset, VRAM has not been lost.  Some GPU resets may destroy VRAM contents.
2950  */
2951 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
2952 {
2953         memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2954 }
2955
2956 /**
2957  * amdgpu_device_check_vram_lost - check if vram is valid
2958  *
2959  * @adev: amdgpu_device pointer
2960  *
2961  * Compares the cached reset magic value against the gart pointer in VRAM.
2962  * The driver calls this after a GPU reset to see if the contents of
2963  * VRAM were lost or not.
2964  * Returns true if vram is lost, false if not.
2965  */
2966 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
2967 {
2968         if (memcmp(adev->gart.ptr, adev->reset_magic,
2969                         AMDGPU_RESET_MAGIC_NUM))
2970                 return true;
2971
2972         if (!amdgpu_in_reset(adev))
2973                 return false;
2974
2975         /*
2976          * For all ASICs with baco/mode1 reset, the VRAM is
2977          * always assumed to be lost.
2978          */
2979         switch (amdgpu_asic_reset_method(adev)) {
2980         case AMD_RESET_METHOD_BACO:
2981         case AMD_RESET_METHOD_MODE1:
2982                 return true;
2983         default:
2984                 return false;
2985         }
2986 }
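
/*
 * A minimal sketch of the snapshot-and-compare pattern used by the two
 * helpers above, folded into one function for illustration.  This is not
 * part of the driver; the example_* name is hypothetical.
 */
static bool __maybe_unused example_region_survived(const void *region,
						   void *magic, size_t len,
						   bool before_reset)
{
	if (before_reset) {
		/* snapshot a signature of the region before the reset */
		memcpy(magic, region, len);
		return true;
	}

	/* after the reset: the region survived iff the signature matches */
	return memcmp(region, magic, len) == 0;
}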
2987
2988 /**
2989  * amdgpu_device_set_cg_state - set clockgating for amdgpu device
2990  *
2991  * @adev: amdgpu_device pointer
2992  * @state: clockgating state (gate or ungate)
2993  *
2994  * The list of all the hardware IPs that make up the asic is walked and the
2995  * set_clockgating_state callbacks are run.
2996  * A late initialization pass enables clockgating for hardware IPs;
2997  * a fini or suspend pass disables clockgating for hardware IPs.
2998  * Returns 0 on success, negative error code on failure.
2999  */
3001 int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
3002                                enum amd_clockgating_state state)
3003 {
3004         int i, j, r;
3005
3006         if (amdgpu_emu_mode == 1)
3007                 return 0;
3008
3009         for (j = 0; j < adev->num_ip_blocks; j++) {
3010                 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
3011                 if (!adev->ip_blocks[i].status.late_initialized)
3012                         continue;
3013                 /* skip CG for GFX, SDMA on S0ix */
3014                 if (adev->in_s0ix &&
3015                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3016                      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3017                         continue;
3018                 /* skip CG for VCE/UVD, it's handled specially */
3019                 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
3020                     adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
3021                     adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
3022                     adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
3023                     adev->ip_blocks[i].version->funcs->set_clockgating_state) {
3024                         /* enable clockgating to save power */
3025                         r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
3026                                                                                      state);
3027                         if (r) {
3028                                 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
3029                                           adev->ip_blocks[i].version->funcs->name, r);
3030                                 return r;
3031                         }
3032                 }
3033         }
3034
3035         return 0;
3036 }
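
/*
 * The index mirroring in the loop above is a compact idiom for walking the
 * same array forwards when gating and backwards when ungating, so blocks
 * are ungated in the reverse of the order they were gated.  A standalone
 * sketch of just the idiom (illustrative only; not part of the driver):
 */
static void __maybe_unused example_walk_order(int num_blocks, bool gate)
{
	int i, j;

	for (j = 0; j < num_blocks; j++) {
		/* visits 0..n-1 when gating, n-1..0 when ungating */
		i = gate ? j : num_blocks - j - 1;
		pr_debug("visiting block %d\n", i);
	}
}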
3037
3038 int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
3039                                enum amd_powergating_state state)
3040 {
3041         int i, j, r;
3042
3043         if (amdgpu_emu_mode == 1)
3044                 return 0;
3045
3046         for (j = 0; j < adev->num_ip_blocks; j++) {
3047                 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
3048                 if (!adev->ip_blocks[i].status.late_initialized)
3049                         continue;
3050                 /* skip PG for GFX, SDMA on S0ix */
3051                 if (adev->in_s0ix &&
3052                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3053                      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3054                         continue;
3055                 /* skip PG for VCE/UVD, it's handled specially */
3056                 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
3057                     adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
3058                     adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
3059                     adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
3060                     adev->ip_blocks[i].version->funcs->set_powergating_state) {
3061                         /* enable powergating to save power */
3062                         r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
3063                                                                                         state);
3064                         if (r) {
3065                                 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
3066                                           adev->ip_blocks[i].version->funcs->name, r);
3067                                 return r;
3068                         }
3069                 }
3070         }
3071         return 0;
3072 }
3073
3074 static int amdgpu_device_enable_mgpu_fan_boost(void)
3075 {
3076         struct amdgpu_gpu_instance *gpu_ins;
3077         struct amdgpu_device *adev;
3078         int i, ret = 0;
3079
3080         mutex_lock(&mgpu_info.mutex);
3081
3082         /*
3083          * MGPU fan boost feature should be enabled
3084          * only when there are two or more dGPUs in
3085          * the system
3086          */
3087         if (mgpu_info.num_dgpu < 2)
3088                 goto out;
3089
3090         for (i = 0; i < mgpu_info.num_dgpu; i++) {
3091                 gpu_ins = &(mgpu_info.gpu_ins[i]);
3092                 adev = gpu_ins->adev;
3093                 if (!(adev->flags & AMD_IS_APU) &&
3094                     !gpu_ins->mgpu_fan_enabled) {
3095                         ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
3096                         if (ret)
3097                                 break;
3098
3099                         gpu_ins->mgpu_fan_enabled = 1;
3100                 }
3101         }
3102
3103 out:
3104         mutex_unlock(&mgpu_info.mutex);
3105
3106         return ret;
3107 }
3108
3109 /**
3110  * amdgpu_device_ip_late_init - run late init for hardware IPs
3111  *
3112  * @adev: amdgpu_device pointer
3113  *
3114  * Late initialization pass for hardware IPs.  The list of all the hardware
3115  * IPs that make up the asic is walked and the late_init callbacks are run.
3116  * late_init covers any special initialization that an IP requires
3117  * after all of the IP blocks have been initialized or something that needs to happen
3118  * late in the init process.
3119  * Returns 0 on success, negative error code on failure.
3120  */
3121 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
3122 {
3123         struct amdgpu_gpu_instance *gpu_instance;
3124         int i = 0, r;
3125
3126         for (i = 0; i < adev->num_ip_blocks; i++) {
3127                 if (!adev->ip_blocks[i].status.hw)
3128                         continue;
3129                 if (adev->ip_blocks[i].version->funcs->late_init) {
3130                         r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
3131                         if (r) {
3132                                 DRM_ERROR("late_init of IP block <%s> failed %d\n",
3133                                           adev->ip_blocks[i].version->funcs->name, r);
3134                                 return r;
3135                         }
3136                 }
3137                 adev->ip_blocks[i].status.late_initialized = true;
3138         }
3139
3140         r = amdgpu_ras_late_init(adev);
3141         if (r) {
3142                 DRM_ERROR("amdgpu_ras_late_init failed %d\n", r);
3143                 return r;
3144         }
3145
3146         amdgpu_ras_set_error_query_ready(adev, true);
3147
3148         amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
3149         amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
3150
3151         amdgpu_device_fill_reset_magic(adev);
3152
3153         r = amdgpu_device_enable_mgpu_fan_boost();
3154         if (r)
3155                 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
3156
3157         /* For passthrough configuration on arcturus and aldebaran, enable special SBR handling */
3158         if (amdgpu_passthrough(adev) &&
3159             ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
3160              adev->asic_type == CHIP_ALDEBARAN))
3161                 amdgpu_dpm_handle_passthrough_sbr(adev, true);
3162
3163         if (adev->gmc.xgmi.num_physical_nodes > 1) {
3164                 mutex_lock(&mgpu_info.mutex);
3165
3166                 /*
3167                  * Reset device p-state to low as it was booted with high.
3168                  *
3169                  * This should be performed only after all devices from the same
3170                  * hive get initialized.
3171                  *
3172                  * However, the number of devices in a hive is not known in
3173                  * advance; it is counted one by one as the devices initialize.
3174                  *
3175                  * So, we wait for all XGMI interlinked devices to be initialized.
3176                  * This may bring some delays as those devices may come from
3177                  * different hives. But that should be OK.
3178                  */
3179                 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
3180                         for (i = 0; i < mgpu_info.num_gpu; i++) {
3181                                 gpu_instance = &(mgpu_info.gpu_ins[i]);
3182                                 if (gpu_instance->adev->flags & AMD_IS_APU)
3183                                         continue;
3184
3185                                 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
3186                                                 AMDGPU_XGMI_PSTATE_MIN);
3187                                 if (r) {
3188                                         DRM_ERROR("pstate setting failed (%d).\n", r);
3189                                         break;
3190                                 }
3191                         }
3192                 }
3193
3194                 mutex_unlock(&mgpu_info.mutex);
3195         }
3196
3197         return 0;
3198 }
3199
3200 /**
3201  * amdgpu_device_smu_fini_early - smu hw_fini wrapper
3202  *
3203  * @adev: amdgpu_device pointer
3204  *
3205  * For ASICs that need to disable the SMC first
3206  */
3207 static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
3208 {
3209         int i, r;
3210
3211         if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
3212                 return;
3213
3214         for (i = 0; i < adev->num_ip_blocks; i++) {
3215                 if (!adev->ip_blocks[i].status.hw)
3216                         continue;
3217                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3218                         r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3219                         /* XXX handle errors */
3220                         if (r) {
3221                                 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
3222                                           adev->ip_blocks[i].version->funcs->name, r);
3223                         }
3224                         adev->ip_blocks[i].status.hw = false;
3225                         break;
3226                 }
3227         }
3228 }
3229
3230 static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
3231 {
3232         int i, r;
3233
3234         for (i = 0; i < adev->num_ip_blocks; i++) {
3235                 if (!adev->ip_blocks[i].version->funcs->early_fini)
3236                         continue;
3237
3238                 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
3239                 if (r) {
3240                         DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
3241                                   adev->ip_blocks[i].version->funcs->name, r);
3242                 }
3243         }
3244
3245         amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3246         amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3247
3248         amdgpu_amdkfd_suspend(adev, false);
3249
3250         /* Workaround for ASICs that need to disable the SMC first */
3251         amdgpu_device_smu_fini_early(adev);
3252
3253         for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3254                 if (!adev->ip_blocks[i].status.hw)
3255                         continue;
3256
3257                 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3258                 /* XXX handle errors */
3259                 if (r) {
3260                         DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
3261                                   adev->ip_blocks[i].version->funcs->name, r);
3262                 }
3263
3264                 adev->ip_blocks[i].status.hw = false;
3265         }
3266
3267         if (amdgpu_sriov_vf(adev)) {
3268                 if (amdgpu_virt_release_full_gpu(adev, false))
3269                         DRM_ERROR("failed to release exclusive mode on fini\n");
3270         }
3271
3272         return 0;
3273 }
3274
3275 /**
3276  * amdgpu_device_ip_fini - run fini for hardware IPs
3277  *
3278  * @adev: amdgpu_device pointer
3279  *
3280  * Main teardown pass for hardware IPs.  The list of all the hardware
3281  * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
3282  * are run.  hw_fini tears down the hardware associated with each IP
3283  * and sw_fini tears down any software state associated with each IP.
3284  * Returns 0 on success, negative error code on failure.
3285  */
3286 static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
3287 {
3288         int i, r;
3289
3290         if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
3291                 amdgpu_virt_release_ras_err_handler_data(adev);
3292
3293         if (adev->gmc.xgmi.num_physical_nodes > 1)
3294                 amdgpu_xgmi_remove_device(adev);
3295
3296         amdgpu_amdkfd_device_fini_sw(adev);
3297
3298         for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3299                 if (!adev->ip_blocks[i].status.sw)
3300                         continue;
3301
3302                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
3303                         amdgpu_ucode_free_bo(adev);
3304                         amdgpu_free_static_csa(&adev->virt.csa_obj);
3305                         amdgpu_device_wb_fini(adev);
3306                         amdgpu_device_mem_scratch_fini(adev);
3307                         amdgpu_ib_pool_fini(adev);
3308                         amdgpu_seq64_fini(adev);
3309                 }
3310
3311                 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
3312                 /* XXX handle errors */
3313                 if (r) {
3314                         DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
3315                                   adev->ip_blocks[i].version->funcs->name, r);
3316                 }
3317                 adev->ip_blocks[i].status.sw = false;
3318                 adev->ip_blocks[i].status.valid = false;
3319         }
3320
3321         for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3322                 if (!adev->ip_blocks[i].status.late_initialized)
3323                         continue;
3324                 if (adev->ip_blocks[i].version->funcs->late_fini)
3325                         adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
3326                 adev->ip_blocks[i].status.late_initialized = false;
3327         }
3328
3329         amdgpu_ras_fini(adev);
3330
3331         return 0;
3332 }
3333
3334 /**
3335  * amdgpu_device_delayed_init_work_handler - work handler for IB tests
3336  *
3337  * @work: work_struct.
3338  */
3339 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
3340 {
3341         struct amdgpu_device *adev =
3342                 container_of(work, struct amdgpu_device, delayed_init_work.work);
3343         int r;
3344
3345         r = amdgpu_ib_ring_tests(adev);
3346         if (r)
3347                 DRM_ERROR("ib ring test failed (%d).\n", r);
3348 }
3349
3350 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
3351 {
3352         struct amdgpu_device *adev =
3353                 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
3354
3355         WARN_ON_ONCE(adev->gfx.gfx_off_state);
3356         WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
3357
3358         if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
3359                 adev->gfx.gfx_off_state = true;
3360 }
3361
3362 /**
3363  * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
3364  *
3365  * @adev: amdgpu_device pointer
3366  *
3367  * Main suspend function for hardware IPs.  The list of all the hardware
3368  * IPs that make up the asic is walked, clockgating is disabled and the
3369  * suspend callbacks are run.  suspend puts the hardware and software state
3370  * in each IP into a state suitable for suspend.
3371  * Returns 0 on success, negative error code on failure.
3372  */
3373 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
3374 {
3375         int i, r;
3376
3377         amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3378         amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3379
3380         /*
3381          * Per the PMFW team's suggestion, the driver needs to disable the gfxoff
3382          * and df cstate features in gpu reset scenarios (e.g. Mode1Reset).
3383          * Add the missing df cstate disablement here.
3384          */
3385         if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
3386                 dev_warn(adev->dev, "Failed to disallow df cstate");
3387
3388         for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3389                 if (!adev->ip_blocks[i].status.valid)
3390                         continue;
3391
3392                 /* displays are handled separately */
3393                 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
3394                         continue;
3395
3397                 r = adev->ip_blocks[i].version->funcs->suspend(adev);
3398                 /* XXX handle errors */
3399                 if (r) {
3400                         DRM_ERROR("suspend of IP block <%s> failed %d\n",
3401                                   adev->ip_blocks[i].version->funcs->name, r);
3402                         return r;
3403                 }
3404
3405                 adev->ip_blocks[i].status.hw = false;
3406         }
3407
3408         return 0;
3409 }
3410
3411 /**
3412  * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3413  *
3414  * @adev: amdgpu_device pointer
3415  *
3416  * Main suspend function for hardware IPs.  The list of all the hardware
3417  * IPs that make up the asic is walked, clockgating is disabled and the
3418  * suspend callbacks are run.  suspend puts the hardware and software state
3419  * in each IP into a state suitable for suspend.
3420  * Returns 0 on success, negative error code on failure.
3421  */
3422 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
3423 {
3424         int i, r;
3425
3426         if (adev->in_s0ix)
3427                 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
3428
3429         for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3430                 if (!adev->ip_blocks[i].status.valid)
3431                         continue;
3432                 /* displays are handled in phase1 */
3433                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3434                         continue;
3435                 /* PSP lost connection when err_event_athub occurs */
3436                 if (amdgpu_ras_intr_triggered() &&
3437                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3438                         adev->ip_blocks[i].status.hw = false;
3439                         continue;
3440                 }
3441
3442                 /* skip unnecessary suspend if we have not initialized them yet */
3443                 if (adev->gmc.xgmi.pending_reset &&
3444                     !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3445                       adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3446                       adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3447                       adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3448                         adev->ip_blocks[i].status.hw = false;
3449                         continue;
3450                 }
3451
3452                 /* skip suspend of gfx/mes and psp for S0ix
3453                  * gfx is in gfxoff state, so on resume it will exit gfxoff just
3454                  * like at runtime. PSP is also part of the always on hardware
3455                  * so no need to suspend it.
3456                  */
3457                 if (adev->in_s0ix &&
3458                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
3459                      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3460                      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
3461                         continue;
3462
3463                 /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3464                 if (adev->in_s0ix &&
3465                     (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
3466                      IP_VERSION(5, 0, 0)) &&
3467                     (adev->ip_blocks[i].version->type ==
3468                      AMD_IP_BLOCK_TYPE_SDMA))
3469                         continue;
3470
3471                 /* During cold boot, PSP provides the IMU and RLC FW binaries to the TOS.
3472                  * These live in the TMR and are expected to be reused by PSP-TOS to reload
3473                  * from that location; RLC autoload is likewise loaded from there based on
3474                  * the PMFW -> PSP message during the re-init sequence.
3475                  * Therefore, psp suspend & resume should be skipped to avoid destroying
3476                  * the TMR and reloading FWs again for IMU-enabled APU ASICs.
3477                  */
3478                 if (amdgpu_in_reset(adev) &&
3479                     (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3480                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3481                         continue;
3482
3484                 r = adev->ip_blocks[i].version->funcs->suspend(adev);
3485                 /* XXX handle errors */
3486                 if (r) {
3487                         DRM_ERROR("suspend of IP block <%s> failed %d\n",
3488                                   adev->ip_blocks[i].version->funcs->name, r);
3489                 }
3490                 adev->ip_blocks[i].status.hw = false;
3491                 /* handle putting the SMC in the appropriate state */
3492                 if (!amdgpu_sriov_vf(adev)) {
3493                         if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3494                                 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3495                                 if (r) {
3496                                         DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3497                                                         adev->mp1_state, r);
3498                                         return r;
3499                                 }
3500                         }
3501                 }
3502         }
3503
3504         return 0;
3505 }
3506
3507 /**
3508  * amdgpu_device_ip_suspend - run suspend for hardware IPs
3509  *
3510  * @adev: amdgpu_device pointer
3511  *
3512  * Main suspend function for hardware IPs.  The list of all the hardware
3513  * IPs that make up the asic is walked, clockgating is disabled and the
3514  * suspend callbacks are run.  suspend puts the hardware and software state
3515  * in each IP into a state suitable for suspend.
3516  * Returns 0 on success, negative error code on failure.
3517  */
3518 int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3519 {
3520         int r;
3521
3522         if (amdgpu_sriov_vf(adev)) {
3523                 amdgpu_virt_fini_data_exchange(adev);
3524                 amdgpu_virt_request_full_gpu(adev, false);
3525         }
3526
3527         amdgpu_ttm_set_buffer_funcs_status(adev, false);
3528
3529         r = amdgpu_device_ip_suspend_phase1(adev);
3530         if (r)
3531                 return r;
3532         r = amdgpu_device_ip_suspend_phase2(adev);
3533
3534         if (amdgpu_sriov_vf(adev))
3535                 amdgpu_virt_release_full_gpu(adev, false);
3536
3537         return r;
3538 }
3539
3540 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
3541 {
3542         int i, r;
3543
3544         static enum amd_ip_block_type ip_order[] = {
3545                 AMD_IP_BLOCK_TYPE_COMMON,
3546                 AMD_IP_BLOCK_TYPE_GMC,
3547                 AMD_IP_BLOCK_TYPE_PSP,
3548                 AMD_IP_BLOCK_TYPE_IH,
3549         };
3550
3551         for (i = 0; i < adev->num_ip_blocks; i++) {
3552                 int j;
3553                 struct amdgpu_ip_block *block;
3554
3555                 block = &adev->ip_blocks[i];
3556                 block->status.hw = false;
3557
3558                 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
3559
3560                         if (block->version->type != ip_order[j] ||
3561                                 !block->status.valid)
3562                                 continue;
3563
3564                         r = block->version->funcs->hw_init(adev);
3565                         DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
3566                         if (r)
3567                                 return r;
3568                         block->status.hw = true;
3569                 }
3570         }
3571
3572         return 0;
3573 }
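
/*
 * Driving re-init from a small fixed-order table, as above, keeps the
 * required bring-up order independent of the order in which the IP blocks
 * were discovered.  A condensed sketch of the pattern (illustrative only;
 * example_init_in_order is not part of the driver):
 */
static int __maybe_unused
example_init_in_order(struct amdgpu_device *adev,
		      const enum amd_ip_block_type *order, int order_len)
{
	int i, j, r;

	for (i = 0; i < order_len; i++) {
		for (j = 0; j < adev->num_ip_blocks; j++) {
			struct amdgpu_ip_block *block = &adev->ip_blocks[j];

			if (block->version->type != order[i] ||
			    !block->status.valid || block->status.hw)
				continue;

			r = block->version->funcs->hw_init(adev);
			if (r)
				return r;
			block->status.hw = true;
		}
	}

	return 0;
}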
3574
3575 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
3576 {
3577         int i, r;
3578
3579         static enum amd_ip_block_type ip_order[] = {
3580                 AMD_IP_BLOCK_TYPE_SMC,
3581                 AMD_IP_BLOCK_TYPE_DCE,
3582                 AMD_IP_BLOCK_TYPE_GFX,
3583                 AMD_IP_BLOCK_TYPE_SDMA,
3584                 AMD_IP_BLOCK_TYPE_MES,
3585                 AMD_IP_BLOCK_TYPE_UVD,
3586                 AMD_IP_BLOCK_TYPE_VCE,
3587                 AMD_IP_BLOCK_TYPE_VCN,
3588                 AMD_IP_BLOCK_TYPE_JPEG
3589         };
3590
3591         for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3592                 int j;
3593                 struct amdgpu_ip_block *block;
3594
3595                 for (j = 0; j < adev->num_ip_blocks; j++) {
3596                         block = &adev->ip_blocks[j];
3597
3598                         if (block->version->type != ip_order[i] ||
3599                                 !block->status.valid ||
3600                                 block->status.hw)
3601                                 continue;
3602
3603                         if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3604                                 r = block->version->funcs->resume(adev);
3605                         else
3606                                 r = block->version->funcs->hw_init(adev);
3607
3608                         DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
3609                         if (r)
3610                                 return r;
3611                         block->status.hw = true;
3612                 }
3613         }
3614
3615         return 0;
3616 }
3617
3618 /**
3619  * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs (phase 1)
3620  *
3621  * @adev: amdgpu_device pointer
3622  *
3623  * First resume function for hardware IPs.  The list of all the hardware
3624  * IPs that make up the asic is walked and the resume callbacks are run for
3625  * COMMON, GMC, and IH.  resume puts the hardware into a functional state
3626  * after a suspend and updates the software state as necessary.  This
3627  * function is also used for restoring the GPU after a GPU reset.
3628  * Returns 0 on success, negative error code on failure.
3629  */
3630 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
3631 {
3632         int i, r;
3633
3634         for (i = 0; i < adev->num_ip_blocks; i++) {
3635                 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3636                         continue;
3637                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3638                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3639                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3640                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
3641
3642                         r = adev->ip_blocks[i].version->funcs->resume(adev);
3643                         if (r) {
3644                                 DRM_ERROR("resume of IP block <%s> failed %d\n",
3645                                           adev->ip_blocks[i].version->funcs->name, r);
3646                                 return r;
3647                         }
3648                         adev->ip_blocks[i].status.hw = true;
3649                 }
3650         }
3651
3652         return 0;
3653 }
3654
3655 /**
3656  * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs (phase 2)
3657  *
3658  * @adev: amdgpu_device pointer
3659  *
3660  * Second resume function for hardware IPs.  The list of all the hardware
3661  * IPs that make up the asic is walked and the resume callbacks are run for
3662  * all blocks except COMMON, GMC, and IH.  resume puts the hardware into a
3663  * functional state after a suspend and updates the software state as
3664  * necessary.  This function is also used for restoring the GPU after a GPU
3665  * reset.
3666  * Returns 0 on success, negative error code on failure.
3667  */
3668 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
3669 {
3670         int i, r;
3671
3672         for (i = 0; i < adev->num_ip_blocks; i++) {
3673                 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3674                         continue;
3675                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3676                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3677                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3678                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3679                         continue;
3680                 r = adev->ip_blocks[i].version->funcs->resume(adev);
3681                 if (r) {
3682                         DRM_ERROR("resume of IP block <%s> failed %d\n",
3683                                   adev->ip_blocks[i].version->funcs->name, r);
3684                         return r;
3685                 }
3686                 adev->ip_blocks[i].status.hw = true;
3687         }
3688
3689         return 0;
3690 }
3691
3692 /**
3693  * amdgpu_device_ip_resume - run resume for hardware IPs
3694  *
3695  * @adev: amdgpu_device pointer
3696  *
3697  * Main resume function for hardware IPs.  The hardware IPs
3698  * are split into two resume functions because they are
3699  * also used in recovering from a GPU reset and some additional
3700  * steps need to be taken between them.  In this case (S3/S4) they are
3701  * run sequentially.
3702  * Returns 0 on success, negative error code on failure.
3703  */
3704 static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
3705 {
3706         int r;
3707
3708         r = amdgpu_device_ip_resume_phase1(adev);
3709         if (r)
3710                 return r;
3711
3712         r = amdgpu_device_fw_loading(adev);
3713         if (r)
3714                 return r;
3715
3716         r = amdgpu_device_ip_resume_phase2(adev);
3717
3718         if (adev->mman.buffer_funcs_ring->sched.ready)
3719                 amdgpu_ttm_set_buffer_funcs_status(adev, true);
3720
3721         return r;
3722 }
3723
3724 /**
3725  * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3726  *
3727  * @adev: amdgpu_device pointer
3728  *
3729  * Query the VBIOS data tables to determine if the board supports SR-IOV.
3730  */
3731 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
3732 {
3733         if (amdgpu_sriov_vf(adev)) {
3734                 if (adev->is_atom_fw) {
3735                         if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
3736                                 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3737                 } else {
3738                         if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3739                                 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3740                 }
3741
3742                 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3743                         amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
3744         }
3745 }
3746
3747 /**
3748  * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3749  *
3750  * @asic_type: AMD asic type
3751  *
3752  * Check if there is DC (new modesetting infrastructure) support for an asic.
3753  * Returns true if DC has support, false if not.
3754  */
3755 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3756 {
3757         switch (asic_type) {
3758 #ifdef CONFIG_DRM_AMDGPU_SI
3759         case CHIP_HAINAN:
3760 #endif
3761         case CHIP_TOPAZ:
3762                 /* chips with no display hardware */
3763                 return false;
3764 #if defined(CONFIG_DRM_AMD_DC)
3765         case CHIP_TAHITI:
3766         case CHIP_PITCAIRN:
3767         case CHIP_VERDE:
3768         case CHIP_OLAND:
3769                 /*
3770                  * We have systems in the wild with these ASICs that require
3771                  * LVDS and VGA support which is not supported with DC.
3772                  *
3773                  * Fallback to the non-DC driver here by default so as not to
3774                  * cause regressions.
3775                  */
3776 #if defined(CONFIG_DRM_AMD_DC_SI)
3777                 return amdgpu_dc > 0;
3778 #else
3779                 return false;
3780 #endif
3781         case CHIP_BONAIRE:
3782         case CHIP_KAVERI:
3783         case CHIP_KABINI:
3784         case CHIP_MULLINS:
3785                 /*
3786                  * We have systems in the wild with these ASICs that require
3787                  * VGA support which is not supported with DC.
3788                  *
3789                  * Fallback to the non-DC driver here by default so as not to
3790                  * cause regressions.
3791                  */
3792                 return amdgpu_dc > 0;
3793         default:
3794                 return amdgpu_dc != 0;
3795 #else
3796         default:
3797                 if (amdgpu_dc > 0)
3798                         DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
3799                 return false;
3800 #endif
3801         }
3802 }
3803
3804 /**
3805  * amdgpu_device_has_dc_support - check if dc is supported
3806  *
3807  * @adev: amdgpu_device pointer
3808  *
3809  * Returns true for supported, false for not supported
3810  */
3811 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3812 {
3813         if (adev->enable_virtual_display ||
3814             (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
3815                 return false;
3816
3817         return amdgpu_device_asic_has_dc_support(adev->asic_type);
3818 }
3819
3820 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3821 {
3822         struct amdgpu_device *adev =
3823                 container_of(__work, struct amdgpu_device, xgmi_reset_work);
3824         struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
3825
3826         /* It's a bug to not have a hive within this function */
3827         if (WARN_ON(!hive))
3828                 return;
3829
3830         /*
3831          * Use task barrier to synchronize all xgmi reset works across the
3832          * hive. task_barrier_enter and task_barrier_exit will block
3833          * until all the threads running the xgmi reset works reach
3834          * those points. task_barrier_full will do both blocks.
3835          */
3836         if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3837
3838                 task_barrier_enter(&hive->tb);
3839                 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
3840
3841                 if (adev->asic_reset_res)
3842                         goto fail;
3843
3844                 task_barrier_exit(&hive->tb);
3845                 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
3846
3847                 if (adev->asic_reset_res)
3848                         goto fail;
3849
3850                 amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
3851         } else {
3852
3853                 task_barrier_full(&hive->tb);
3854                 adev->asic_reset_res =  amdgpu_asic_reset(adev);
3855         }
3856
3857 fail:
3858         if (adev->asic_reset_res)
3859                 DRM_WARN("ASIC reset failed with error %d for drm dev %s",
3860                          adev->asic_reset_res, adev_to_drm(adev)->unique);
3861         amdgpu_put_xgmi_hive(hive);
3862 }
3863
3864 static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3865 {
3866         char *input = amdgpu_lockup_timeout;
3867         char *timeout_setting = NULL;
3868         int index = 0;
3869         long timeout;
3870         int ret = 0;
3871
3872         /*
3873          * By default the timeout for non-compute jobs is 10000
3874          * and for compute jobs 60000.
3875          * In SR-IOV mode the compute job timeout defaults to 60000
3876          * in one-VF mode and to 10000 otherwise.
3877          */
3878         adev->gfx_timeout = msecs_to_jiffies(10000);
3879         adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3880         if (amdgpu_sriov_vf(adev))
3881                 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3882                                         msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
3883         else
3884                 adev->compute_timeout =  msecs_to_jiffies(60000);
3885
3886         if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3887                 while ((timeout_setting = strsep(&input, ",")) &&
3888                                 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3889                         ret = kstrtol(timeout_setting, 0, &timeout);
3890                         if (ret)
3891                                 return ret;
3892
3893                         if (timeout == 0) {
3894                                 index++;
3895                                 continue;
3896                         } else if (timeout < 0) {
3897                                 timeout = MAX_SCHEDULE_TIMEOUT;
3898                                 dev_warn(adev->dev, "lockup timeout disabled");
3899                                 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
3900                         } else {
3901                                 timeout = msecs_to_jiffies(timeout);
3902                         }
3903
3904                         switch (index++) {
3905                         case 0:
3906                                 adev->gfx_timeout = timeout;
3907                                 break;
3908                         case 1:
3909                                 adev->compute_timeout = timeout;
3910                                 break;
3911                         case 2:
3912                                 adev->sdma_timeout = timeout;
3913                                 break;
3914                         case 3:
3915                                 adev->video_timeout = timeout;
3916                                 break;
3917                         default:
3918                                 break;
3919                         }
3920                 }
3921                 /*
3922                  * There is only one value specified and
3923                  * it should apply to all non-compute jobs.
3924                  */
3925                 if (index == 1) {
3926                         adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3927                         if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3928                                 adev->compute_timeout = adev->gfx_timeout;
3929                 }
3930         }
3931
3932         return ret;
3933 }
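
/*
 * The parser above consumes a comma separated list in the order
 * gfx,compute,sdma,video, e.g. amdgpu.lockup_timeout=10000,60000,0,-1
 * (0 keeps the default for that slot, a negative value disables the
 * timeout).  A minimal sketch of the strsep()/kstrtol() pattern it is
 * built on (illustrative only; example_parse_csv is not part of the
 * driver):
 */
static int __maybe_unused example_parse_csv(char *input, long *vals, int max)
{
	char *tok;
	int n = 0, ret;

	while (n < max && (tok = strsep(&input, ","))) {
		/* base 0: accept decimal, octal or hex entries */
		ret = kstrtol(tok, 0, &vals[n]);
		if (ret)
			return ret;	/* malformed entry */
		n++;
	}

	return n;			/* number of values parsed */
}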
3934
3935 /**
3936  * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3937  *
3938  * @adev: amdgpu_device pointer
3939  *
3940  * RAM is direct mapped to the GPU if the IOMMU is not enabled or is in passthrough mode
3941  */
3942 static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3943 {
3944         struct iommu_domain *domain;
3945
3946         domain = iommu_get_domain_for_dev(adev->dev);
3947         if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3948                 adev->ram_is_direct_mapped = true;
3949 }
3950
3951 static const struct attribute *amdgpu_dev_attributes[] = {
3952         &dev_attr_pcie_replay_count.attr,
3953         NULL
3954 };
3955
3956 static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
3957 {
3958         if (amdgpu_mcbp == 1)
3959                 adev->gfx.mcbp = true;
3960         else if (amdgpu_mcbp == 0)
3961                 adev->gfx.mcbp = false;
3962
3963         if (amdgpu_sriov_vf(adev))
3964                 adev->gfx.mcbp = true;
3965
3966         if (adev->gfx.mcbp)
3967                 DRM_INFO("MCBP is enabled\n");
3968 }
3969
3970 /**
3971  * amdgpu_device_init - initialize the driver
3972  *
3973  * @adev: amdgpu_device pointer
3974  * @flags: driver flags
3975  *
3976  * Initializes the driver info and hw (all asics).
3977  * Returns 0 for success or an error on failure.
3978  * Called at driver startup.
3979  */
3980 int amdgpu_device_init(struct amdgpu_device *adev,
3981                        uint32_t flags)
3982 {
3983         struct drm_device *ddev = adev_to_drm(adev);
3984         struct pci_dev *pdev = adev->pdev;
3985         int r, i;
3986         bool px = false;
3987         u32 max_MBps;
3988         int tmp;
3989
3990         adev->shutdown = false;
3991         adev->flags = flags;
3992
3993         if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3994                 adev->asic_type = amdgpu_force_asic_type;
3995         else
3996                 adev->asic_type = flags & AMD_ASIC_MASK;
3997
3998         adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
3999         if (amdgpu_emu_mode == 1)
4000                 adev->usec_timeout *= 10;
4001         adev->gmc.gart_size = 512 * 1024 * 1024;
4002         adev->accel_working = false;
4003         adev->num_rings = 0;
4004         RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
4005         adev->mman.buffer_funcs = NULL;
4006         adev->mman.buffer_funcs_ring = NULL;
4007         adev->vm_manager.vm_pte_funcs = NULL;
4008         adev->vm_manager.vm_pte_num_scheds = 0;
4009         adev->gmc.gmc_funcs = NULL;
4010         adev->harvest_ip_mask = 0x0;
4011         adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
4012         bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
4013
4014         adev->smc_rreg = &amdgpu_invalid_rreg;
4015         adev->smc_wreg = &amdgpu_invalid_wreg;
4016         adev->pcie_rreg = &amdgpu_invalid_rreg;
4017         adev->pcie_wreg = &amdgpu_invalid_wreg;
4018         adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
4019         adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
4020         adev->pciep_rreg = &amdgpu_invalid_rreg;
4021         adev->pciep_wreg = &amdgpu_invalid_wreg;
4022         adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
4023         adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
4024         adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
4025         adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
4026         adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
4027         adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
4028         adev->didt_rreg = &amdgpu_invalid_rreg;
4029         adev->didt_wreg = &amdgpu_invalid_wreg;
4030         adev->gc_cac_rreg = &amdgpu_invalid_rreg;
4031         adev->gc_cac_wreg = &amdgpu_invalid_wreg;
4032         adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
4033         adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
4034
4035         DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
4036                  amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
4037                  pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
4038
4039         /* mutex initializations are all done here so we
4040          * can recall functions without having locking issues
4041          */
4042         mutex_init(&adev->firmware.mutex);
4043         mutex_init(&adev->pm.mutex);
4044         mutex_init(&adev->gfx.gpu_clock_mutex);
4045         mutex_init(&adev->srbm_mutex);
4046         mutex_init(&adev->gfx.pipe_reserve_mutex);
4047         mutex_init(&adev->gfx.gfx_off_mutex);
4048         mutex_init(&adev->gfx.partition_mutex);
4049         mutex_init(&adev->grbm_idx_mutex);
4050         mutex_init(&adev->mn_lock);
4051         mutex_init(&adev->virt.vf_errors.lock);
4052         mutex_init(&adev->virt.rlcg_reg_lock);
4053         hash_init(adev->mn_hash);
4054         mutex_init(&adev->psp.mutex);
4055         mutex_init(&adev->notifier_lock);
4056         mutex_init(&adev->pm.stable_pstate_ctx_lock);
4057         mutex_init(&adev->benchmark_mutex);
4058
4059         amdgpu_device_init_apu_flags(adev);
4060
4061         r = amdgpu_device_check_arguments(adev);
4062         if (r)
4063                 return r;
4064
4065         spin_lock_init(&adev->mmio_idx_lock);
4066         spin_lock_init(&adev->smc_idx_lock);
4067         spin_lock_init(&adev->pcie_idx_lock);
4068         spin_lock_init(&adev->uvd_ctx_idx_lock);
4069         spin_lock_init(&adev->didt_idx_lock);
4070         spin_lock_init(&adev->gc_cac_idx_lock);
4071         spin_lock_init(&adev->se_cac_idx_lock);
4072         spin_lock_init(&adev->audio_endpt_idx_lock);
4073         spin_lock_init(&adev->mm_stats.lock);
4074         spin_lock_init(&adev->wb.lock);
4075
4076         INIT_LIST_HEAD(&adev->shadow_list);
4077         mutex_init(&adev->shadow_list_lock);
4078
4079         INIT_LIST_HEAD(&adev->reset_list);
4080
4081         INIT_LIST_HEAD(&adev->ras_list);
4082
4083         INIT_LIST_HEAD(&adev->pm.od_kobj_list);
4084
4085         INIT_DELAYED_WORK(&adev->delayed_init_work,
4086                           amdgpu_device_delayed_init_work_handler);
4087         INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
4088                           amdgpu_device_delay_enable_gfx_off);
4089
4090         INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
4091
4092         adev->gfx.gfx_off_req_count = 1;
4093         adev->gfx.gfx_off_residency = 0;
4094         adev->gfx.gfx_off_entrycount = 0;
4095         adev->pm.ac_power = power_supply_is_system_supplied() > 0;
4096
4097         atomic_set(&adev->throttling_logging_enabled, 1);
4098         /*
4099          * If throttling continues, logging will be performed every minute
4100          * to avoid log flooding. "-1" is subtracted since the thermal
4101          * throttling interrupt comes every second. Thus, the total logging
4102          * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
4103          * for throttling interrupt) = 60 seconds.
4104          */
4105         ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
4106         ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
4107
4108         /* Registers mapping */
4109         /* TODO: block userspace mapping of io register */
4110         if (adev->asic_type >= CHIP_BONAIRE) {
4111                 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
4112                 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
4113         } else {
4114                 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
4115                 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
4116         }
4117
4118         for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
4119                 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
4120
4121         adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
4122         if (!adev->rmmio)
4123                 return -ENOMEM;
4124
4125         DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
4126         DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);
4127
4128         /*
4129          * Reset domain needs to be present early, before the XGMI hive is
4130          * discovered (if any) and initialized, to use the reset sem and
4131          * in_gpu_reset flag early on during init and before calling RREG32.
4132          */
4133         adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
4134         if (!adev->reset_domain)
4135                 return -ENOMEM;
4136
4137         /* detect hw virtualization here */
4138         amdgpu_detect_virtualization(adev);
4139
4140         amdgpu_device_get_pcie_info(adev);
4141
4142         r = amdgpu_device_get_job_timeout_settings(adev);
4143         if (r) {
4144                 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4145                 return r;
4146         }
4147
4148         amdgpu_device_set_mcbp(adev);
4149
4150         /* early init functions */
4151         r = amdgpu_device_ip_early_init(adev);
4152         if (r)
4153                 return r;
4154
4155         /* Get rid of things like offb */
4156         r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
4157         if (r)
4158                 return r;
4159
4160         /* Enable TMZ based on IP_VERSION */
4161         amdgpu_gmc_tmz_set(adev);
4162
4163         if (amdgpu_sriov_vf(adev) &&
4164             amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0))
4165                 /* VF MMIO access (except mailbox range) from CPU
4166                  * will be blocked during sriov runtime
4167                  */
4168                 adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT;
4169
4170         amdgpu_gmc_noretry_set(adev);
4171         /* Need to get xgmi info early to decide the reset behavior */
4172         if (adev->gmc.xgmi.supported) {
4173                 r = adev->gfxhub.funcs->get_xgmi_info(adev);
4174                 if (r)
4175                         return r;
4176         }
4177
4178         /* enable PCIE atomic ops */
4179         if (amdgpu_sriov_vf(adev)) {
4180                 if (adev->virt.fw_reserve.p_pf2vf)
4181                         adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
4182                                                       adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
4183                                 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
4184         /* APUs with gfx9 onwards don't rely on PCIe atomics; an internal
4185          * path natively supports atomics, so set have_atomics_support to true.
4186          */
4187         } else if ((adev->flags & AMD_IS_APU) &&
4188                    (amdgpu_ip_version(adev, GC_HWIP, 0) >
4189                     IP_VERSION(9, 0, 0))) {
4190                 adev->have_atomics_support = true;
4191         } else {
4192                 adev->have_atomics_support =
4193                         !pci_enable_atomic_ops_to_root(adev->pdev,
4194                                           PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
4195                                           PCI_EXP_DEVCAP2_ATOMIC_COMP64);
4196         }
4197
4198         if (!adev->have_atomics_support)
4199                 dev_info(adev->dev, "PCIE atomic ops are not supported\n");
4200
4201         /* doorbell bar mapping and doorbell index init */
4202         amdgpu_doorbell_init(adev);
4203
4204         if (amdgpu_emu_mode == 1) {
4205                 /* post the asic on emulation mode */
4206                 emu_soc_asic_init(adev);
4207                 goto fence_driver_init;
4208         }
4209
4210         amdgpu_reset_init(adev);
4211
4212         /* detect whether we are running with an SR-IOV vbios */
4213         if (adev->bios)
4214                 amdgpu_device_detect_sriov_bios(adev);
4215
4216         /* check if we need to reset the asic
4217          *  E.g., driver was not cleanly unloaded previously, etc.
4218          */
4219         if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
4220                 if (adev->gmc.xgmi.num_physical_nodes) {
4221                         dev_info(adev->dev, "Pending hive reset.\n");
4222                         adev->gmc.xgmi.pending_reset = true;
4223                         /* Only init the blocks required for the SMU to handle the reset */
4224                         for (i = 0; i < adev->num_ip_blocks; i++) {
4225                                 if (!adev->ip_blocks[i].status.valid)
4226                                         continue;
4227                                 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
4228                                       adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
4229                                       adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
4230                                       adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
4231                                         DRM_DEBUG("IP %s disabled for hw_init.\n",
4232                                                 adev->ip_blocks[i].version->funcs->name);
4233                                         adev->ip_blocks[i].status.hw = true;
4234                                 }
4235                         }
4236                 } else if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) &&
4237                            !amdgpu_device_has_display_hardware(adev)) {
4238                         r = psp_gpu_reset(adev);
4239                 } else {
4240                         tmp = amdgpu_reset_method;
4241                         /* Do a default reset when loading or reloading the driver,
4242                          * regardless of the reset_method module parameter.
4243                          */
4244                         amdgpu_reset_method = AMD_RESET_METHOD_NONE;
4245                         r = amdgpu_asic_reset(adev);
4246                         amdgpu_reset_method = tmp;
4247                 }
4248
4249                 if (r) {
4250                         dev_err(adev->dev, "asic reset on init failed\n");
4251                         goto failed;
4252                 }
4253         }
4254
4255         /* Post card if necessary */
4256         if (amdgpu_device_need_post(adev)) {
4257                 if (!adev->bios) {
4258                         dev_err(adev->dev, "no vBIOS found\n");
4259                         r = -EINVAL;
4260                         goto failed;
4261                 }
4262                 DRM_INFO("GPU posting now...\n");
4263                 r = amdgpu_device_asic_init(adev);
4264                 if (r) {
4265                         dev_err(adev->dev, "gpu post error!\n");
4266                         goto failed;
4267                 }
4268         }
4269
4270         if (adev->bios) {
4271                 if (adev->is_atom_fw) {
4272                         /* Initialize clocks */
4273                         r = amdgpu_atomfirmware_get_clock_info(adev);
4274                         if (r) {
4275                                 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
4276                                 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4277                                 goto failed;
4278                         }
4279                 } else {
4280                         /* Initialize clocks */
4281                         r = amdgpu_atombios_get_clock_info(adev);
4282                         if (r) {
4283                                 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
4284                                 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4285                                 goto failed;
4286                         }
4287                         /* init i2c buses */
4288                         if (!amdgpu_device_has_dc_support(adev))
4289                                 amdgpu_atombios_i2c_init(adev);
4290                 }
4291         }
4292
4293 fence_driver_init:
4294         /* Fence driver */
4295         r = amdgpu_fence_driver_sw_init(adev);
4296         if (r) {
4297                 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
4298                 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
4299                 goto failed;
4300         }
4301
4302         /* init the mode config */
4303         drm_mode_config_init(adev_to_drm(adev));
4304
4305         r = amdgpu_device_ip_init(adev);
4306         if (r) {
4307                 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
4308                 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
4309                 goto release_ras_con;
4310         }
4311
4312         amdgpu_fence_driver_hw_init(adev);
4313
4314         dev_info(adev->dev,
4315                 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
4316                         adev->gfx.config.max_shader_engines,
4317                         adev->gfx.config.max_sh_per_se,
4318                         adev->gfx.config.max_cu_per_sh,
4319                         adev->gfx.cu_info.number);
4320
4321         adev->accel_working = true;
4322
4323         amdgpu_vm_check_compute_bug(adev);
4324
4325         /* Initialize the buffer migration limit. */
4326         if (amdgpu_moverate >= 0)
4327                 max_MBps = amdgpu_moverate;
4328         else
4329                 max_MBps = 8; /* Allow 8 MB/s. */
4330         /* Get a log2 for easy divisions. */
4331         adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
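             /*
              * Worked example (illustrative only): the default max_MBps = 8
              * gives log2_max_MBps = 3, and a non-power-of-two amdgpu_moverate
              * such as 100 MB/s rounds down to ilog2(100) = 6, i.e. 64 MB/s.
              * Consumers can then scale by the rate with a shift, e.g.
              * "bytes >> (20 + log2_max_MBps)" yields whole seconds of budget
              * at the configured rate.
              */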
4332
4333         /*
4334          * Register the gpu instance before amdgpu_device_enable_mgpu_fan_boost.
4335          * Otherwise the mgpu fan boost feature will be skipped because the
4336          * gpu instance count would be too low.
4337          */
4338         amdgpu_register_gpu_instance(adev);
4339
4340         /* enable clockgating, etc. after ib tests, etc. since some blocks require
4341          * explicit gating rather than handling it automatically.
4342          */
4343         if (!adev->gmc.xgmi.pending_reset) {
4344                 r = amdgpu_device_ip_late_init(adev);
4345                 if (r) {
4346                         dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4347                         amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
4348                         goto release_ras_con;
4349                 }
4350                 /* must succeed. */
4351                 amdgpu_ras_resume(adev);
4352                 queue_delayed_work(system_wq, &adev->delayed_init_work,
4353                                    msecs_to_jiffies(AMDGPU_RESUME_MS));
4354         }
4355
4356         if (amdgpu_sriov_vf(adev)) {
4357                 amdgpu_virt_release_full_gpu(adev, true);
4358                 flush_delayed_work(&adev->delayed_init_work);
4359         }
4360
4361         /*
4362          * Register these sysfs interfaces after `late_init`, since some
4363          * of the operations performed in `late_init` might affect their
4364          * creation.
4365          */
4366         r = amdgpu_atombios_sysfs_init(adev);
4367         if (r)
4368                 drm_err(&adev->ddev,
4369                         "registering atombios sysfs failed (%d).\n", r);
4370
4371         r = amdgpu_pm_sysfs_init(adev);
4372         if (r)
4373                 DRM_ERROR("registering pm sysfs failed (%d).\n", r);
4374
4375         r = amdgpu_ucode_sysfs_init(adev);
4376         if (r) {
4377                 adev->ucode_sysfs_en = false;
4378                 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
4379         } else
4380                 adev->ucode_sysfs_en = true;
4381
4382         r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
4383         if (r)
4384                 dev_err(adev->dev, "Could not create amdgpu device attr\n");
4385
4386         r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
4387         if (r)
4388                 dev_err(adev->dev,
4389                         "Could not create amdgpu board attributes\n");
4390
4391         amdgpu_fru_sysfs_init(adev);
4392         amdgpu_reg_state_sysfs_init(adev);
4393
4394         if (IS_ENABLED(CONFIG_PERF_EVENTS)) {
4395                 r = amdgpu_pmu_init(adev);
4396                 if (r)
4397                         dev_err(adev->dev, "amdgpu_pmu_init failed\n");
4398         }
4399         /* Keep the stored PCI config space at hand for restore on sudden PCI error */
4400         if (amdgpu_device_cache_pci_state(adev->pdev))
4401                 pci_restore_state(pdev);
4402
4403         /* if we have more than one VGA card, then disable the amdgpu VGA resources */
4404         /* this will fail for cards that aren't VGA class devices, just
4405          * ignore it
4406          */
4407         if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4408                 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
4409
4410         px = amdgpu_device_supports_px(ddev);
4411
4412         if (px || (!dev_is_removable(&adev->pdev->dev) &&
4413                                 apple_gmux_detect(NULL, NULL)))
4414                 vga_switcheroo_register_client(adev->pdev,
4415                                                &amdgpu_switcheroo_ops, px);
4416
4417         if (px)
4418                 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
4419
4420         if (adev->gmc.xgmi.pending_reset)
4421                 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
4422                                    msecs_to_jiffies(AMDGPU_RESUME_MS));
4423
4424         amdgpu_device_check_iommu_direct_map(adev);
4425
4426         return 0;
4427
4428 release_ras_con:
4429         if (amdgpu_sriov_vf(adev))
4430                 amdgpu_virt_release_full_gpu(adev, true);
4431
4432         /* failed in exclusive mode due to timeout */
4433         if (amdgpu_sriov_vf(adev) &&
4434                 !amdgpu_sriov_runtime(adev) &&
4435                 amdgpu_virt_mmio_blocked(adev) &&
4436                 !amdgpu_virt_wait_reset(adev)) {
4437                 dev_err(adev->dev, "VF exclusive mode timeout\n");
4438                 /* Don't send request since VF is inactive. */
4439                 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4440                 adev->virt.ops = NULL;
4441                 r = -EAGAIN;
4442         }
4443         amdgpu_release_ras_context(adev);
4444
4445 failed:
4446         amdgpu_vf_error_trans_all(adev);
4447
4448         return r;
4449 }
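     /*
      * Informal summary of the error labels above:
      *   release_ras_con - IP init progressed far enough that the RAS context
      *                     (and, under SR-IOV, full GPU access) must be released.
      *   failed          - earlier failure; only VF error telemetry is flushed.
      */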
4450
4451 static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4452 {
4453
4454         /* Clear all CPU mappings pointing to this device */
4455         unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4456
4457         /* Unmap all mapped bars - Doorbell, registers and VRAM */
4458         amdgpu_doorbell_fini(adev);
4459
4460         iounmap(adev->rmmio);
4461         adev->rmmio = NULL;
4462         if (adev->mman.aper_base_kaddr)
4463                 iounmap(adev->mman.aper_base_kaddr);
4464         adev->mman.aper_base_kaddr = NULL;
4465
4466         /* Memory manager related */
4467         if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
4468                 arch_phys_wc_del(adev->gmc.vram_mtrr);
4469                 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4470         }
4471 }
4472
4473 /**
4474  * amdgpu_device_fini_hw - tear down the driver
4475  *
4476  * @adev: amdgpu_device pointer
4477  *
4478  * Tear down the driver info (all asics).
4479  * Called at driver shutdown.
4480  */
4481 void amdgpu_device_fini_hw(struct amdgpu_device *adev)
4482 {
4483         dev_info(adev->dev, "amdgpu: finishing device.\n");
4484         flush_delayed_work(&adev->delayed_init_work);
4485         adev->shutdown = true;
4486
4487         /* make sure IB tests are finished before entering exclusive mode,
4488          * to avoid preemption during an IB test
4489          */
4490         if (amdgpu_sriov_vf(adev)) {
4491                 amdgpu_virt_request_full_gpu(adev, false);
4492                 amdgpu_virt_fini_data_exchange(adev);
4493         }
4494
4495         /* disable all interrupts */
4496         amdgpu_irq_disable_all(adev);
4497         if (adev->mode_info.mode_config_initialized) {
4498                 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4499                         drm_helper_force_disable_all(adev_to_drm(adev));
4500                 else
4501                         drm_atomic_helper_shutdown(adev_to_drm(adev));
4502         }
4503         amdgpu_fence_driver_hw_fini(adev);
4504
4505         if (adev->mman.initialized)
4506                 drain_workqueue(adev->mman.bdev.wq);
4507
4508         if (adev->pm.sysfs_initialized)
4509                 amdgpu_pm_sysfs_fini(adev);
4510         if (adev->ucode_sysfs_en)
4511                 amdgpu_ucode_sysfs_fini(adev);
4512         sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
4513         amdgpu_fru_sysfs_fini(adev);
4514
4515         amdgpu_reg_state_sysfs_fini(adev);
4516
4517         /* RAS features must be disabled before hw fini */
4518         amdgpu_ras_pre_fini(adev);
4519
4520         amdgpu_ttm_set_buffer_funcs_status(adev, false);
4521
4522         amdgpu_device_ip_fini_early(adev);
4523
4524         amdgpu_irq_fini_hw(adev);
4525
4526         if (adev->mman.initialized)
4527                 ttm_device_clear_dma_mappings(&adev->mman.bdev);
4528
4529         amdgpu_gart_dummy_page_fini(adev);
4530
4531         if (drm_dev_is_unplugged(adev_to_drm(adev)))
4532                 amdgpu_device_unmap_mmio(adev);
4533
4534 }
4535
4536 void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4537 {
4538         int idx;
4539         bool px;
4540
4541         amdgpu_fence_driver_sw_fini(adev);
4542         amdgpu_device_ip_fini(adev);
4543         amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
4544         adev->accel_working = false;
4545         dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
4546
4547         amdgpu_reset_fini(adev);
4548
4549         /* free i2c buses */
4550         if (!amdgpu_device_has_dc_support(adev))
4551                 amdgpu_i2c_fini(adev);
4552
4553         if (amdgpu_emu_mode != 1)
4554                 amdgpu_atombios_fini(adev);
4555
4556         kfree(adev->bios);
4557         adev->bios = NULL;
4558
4559         kfree(adev->fru_info);
4560         adev->fru_info = NULL;
4561
4562         px = amdgpu_device_supports_px(adev_to_drm(adev));
4563
4564         if (px || (!dev_is_removable(&adev->pdev->dev) &&
4565                                 apple_gmux_detect(NULL, NULL)))
4566                 vga_switcheroo_unregister_client(adev->pdev);
4567
4568         if (px)
4569                 vga_switcheroo_fini_domain_pm_ops(adev->dev);
4570
4571         if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4572                 vga_client_unregister(adev->pdev);
4573
4574         if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4575
4576                 iounmap(adev->rmmio);
4577                 adev->rmmio = NULL;
4578                 amdgpu_doorbell_fini(adev);
4579                 drm_dev_exit(idx);
4580         }
4581
4582         if (IS_ENABLED(CONFIG_PERF_EVENTS))
4583                 amdgpu_pmu_fini(adev);
4584         if (adev->mman.discovery_bin)
4585                 amdgpu_discovery_fini(adev);
4586
4587         amdgpu_reset_put_reset_domain(adev->reset_domain);
4588         adev->reset_domain = NULL;
4589
4590         kfree(adev->pci_state);
4591
4592 }
4593
4594 /**
4595  * amdgpu_device_evict_resources - evict device resources
4596  * @adev: amdgpu device object
4597  *
4598  * Evicts all ttm device resources (vram BOs, gart table) from the lru list
4599  * of the vram memory type. Mainly used for evicting device resources
4600  * at suspend time.
4601  *
4602  */
4603 static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
4604 {
4605         int ret;
4606
4607         /* No need to evict vram on APUs for suspend to ram or s2idle */
4608         if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
4609                 return 0;
4610
4611         ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4612         if (ret)
4613                 DRM_WARN("evicting device resources failed\n");
4614         return ret;
4615 }
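     /*
      * Roughly speaking, evicted VRAM buffers are migrated to their GTT
      * (system memory) placement by TTM, so their contents survive VRAM
      * losing power across suspend.
      */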
4616
4617 /*
4618  * Suspend & resume.
4619  */
4620 /**
4621  * amdgpu_device_prepare - prepare for device suspend
4622  *
4623  * @dev: drm dev pointer
4624  *
4625  * Prepare to put the hw in the suspend state (all asics).
4626  * Returns 0 for success or an error on failure.
4627  * Called at driver suspend.
4628  */
4629 int amdgpu_device_prepare(struct drm_device *dev)
4630 {
4631         struct amdgpu_device *adev = drm_to_adev(dev);
4632         int i, r;
4633
4634         amdgpu_choose_low_power_state(adev);
4635
4636         if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4637                 return 0;
4638
4639         /* Evict the majority of BOs before starting suspend sequence */
4640         r = amdgpu_device_evict_resources(adev);
4641         if (r)
4642                 goto unprepare;
4643
4644         flush_delayed_work(&adev->gfx.gfx_off_delay_work);
4645
4646         for (i = 0; i < adev->num_ip_blocks; i++) {
4647                 if (!adev->ip_blocks[i].status.valid)
4648                         continue;
4649                 if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
4650                         continue;
4651                 r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev);
4652                 if (r)
4653                         goto unprepare;
4654         }
4655
4656         return 0;
4657
4658 unprepare:
4659         adev->in_s0ix = adev->in_s3 = false;
4660
4661         return r;
4662 }
4663
4664 /**
4665  * amdgpu_device_suspend - initiate device suspend
4666  *
4667  * @dev: drm dev pointer
4668  * @fbcon: notify the fbdev of suspend
4669  *
4670  * Puts the hw in the suspend state (all asics).
4671  * Returns 0 for success or an error on failure.
4672  * Called at driver suspend.
4673  */
4674 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
4675 {
4676         struct amdgpu_device *adev = drm_to_adev(dev);
4677         int r = 0;
4678
4679         if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4680                 return 0;
4681
4682         adev->in_suspend = true;
4683
4684         if (amdgpu_sriov_vf(adev)) {
4685                 amdgpu_virt_fini_data_exchange(adev);
4686                 r = amdgpu_virt_request_full_gpu(adev, false);
4687                 if (r)
4688                         return r;
4689         }
4690
4691         if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4692                 DRM_WARN("smart shift update failed\n");
4693
4694         if (fbcon)
4695                 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
4696
4697         cancel_delayed_work_sync(&adev->delayed_init_work);
4698
4699         amdgpu_ras_suspend(adev);
4700
4701         amdgpu_device_ip_suspend_phase1(adev);
4702
4703         if (!adev->in_s0ix)
4704                 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
4705
4706         r = amdgpu_device_evict_resources(adev);
4707         if (r)
4708                 return r;
4709
4710         amdgpu_ttm_set_buffer_funcs_status(adev, false);
4711
4712         amdgpu_fence_driver_hw_fini(adev);
4713
4714         amdgpu_device_ip_suspend_phase2(adev);
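             /*
              * Roughly: phase1 above quiesced display (DCE) early, before
              * buffer eviction, while phase2 suspends the remaining IP blocks
              * once fences and TTM traffic have drained.
              */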
4715
4716         if (amdgpu_sriov_vf(adev))
4717                 amdgpu_virt_release_full_gpu(adev, false);
4718
4719         r = amdgpu_dpm_notify_rlc_state(adev, false);
4720         if (r)
4721                 return r;
4722
4723         return 0;
4724 }
4725
4726 /**
4727  * amdgpu_device_resume - initiate device resume
4728  *
4729  * @dev: drm dev pointer
4730  * @fbcon: notify the fbdev of resume
4731  *
4732  * Bring the hw back to operating state (all asics).
4733  * Returns 0 for success or an error on failure.
4734  * Called at driver resume.
4735  */
4736 int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
4737 {
4738         struct amdgpu_device *adev = drm_to_adev(dev);
4739         int r = 0;
4740
4741         if (amdgpu_sriov_vf(adev)) {
4742                 r = amdgpu_virt_request_full_gpu(adev, true);
4743                 if (r)
4744                         return r;
4745         }
4746
4747         if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4748                 return 0;
4749
4750         if (adev->in_s0ix)
4751                 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
4752
4753         /* post card */
4754         if (amdgpu_device_need_post(adev)) {
4755                 r = amdgpu_device_asic_init(adev);
4756                 if (r)
4757                         dev_err(adev->dev, "amdgpu asic init failed\n");
4758         }
4759
4760         r = amdgpu_device_ip_resume(adev);
4761
4762         if (r) {
4763                 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
4764                 goto exit;
4765         }
4766         amdgpu_fence_driver_hw_init(adev);
4767
4768         if (!adev->in_s0ix) {
4769                 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4770                 if (r)
4771                         goto exit;
4772         }
4773
4774         r = amdgpu_device_ip_late_init(adev);
4775         if (r)
4776                 goto exit;
4777
4778         queue_delayed_work(system_wq, &adev->delayed_init_work,
4779                            msecs_to_jiffies(AMDGPU_RESUME_MS));
4780 exit:
4781         if (amdgpu_sriov_vf(adev)) {
4782                 amdgpu_virt_init_data_exchange(adev);
4783                 amdgpu_virt_release_full_gpu(adev, true);
4784         }
4785
4786         if (r)
4787                 return r;
4788
4789         /* Make sure IB tests flushed */
4790         flush_delayed_work(&adev->delayed_init_work);
4791
4792         if (fbcon)
4793                 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
4794
4795         amdgpu_ras_resume(adev);
4796
4797         if (adev->mode_info.num_crtc) {
4798                 /*
4799                  * Most of the connector probing functions try to acquire runtime pm
4800                  * refs to ensure that the GPU is powered on when connector polling is
4801                  * performed. Since we're calling this from a runtime PM callback,
4802                  * trying to acquire rpm refs will cause us to deadlock.
4803                  *
4804                  * Since we're guaranteed to be holding the rpm lock, it's safe to
4805                  * temporarily disable the rpm helpers so this doesn't deadlock us.
4806                  */
4807 #ifdef CONFIG_PM
4808                 dev->dev->power.disable_depth++;
4809 #endif
4810                 if (!adev->dc_enabled)
4811                         drm_helper_hpd_irq_event(dev);
4812                 else
4813                         drm_kms_helper_hotplug_event(dev);
4814 #ifdef CONFIG_PM
4815                 dev->dev->power.disable_depth--;
4816 #endif
4817         }
4818         adev->in_suspend = false;
4819
4820         if (adev->enable_mes)
4821                 amdgpu_mes_self_test(adev);
4822
4823         if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4824                 DRM_WARN("smart shift update failed\n");
4825
4826         return 0;
4827 }
4828
4829 /**
4830  * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4831  *
4832  * @adev: amdgpu_device pointer
4833  *
4834  * The list of all the hardware IPs that make up the asic is walked and
4835  * the check_soft_reset callbacks are run.  check_soft_reset determines
4836  * if the asic is still hung or not.
4837  * Returns true if any of the IPs are still in a hung state, false if not.
4838  */
4839 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
4840 {
4841         int i;
4842         bool asic_hang = false;
4843
4844         if (amdgpu_sriov_vf(adev))
4845                 return true;
4846
4847         if (amdgpu_asic_need_full_reset(adev))
4848                 return true;
4849
4850         for (i = 0; i < adev->num_ip_blocks; i++) {
4851                 if (!adev->ip_blocks[i].status.valid)
4852                         continue;
4853                 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4854                         adev->ip_blocks[i].status.hang =
4855                                 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4856                 if (adev->ip_blocks[i].status.hang) {
4857                         dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
4858                         asic_hang = true;
4859                 }
4860         }
4861         return asic_hang;
4862 }
4863
4864 /**
4865  * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4866  *
4867  * @adev: amdgpu_device pointer
4868  *
4869  * The list of all the hardware IPs that make up the asic is walked and the
4870  * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
4871  * handles any IP specific hardware or software state changes that are
4872  * necessary for a soft reset to succeed.
4873  * Returns 0 on success, negative error code on failure.
4874  */
4875 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
4876 {
4877         int i, r = 0;
4878
4879         for (i = 0; i < adev->num_ip_blocks; i++) {
4880                 if (!adev->ip_blocks[i].status.valid)
4881                         continue;
4882                 if (adev->ip_blocks[i].status.hang &&
4883                     adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4884                         r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
4885                         if (r)
4886                                 return r;
4887                 }
4888         }
4889
4890         return 0;
4891 }
4892
4893 /**
4894  * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4895  *
4896  * @adev: amdgpu_device pointer
4897  *
4898  * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
4899  * reset is necessary to recover.
4900  * Returns true if a full asic reset is required, false if not.
4901  */
4902 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
4903 {
4904         int i;
4905
4906         if (amdgpu_asic_need_full_reset(adev))
4907                 return true;
4908
4909         for (i = 0; i < adev->num_ip_blocks; i++) {
4910                 if (!adev->ip_blocks[i].status.valid)
4911                         continue;
4912                 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4913                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4914                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
4915                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4916                      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
4917                         if (adev->ip_blocks[i].status.hang) {
4918                                 dev_info(adev->dev, "Some blocks need a full reset!\n");
4919                                 return true;
4920                         }
4921                 }
4922         }
4923         return false;
4924 }
4925
4926 /**
4927  * amdgpu_device_ip_soft_reset - do a soft reset
4928  *
4929  * @adev: amdgpu_device pointer
4930  *
4931  * The list of all the hardware IPs that make up the asic is walked and the
4932  * soft_reset callbacks are run if the block is hung.  soft_reset handles any
4933  * IP specific hardware or software state changes that are necessary to soft
4934  * reset the IP.
4935  * Returns 0 on success, negative error code on failure.
4936  */
4937 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
4938 {
4939         int i, r = 0;
4940
4941         for (i = 0; i < adev->num_ip_blocks; i++) {
4942                 if (!adev->ip_blocks[i].status.valid)
4943                         continue;
4944                 if (adev->ip_blocks[i].status.hang &&
4945                     adev->ip_blocks[i].version->funcs->soft_reset) {
4946                         r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
4947                         if (r)
4948                                 return r;
4949                 }
4950         }
4951
4952         return 0;
4953 }
4954
4955 /**
4956  * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4957  *
4958  * @adev: amdgpu_device pointer
4959  *
4960  * The list of all the hardware IPs that make up the asic is walked and the
4961  * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
4962  * handles any IP specific hardware or software state changes that are
4963  * necessary after the IP has been soft reset.
4964  * Returns 0 on success, negative error code on failure.
4965  */
4966 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
4967 {
4968         int i, r = 0;
4969
4970         for (i = 0; i < adev->num_ip_blocks; i++) {
4971                 if (!adev->ip_blocks[i].status.valid)
4972                         continue;
4973                 if (adev->ip_blocks[i].status.hang &&
4974                     adev->ip_blocks[i].version->funcs->post_soft_reset)
4975                         r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
4976                 if (r)
4977                         return r;
4978         }
4979
4980         return 0;
4981 }
4982
4983 /**
4984  * amdgpu_device_recover_vram - Recover some VRAM contents
4985  *
4986  * @adev: amdgpu_device pointer
4987  *
4988  * Restores the contents of VRAM buffers from the shadows in GTT.  Used to
4989  * restore things like GPUVM page tables after a GPU reset where
4990  * the contents of VRAM might be lost.
4991  *
4992  * Returns:
4993  * 0 on success, negative error code on failure.
4994  */
4995 static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
4996 {
4997         struct dma_fence *fence = NULL, *next = NULL;
4998         struct amdgpu_bo *shadow;
4999         struct amdgpu_bo_vm *vmbo;
5000         long r = 1, tmo;
5001
5002         if (amdgpu_sriov_runtime(adev))
5003                 tmo = msecs_to_jiffies(8000);
5004         else
5005                 tmo = msecs_to_jiffies(100);
5006
5007         dev_info(adev->dev, "recover vram bo from shadow start\n");
5008         mutex_lock(&adev->shadow_list_lock);
5009         list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
5010                 /* If vm is compute context or adev is APU, shadow will be NULL */
5011                 if (!vmbo->shadow)
5012                         continue;
5013                 shadow = vmbo->shadow;
5014
5015                 /* No need to recover an evicted BO */
5016                 if (!shadow->tbo.resource ||
5017                     shadow->tbo.resource->mem_type != TTM_PL_TT ||
5018                     shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
5019                     shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
5020                         continue;
5021
5022                 r = amdgpu_bo_restore_shadow(shadow, &next);
5023                 if (r)
5024                         break;
5025
5026                 if (fence) {
5027                         tmo = dma_fence_wait_timeout(fence, false, tmo);
5028                         dma_fence_put(fence);
5029                         fence = next;
5030                         if (tmo == 0) {
5031                                 r = -ETIMEDOUT;
5032                                 break;
5033                         } else if (tmo < 0) {
5034                                 r = tmo;
5035                                 break;
5036                         }
5037                 } else {
5038                         fence = next;
5039                 }
5040         }
5041         mutex_unlock(&adev->shadow_list_lock);
5042
5043         if (fence)
5044                 tmo = dma_fence_wait_timeout(fence, false, tmo);
5045         dma_fence_put(fence);
5046
5047         if (r < 0 || tmo <= 0) {
5048                 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
5049                 return -EIO;
5050         }
5051
5052         dev_info(adev->dev, "recover vram bo from shadow done\n");
5053         return 0;
5054 }
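     /*
      * Note on the loop above: shadow restores are pipelined; the wait on
      * fence N-1 overlaps submission of restore N, and dma_fence_wait_timeout()
      * returns the remaining jiffies, so the timeout budget carries forward
      * across iterations.
      */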
5055
5056
5057 /**
5058  * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5059  *
5060  * @adev: amdgpu_device pointer
5061  * @reset_context: amdgpu reset context pointer
5062  *
5063  * Do VF FLR and reinitialize the ASIC.
5064  * Returns 0 on success, negative error code on failure.
5065  */
5066 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
5067                                      struct amdgpu_reset_context *reset_context)
5068 {
5069         int r;
5070         struct amdgpu_hive_info *hive = NULL;
5071
5072         if (test_bit(AMDGPU_HOST_FLR, &reset_context->flags)) {
5073                 clear_bit(AMDGPU_HOST_FLR, &reset_context->flags);
5074                 r = amdgpu_virt_request_full_gpu(adev, true);
5075         } else {
5076                 r = amdgpu_virt_reset_gpu(adev);
5077         }
5078         if (r)
5079                 return r;
5080
5081         amdgpu_ras_set_fed(adev, false);
5082         amdgpu_irq_gpu_reset_resume_helper(adev);
5083
5084         /* some SW cleanup the VF needs to do before recovery */
5085         amdgpu_virt_post_reset(adev);
5086
5087         /* Resume IP prior to SMC */
5088         r = amdgpu_device_ip_reinit_early_sriov(adev);
5089         if (r)
5090                 return r;
5091
5092         amdgpu_virt_init_data_exchange(adev);
5093
5094         r = amdgpu_device_fw_loading(adev);
5095         if (r)
5096                 return r;
5097
5098         /* now we are okay to resume SMC/CP/SDMA */
5099         r = amdgpu_device_ip_reinit_late_sriov(adev);
5100         if (r)
5101                 return r;
5102
5103         hive = amdgpu_get_xgmi_hive(adev);
5104         /* Update PSP FW topology after reset */
5105         if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
5106                 r = amdgpu_xgmi_update_topology(hive, adev);
5107         if (hive)
5108                 amdgpu_put_xgmi_hive(hive);
5109         if (r)
5110                 return r;
5111
5112         r = amdgpu_ib_ring_tests(adev);
5113         if (r)
5114                 return r;
5115
5116         if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
5117                 amdgpu_inc_vram_lost(adev);
5118                 r = amdgpu_device_recover_vram(adev);
5119         }
5120         if (r)
5121                 return r;
5122
5123         /* needs to be called during full access, so we can't do it later
5124          * like bare-metal does.
5125          */
5126         amdgpu_amdkfd_post_reset(adev);
5127         amdgpu_virt_release_full_gpu(adev, true);
5128
5129         /* Aldebaran and gfx_11_0_3 support RAS in SRIOV, so RAS needs to be resumed during reset */
5130         if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
5131             amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
5132             amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
5133             amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
5134                 amdgpu_ras_resume(adev);
5135         return 0;
5136 }
5137
5138 /**
5139  * amdgpu_device_has_job_running - check if there is any job in the pending list
5140  *
5141  * @adev: amdgpu_device pointer
5142  *
5143  * Check if any job is still pending in the scheduler's pending list.
5144  */
5145 bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
5146 {
5147         int i;
5148         struct drm_sched_job *job;
5149
5150         for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5151                 struct amdgpu_ring *ring = adev->rings[i];
5152
5153                 if (!amdgpu_ring_sched_ready(ring))
5154                         continue;
5155
5156                 spin_lock(&ring->sched.job_list_lock);
5157                 job = list_first_entry_or_null(&ring->sched.pending_list,
5158                                                struct drm_sched_job, list);
5159                 spin_unlock(&ring->sched.job_list_lock);
5160                 if (job)
5161                         return true;
5162         }
5163         return false;
5164 }
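     /*
      * "Pending" here has the drm_sched meaning: jobs that were pushed to the
      * hardware ring but whose fences have not signaled yet.
      */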
5165
5166 /**
5167  * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
5168  *
5169  * @adev: amdgpu_device pointer
5170  *
5171  * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
5172  * a hung GPU.
5173  */
5174 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
5175 {
5176
5177         if (amdgpu_gpu_recovery == 0)
5178                 goto disabled;
5179
5180         /* Skip soft reset check in fatal error mode */
5181         if (!amdgpu_ras_is_poison_mode_supported(adev))
5182                 return true;
5183
5184         if (amdgpu_sriov_vf(adev))
5185                 return true;
5186
5187         if (amdgpu_gpu_recovery == -1) {
5188                 switch (adev->asic_type) {
5189 #ifdef CONFIG_DRM_AMDGPU_SI
5190                 case CHIP_VERDE:
5191                 case CHIP_TAHITI:
5192                 case CHIP_PITCAIRN:
5193                 case CHIP_OLAND:
5194                 case CHIP_HAINAN:
5195 #endif
5196 #ifdef CONFIG_DRM_AMDGPU_CIK
5197                 case CHIP_KAVERI:
5198                 case CHIP_KABINI:
5199                 case CHIP_MULLINS:
5200 #endif
5201                 case CHIP_CARRIZO:
5202                 case CHIP_STONEY:
5203                 case CHIP_CYAN_SKILLFISH:
5204                         goto disabled;
5205                 default:
5206                         break;
5207                 }
5208         }
5209
5210         return true;
5211
5212 disabled:
5213         dev_info(adev->dev, "GPU recovery disabled.\n");
5214         return false;
5215 }
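     /*
      * Summary of the amdgpu_gpu_recovery module parameter as handled above:
      *    0 -> recovery always disabled
      *    1 -> recovery always enabled
      *   -1 -> auto: enabled except on the legacy ASICs listed in the switch,
      *         where it defaults off
      */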
5216
5217 int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
5218 {
5219         u32 i;
5220         int ret = 0;
5221
5222         amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5223
5224         dev_info(adev->dev, "GPU mode1 reset\n");
5225
5226         /* disable BM */
5227         pci_clear_master(adev->pdev);
5228
5229         amdgpu_device_cache_pci_state(adev->pdev);
5230
5231         if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
5232                 dev_info(adev->dev, "GPU smu mode1 reset\n");
5233                 ret = amdgpu_dpm_mode1_reset(adev);
5234         } else {
5235                 dev_info(adev->dev, "GPU psp mode1 reset\n");
5236                 ret = psp_gpu_reset(adev);
5237         }
5238
5239         if (ret)
5240                 goto mode1_reset_failed;
5241
5242         amdgpu_device_load_pci_state(adev->pdev);
5243         ret = amdgpu_psp_wait_for_bootloader(adev);
5244         if (ret)
5245                 goto mode1_reset_failed;
5246
5247         /* wait for asic to come out of reset */
5248         for (i = 0; i < adev->usec_timeout; i++) {
5249                 u32 memsize = adev->nbio.funcs->get_memsize(adev);
5250
5251                 if (memsize != 0xffffffff)
5252                         break;
5253                 udelay(1);
5254         }
5255
5256         if (i >= adev->usec_timeout) {
5257                 ret = -ETIMEDOUT;
5258                 goto mode1_reset_failed;
5259         }
5260
5261         amdgpu_atombios_scratch_regs_engine_hung(adev, false);
5262
5263         return 0;
5264
5265 mode1_reset_failed:
5266         dev_err(adev->dev, "GPU mode1 reset failed\n");
5267         return ret;
5268 }
5269
5270 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
5271                                  struct amdgpu_reset_context *reset_context)
5272 {
5273         int i, r = 0;
5274         struct amdgpu_job *job = NULL;
5275         bool need_full_reset =
5276                 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5277
5278         if (reset_context->reset_req_dev == adev)
5279                 job = reset_context->job;
5280
5281         if (amdgpu_sriov_vf(adev)) {
5282                 /* stop the data exchange thread */
5283                 amdgpu_virt_fini_data_exchange(adev);
5284         }
5285
5286         amdgpu_fence_driver_isr_toggle(adev, true);
5287
5288         /* block all schedulers and reset given job's ring */
5289         for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5290                 struct amdgpu_ring *ring = adev->rings[i];
5291
5292                 if (!amdgpu_ring_sched_ready(ring))
5293                         continue;
5294
5295                 /* Clear job fences from the fence drv so that force_completion
5296                  * only acts on the NULL and vm flush fences left in it.
5297                  */
5298                 amdgpu_fence_driver_clear_job_fences(ring);
5299
5300                 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
5301                 amdgpu_fence_driver_force_completion(ring);
5302         }
5303
5304         amdgpu_fence_driver_isr_toggle(adev, false);
5305
5306         if (job && job->vm)
5307                 drm_sched_increase_karma(&job->base);
5308
5309         r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
5310         /* If reset handler not implemented, continue; otherwise return */
5311         if (r == -EOPNOTSUPP)
5312                 r = 0;
5313         else
5314                 return r;
5315
5316         /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
5317         if (!amdgpu_sriov_vf(adev)) {
5318
5319                 if (!need_full_reset)
5320                         need_full_reset = amdgpu_device_ip_need_full_reset(adev);
5321
5322                 if (!need_full_reset && amdgpu_gpu_recovery &&
5323                     amdgpu_device_ip_check_soft_reset(adev)) {
5324                         amdgpu_device_ip_pre_soft_reset(adev);
5325                         r = amdgpu_device_ip_soft_reset(adev);
5326                         amdgpu_device_ip_post_soft_reset(adev);
5327                         if (r || amdgpu_device_ip_check_soft_reset(adev)) {
5328                                 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
5329                                 need_full_reset = true;
5330                         }
5331                 }
5332
5333                 if (need_full_reset)
5334                         r = amdgpu_device_ip_suspend(adev);
5335                 if (need_full_reset)
5336                         set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5337                 else
5338                         clear_bit(AMDGPU_NEED_FULL_RESET,
5339                                   &reset_context->flags);
5340         }
5341
5342         return r;
5343 }
5344
5345 static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
5346 {
5347         int i;
5348
5349         lockdep_assert_held(&adev->reset_domain->sem);
5350
5351         for (i = 0; i < adev->reset_info.num_regs; i++) {
5352                 adev->reset_info.reset_dump_reg_value[i] =
5353                         RREG32(adev->reset_info.reset_dump_reg_list[i]);
5354
5355                 trace_amdgpu_reset_reg_dumps(adev->reset_info.reset_dump_reg_list[i],
5356                                              adev->reset_info.reset_dump_reg_value[i]);
5357         }
5358
5359         return 0;
5360 }
5361
5362 int amdgpu_do_asic_reset(struct list_head *device_list_handle,
5363                          struct amdgpu_reset_context *reset_context)
5364 {
5365         struct amdgpu_device *tmp_adev = NULL;
5366         bool need_full_reset, skip_hw_reset, vram_lost = false;
5367         int r = 0;
5368         uint32_t i;
5369
5370         /* Try reset handler method first */
5371         tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5372                                     reset_list);
5373
5374         if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) {
5375                 amdgpu_reset_reg_dumps(tmp_adev);
5376
5377                 dev_info(tmp_adev->dev, "Dumping IP State\n");
5378                 /* Trigger ip dump before we reset the asic */
5379                 for (i = 0; i < tmp_adev->num_ip_blocks; i++)
5380                         if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state)
5381                                 tmp_adev->ip_blocks[i].version->funcs
5382                                 ->dump_ip_state((void *)tmp_adev);
5383                 dev_info(tmp_adev->dev, "Dumping IP State Completed\n");
5384         }
5385
5386         reset_context->reset_device_list = device_list_handle;
5387         r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
5388         /* If reset handler not implemented, continue; otherwise return */
5389         if (r == -EOPNOTSUPP)
5390                 r = 0;
5391         else
5392                 return r;
5393
5394         /* Reset handler not implemented, use the default method */
5395         need_full_reset =
5396                 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5397         skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
5398
5399         /*
5400          * ASIC reset has to be done on all XGMI hive nodes ASAP
5401          * to allow proper links negotiation in FW (within 1 sec)
5402          */
5403         if (!skip_hw_reset && need_full_reset) {
5404                 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5405                         /* For XGMI run all resets in parallel to speed up the process */
5406                         if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5407                                 tmp_adev->gmc.xgmi.pending_reset = false;
5408                                 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
5409                                         r = -EALREADY;
5410                         } else
5411                                 r = amdgpu_asic_reset(tmp_adev);
5412
5413                         if (r) {
5414                                 dev_err(tmp_adev->dev, "ASIC reset failed with error %d for drm dev %s",
5415                                          r, adev_to_drm(tmp_adev)->unique);
5416                                 goto out;
5417                         }
5418                 }
5419
5420                 /* For XGMI wait for all resets to complete before proceed */
5421                 if (!r) {
5422                         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5423                                 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5424                                         flush_work(&tmp_adev->xgmi_reset_work);
5425                                         r = tmp_adev->asic_reset_res;
5426                                         if (r)
5427                                                 break;
5428                                 }
5429                         }
5430                 }
5431         }
5432
5433         if (!r && amdgpu_ras_intr_triggered()) {
5434                 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5435                         amdgpu_ras_reset_error_count(tmp_adev, AMDGPU_RAS_BLOCK__MMHUB);
5436                 }
5437
5438                 amdgpu_ras_intr_cleared();
5439         }
5440
5441         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5442                 if (need_full_reset) {
5443                         /* post card */
5444                         amdgpu_ras_set_fed(tmp_adev, false);
5445                         r = amdgpu_device_asic_init(tmp_adev);
5446                         if (r) {
5447                                 dev_warn(tmp_adev->dev, "asic atom init failed!");
5448                         } else {
5449                                 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
5450
5451                                 r = amdgpu_device_ip_resume_phase1(tmp_adev);
5452                                 if (r)
5453                                         goto out;
5454
5455                                 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
5456
5457                                 if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags))
5458                                         amdgpu_coredump(tmp_adev, vram_lost, reset_context);
5459
5460                                 if (vram_lost) {
5461                                         DRM_INFO("VRAM is lost due to GPU reset!\n");
5462                                         amdgpu_inc_vram_lost(tmp_adev);
5463                                 }
5464
5465                                 r = amdgpu_device_fw_loading(tmp_adev);
5466                                 if (r)
5467                                         return r;
5468
5469                                 r = amdgpu_xcp_restore_partition_mode(
5470                                         tmp_adev->xcp_mgr);
5471                                 if (r)
5472                                         goto out;
5473
5474                                 r = amdgpu_device_ip_resume_phase2(tmp_adev);
5475                                 if (r)
5476                                         goto out;
5477
5478                                 if (tmp_adev->mman.buffer_funcs_ring->sched.ready)
5479                                         amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true);
5480
5481                                 if (vram_lost)
5482                                         amdgpu_device_fill_reset_magic(tmp_adev);
5483
5484                                 /*
5485                                  * Add this ASIC as tracked, as the reset has
5486                                  * already completed successfully.
5487                                  */
5488                                 amdgpu_register_gpu_instance(tmp_adev);
5489
5490                                 if (!reset_context->hive &&
5491                                     tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5492                                         amdgpu_xgmi_add_device(tmp_adev);
5493
5494                                 r = amdgpu_device_ip_late_init(tmp_adev);
5495                                 if (r)
5496                                         goto out;
5497
5498                                 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
5499
5500                                 /*
5501                                  * The GPU enters a bad state once the number
5502                                  * of faulty pages flagged by ECC reaches the
5503                                  * threshold, and RAS recovery is scheduled
5504                                  * next. So add a check here to break recovery
5505                                  * if the bad page threshold has indeed been
5506                                  * exceeded, and remind the user to retire this
5507                                  * GPU or set a bigger bad_page_threshold value
5508                                  * when probing the driver again.
5509                                  */
5510                                 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
5511                                         /* must succeed. */
5512                                         amdgpu_ras_resume(tmp_adev);
5513                                 } else {
5514                                         r = -EINVAL;
5515                                         goto out;
5516                                 }
5517
5518                                 /* Update PSP FW topology after reset */
5519                                 if (reset_context->hive &&
5520                                     tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5521                                         r = amdgpu_xgmi_update_topology(
5522                                                 reset_context->hive, tmp_adev);
5523                         }
5524                 }
5525
5526 out:
5527                 if (!r) {
5528                         amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5529                         r = amdgpu_ib_ring_tests(tmp_adev);
5530                         if (r) {
5531                                 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
5532                                 need_full_reset = true;
5533                                 r = -EAGAIN;
5534                                 goto end;
5535                         }
5536                 }
5537
5538                 if (!r)
5539                         r = amdgpu_device_recover_vram(tmp_adev);
5540                 else
5541                         tmp_adev->asic_reset_res = r;
5542         }
5543
5544 end:
5545         if (need_full_reset)
5546                 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5547         else
5548                 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5549         return r;
5550 }
5551
5552 static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
5553 {
5554
5555         switch (amdgpu_asic_reset_method(adev)) {
5556         case AMD_RESET_METHOD_MODE1:
5557                 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5558                 break;
5559         case AMD_RESET_METHOD_MODE2:
5560                 adev->mp1_state = PP_MP1_STATE_RESET;
5561                 break;
5562         default:
5563                 adev->mp1_state = PP_MP1_STATE_NONE;
5564                 break;
5565         }
5566 }
5567
5568 static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
5569 {
5570         amdgpu_vf_error_trans_all(adev);
5571         adev->mp1_state = PP_MP1_STATE_NONE;
5572 }
5573
5574 static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5575 {
5576         struct pci_dev *p = NULL;
5577
5578         p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5579                         adev->pdev->bus->number, 1);
5580         if (p) {
5581                 pm_runtime_enable(&(p->dev));
5582                 pm_runtime_resume(&(p->dev));
5583         }
5584
5585         pci_dev_put(p);
5586 }
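     /*
      * Note: devfn 1 on the GPU's bus is typically the adapter's HDA audio
      * function on discrete boards; this helper and the one below bracket a
      * GPU reset so the audio controller is quiesced beforehand and
      * re-enabled afterwards.
      */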
5587
5588 static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5589 {
5590         enum amd_reset_method reset_method;
5591         struct pci_dev *p = NULL;
5592         u64 expires;
5593
5594         /*
5595          * For now, only BACO and mode1 reset are confirmed to suffer from
5596          * the audio issue if the audio device is not properly suspended.
5597          */
5598         reset_method = amdgpu_asic_reset_method(adev);
5599         if ((reset_method != AMD_RESET_METHOD_BACO) &&
5600              (reset_method != AMD_RESET_METHOD_MODE1))
5601                 return -EINVAL;
5602
5603         p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5604                         adev->pdev->bus->number, 1);
5605         if (!p)
5606                 return -ENODEV;
5607
5608         expires = pm_runtime_autosuspend_expiration(&(p->dev));
5609         if (!expires)
5610                 /*
5611                  * If we cannot get the audio device autosuspend delay,
5612                  * a fixed 4s interval is used. Since 3s is the audio
5613                  * controller's default autosuspend delay, the 4s used
5614                  * here is guaranteed to cover it.
5615                  */
5616                 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
5617
5618         while (!pm_runtime_status_suspended(&(p->dev))) {
5619                 if (!pm_runtime_suspend(&(p->dev)))
5620                         break;
5621
5622                 if (expires < ktime_get_mono_fast_ns()) {
5623                         dev_warn(adev->dev, "failed to suspend display audio\n");
5624                         pci_dev_put(p);
5625                         /* TODO: abort the succeeding gpu reset? */
5626                         return -ETIMEDOUT;
5627                 }
5628         }
5629
5630         pm_runtime_disable(&(p->dev));
5631
5632         pci_dev_put(p);
5633         return 0;
5634 }
5635
5636 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
5637 {
5638         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5639
5640 #if defined(CONFIG_DEBUG_FS)
5641         if (!amdgpu_sriov_vf(adev))
5642                 cancel_work(&adev->reset_work);
5643 #endif
5644
5645         if (adev->kfd.dev)
5646                 cancel_work(&adev->kfd.reset_work);
5647
5648         if (amdgpu_sriov_vf(adev))
5649                 cancel_work(&adev->virt.flr_work);
5650
5651         if (con && adev->ras_enabled)
5652                 cancel_work(&con->recovery_work);
5653
5654 }
5655
5656 static int amdgpu_device_health_check(struct list_head *device_list_handle)
5657 {
5658         struct amdgpu_device *tmp_adev;
5659         int ret = 0;
5660         u32 status;
5661
5662         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5663                 pci_read_config_dword(tmp_adev->pdev, PCI_COMMAND, &status);
5664                 if (PCI_POSSIBLE_ERROR(status)) {
5665                         dev_err(tmp_adev->dev, "device lost from bus!\n");
5666                         ret = -ENODEV;
5667                 }
5668         }
5669
5670         return ret;
5671 }
5672
5673 /**
5674  * amdgpu_device_gpu_recover - reset the asic and recover scheduler
5675  *
5676  * @adev: amdgpu_device pointer
5677  * @job: the job which triggered the hang
5678  * @reset_context: amdgpu reset context pointer
5679  *
5680  * Attempt to reset the GPU if it has hung (all asics).
5681  * Attempt a soft reset or full reset and reinitialize the ASIC.
5682  * Returns 0 for success or an error on failure.
5683  */
5684
5685 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
5686                               struct amdgpu_job *job,
5687                               struct amdgpu_reset_context *reset_context)
5688 {
5689         struct list_head device_list, *device_list_handle = NULL;
5690         bool job_signaled = false;
5691         struct amdgpu_hive_info *hive = NULL;
5692         struct amdgpu_device *tmp_adev = NULL;
5693         int i, r = 0;
5694         bool need_emergency_restart = false;
5695         bool audio_suspended = false;
5696         int retry_limit = AMDGPU_MAX_RETRY_LIMIT;
5697
5698         /*
5699          * Special case: RAS triggered and full reset isn't supported
5700          */
5701         need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5702
5703         /*
5704          * Flush RAM to disk so that after reboot
5705          * the user can read log and see why the system rebooted.
5706          */
5707         if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
5708                 amdgpu_ras_get_context(adev)->reboot) {
5709                 DRM_WARN("Emergency reboot.");
5710
5711                 ksys_sync_helper();
5712                 emergency_restart();
5713         }
5714
5715         dev_info(adev->dev, "GPU %s begin!\n",
5716                 need_emergency_restart ? "jobs stop":"reset");
5717
5718         if (!amdgpu_sriov_vf(adev))
5719                 hive = amdgpu_get_xgmi_hive(adev);
5720         if (hive)
5721                 mutex_lock(&hive->hive_lock);
5722
5723         reset_context->job = job;
5724         reset_context->hive = hive;
5725         /*
5726          * Build list of devices to reset.
5727          * If we are in XGMI hive mode, reorder the device list
5728          * so that adev is in the first position.
5729          */
5730         INIT_LIST_HEAD(&device_list);
5731         if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1) && hive) {
5732                 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
5733                         list_add_tail(&tmp_adev->reset_list, &device_list);
5734                         if (adev->shutdown)
5735                                 tmp_adev->shutdown = true;
5736                 }
5737                 if (!list_is_first(&adev->reset_list, &device_list))
5738                         list_rotate_to_front(&adev->reset_list, &device_list);
5739                 device_list_handle = &device_list;
5740         } else {
5741                 list_add_tail(&adev->reset_list, &device_list);
5742                 device_list_handle = &device_list;
5743         }
5744
5745         if (!amdgpu_sriov_vf(adev)) {
5746                 r = amdgpu_device_health_check(device_list_handle);
5747                 if (r)
5748                         goto end_reset;
5749         }
5750
5751         /* We need to lock the reset domain only once, for both XGMI and single device */
5752         tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5753                                     reset_list);
5754         amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
5755
5756         /* block all schedulers and reset given job's ring */
5757         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5758
5759                 amdgpu_device_set_mp1_state(tmp_adev);
5760
5761                 /*
5762                  * Try to put the audio codec into suspend state
5763                  * before the gpu reset starts.
5764                  *
5765                  * Because the power domain of the graphics device
5766                  * is shared with the AZ (audio) power domain,
5767                  * without this we may change the audio hardware
5768                  * behind the audio driver's back, which would
5769                  * trigger audio codec errors.
5770                  */
5771                 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5772                         audio_suspended = true;
5773
5774                 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5775
5776                 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5777
5778                 amdgpu_amdkfd_pre_reset(tmp_adev, reset_context);
5779
5780                 /*
5781                  * Mark the ASICs to be reset as untracked first,
5782                  * and add them back after the reset completes.
5783                  */
5784                 amdgpu_unregister_gpu_instance(tmp_adev);
5785
5786                 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
5787
5788                 /* disable ras on ALL IPs */
5789                 if (!need_emergency_restart &&
5790                       amdgpu_device_ip_need_full_reset(tmp_adev))
5791                         amdgpu_ras_suspend(tmp_adev);
5792
5793                 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5794                         struct amdgpu_ring *ring = tmp_adev->rings[i];
5795
5796                         if (!amdgpu_ring_sched_ready(ring))
5797                                 continue;
5798
5799                         drm_sched_stop(&ring->sched, job ? &job->base : NULL);
5800
5801                         if (need_emergency_restart)
5802                                 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
5803                 }
5804                 atomic_inc(&tmp_adev->gpu_reset_counter);
5805         }
5806
5807         if (need_emergency_restart)
5808                 goto skip_sched_resume;
5809
5810         /*
5811          * Must check guilty signal here since after this point all old
5812          * HW fences are force signaled.
5813          *
5814          * job->base holds a reference to parent fence
5815          */
5816         if (job && dma_fence_is_signaled(&job->hw_fence)) {
5817                 job_signaled = true;
5818                 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5819                 goto skip_hw_reset;
5820         }
5821
5822 retry:  /* Pre-ASIC reset for the rest of the adevs in the XGMI hive. */
5823         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5824                 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
5825                 /* TODO: should we stop? */
5826                 if (r) {
5827                         dev_err(tmp_adev->dev, "GPU pre-ASIC reset failed with err %d for drm dev %s ",
5828                                   r, adev_to_drm(tmp_adev)->unique);
5829                         tmp_adev->asic_reset_res = r;
5830                 }
5831         }
5832
5833         /* Actual ASIC resets if needed. */
5834         /* Host driver will handle XGMI hive reset for SRIOV */
5835         if (amdgpu_sriov_vf(adev)) {
5836                 r = amdgpu_device_reset_sriov(adev, reset_context);
5837                 if (AMDGPU_RETRY_SRIOV_RESET(r) && (retry_limit--) > 0) {
5838                         amdgpu_virt_release_full_gpu(adev, true);
5839                         goto retry;
5840                 }
5841                 if (r)
5842                         adev->asic_reset_res = r;
5843         } else {
5844                 r = amdgpu_do_asic_reset(device_list_handle, reset_context);
5845                 if (r == -EAGAIN)
5846                         goto retry;
5847         }
5848
5849         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5850                 /*
5851                  * Drop any pending non scheduler resets queued before reset is done.
5852                  * Any reset scheduled after this point would be valid. Scheduler resets
5853                  * were already dropped during drm_sched_stop and no new ones can come
5854                  * in before drm_sched_start.
5855                  */
5856                 amdgpu_device_stop_pending_resets(tmp_adev);
5857         }
5858
5859 skip_hw_reset:
5860
5861         /* Post ASIC reset for all devs. */
5862         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5863
5864                 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5865                         struct amdgpu_ring *ring = tmp_adev->rings[i];
5866
5867                         if (!amdgpu_ring_sched_ready(ring))
5868                                 continue;
5869
5870                         drm_sched_start(&ring->sched, true);
5871                 }
5872
5873                 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
5874                         drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
5875
5876                 if (tmp_adev->asic_reset_res)
5877                         r = tmp_adev->asic_reset_res;
5878
5879                 tmp_adev->asic_reset_res = 0;
5880
5881                 if (r) {
5882                         /* bad news, how do we tell userspace? */
5883                         dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
5884                         amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5885                 } else {
5886                         dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
5887                         if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5888                                 DRM_WARN("smart shift update failed\n");
5889                 }
5890         }
5891
5892 skip_sched_resume:
5893         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5894                 /* unlock kfd: SRIOV would do it separately */
5895                 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
5896                         amdgpu_amdkfd_post_reset(tmp_adev);
5897
5898                 /* kfd_post_reset will do nothing if the kfd device is not initialized;
5899                  * bring up kfd here if it wasn't initialized before
5900                  */
5901                 if (!adev->kfd.init_complete)
5902                         amdgpu_amdkfd_device_init(adev);
5903
5904                 if (audio_suspended)
5905                         amdgpu_device_resume_display_audio(tmp_adev);
5906
5907                 amdgpu_device_unset_mp1_state(tmp_adev);
5908
5909                 amdgpu_ras_set_error_query_ready(tmp_adev, true);
5910         }
5911
5912         tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5913                                             reset_list);
5914         amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5915
5916 end_reset:
5917         if (hive) {
5918                 mutex_unlock(&hive->hive_lock);
5919                 amdgpu_put_xgmi_hive(hive);
5920         }
5921
5922         if (r)
5923                 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
5924
5925         atomic_set(&adev->reset_domain->reset_res, r);
5926         return r;
5927 }
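
/*
 * Illustrative usage sketch (not part of the driver): how a hang handler
 * might invoke amdgpu_device_gpu_recover().  The reset_context setup
 * mirrors the one used by amdgpu_pci_slot_reset() below; a real caller
 * (e.g. a job timeout handler) may set additional fields.
 */
static int example_trigger_gpu_recover(struct amdgpu_device *adev,
                                       struct amdgpu_job *job)
{
        struct amdgpu_reset_context reset_context;

        memset(&reset_context, 0, sizeof(reset_context));
        reset_context.method = AMD_RESET_METHOD_NONE;   /* let the ASIC pick */
        reset_context.reset_req_dev = adev;
        set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);

        /* job and hive are filled into reset_context by the recover path */
        return amdgpu_device_gpu_recover(adev, job, &reset_context);
}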
5928
5929 /**
5930  * amdgpu_device_partner_bandwidth - find the bandwidth of the appropriate partner
5931  *
5932  * @adev: amdgpu_device pointer
5933  * @speed: pointer to the speed of the link
5934  * @width: pointer to the width of the link
5935  *
5936  * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
5937  * first physical partner to an AMD dGPU.
5938  * This will exclude any virtual switches and links.
5939  */
5940 static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
5941                                             enum pci_bus_speed *speed,
5942                                             enum pcie_link_width *width)
5943 {
5944         struct pci_dev *parent = adev->pdev;
5945
5946         if (!speed || !width)
5947                 return;
5948
5949         *speed = PCI_SPEED_UNKNOWN;
5950         *width = PCIE_LNK_WIDTH_UNKNOWN;
5951
5952         if (amdgpu_device_pcie_dynamic_switching_supported(adev)) {
5953                 while ((parent = pci_upstream_bridge(parent))) {
5954                         /* skip upstream/downstream switches internal to the dGPU */
5955                         if (parent->vendor == PCI_VENDOR_ID_ATI)
5956                                 continue;
5957                         *speed = pcie_get_speed_cap(parent);
5958                         *width = pcie_get_width_cap(parent);
5959                         break;
5960                 }
5961         } else {
5962                 /* use the current speeds rather than max if switching is not supported */
5963                 pcie_bandwidth_available(adev->pdev, NULL, speed, width);
5964         }
5965 }
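
/*
 * Illustrative sketch (not part of the driver): consuming the partner link
 * caps resolved above.  Assumes pci_speed_string(), the PCI core helper
 * for turning the speed enum into text.
 */
static void example_log_partner_bandwidth(struct amdgpu_device *adev)
{
        enum pci_bus_speed speed;
        enum pcie_link_width width;

        amdgpu_device_partner_bandwidth(adev, &speed, &width);
        if (speed != PCI_SPEED_UNKNOWN && width != PCIE_LNK_WIDTH_UNKNOWN)
                dev_info(adev->dev, "partner link: %s x%d\n",
                         pci_speed_string(speed), width);
}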
5966
5967 /**
5968  * amdgpu_device_get_pcie_info - fetch PCIe info about the PCIe slot
5969  *
5970  * @adev: amdgpu_device pointer
5971  *
5972  * Fetches and stores in the driver the PCIe capabilities (gen speed
5973  * and lanes) of the slot the device is in. Handles APUs and
5974  * virtualized environments where PCIE config space may not be available.
5975  */
5976 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
5977 {
5978         struct pci_dev *pdev;
5979         enum pci_bus_speed speed_cap, platform_speed_cap;
5980         enum pcie_link_width platform_link_width;
5981
5982         if (amdgpu_pcie_gen_cap)
5983                 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
5984
5985         if (amdgpu_pcie_lane_cap)
5986                 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
5987
5988         /* covers APUs as well */
5989         if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
5990                 if (adev->pm.pcie_gen_mask == 0)
5991                         adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5992                 if (adev->pm.pcie_mlw_mask == 0)
5993                         adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
5994                 return;
5995         }
5996
5997         if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5998                 return;
5999
6000         amdgpu_device_partner_bandwidth(adev, &platform_speed_cap,
6001                                         &platform_link_width);
6002
6003         if (adev->pm.pcie_gen_mask == 0) {
6004                 /* asic caps */
6005                 pdev = adev->pdev;
6006                 speed_cap = pcie_get_speed_cap(pdev);
6007                 if (speed_cap == PCI_SPEED_UNKNOWN) {
6008                         adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6009                                                   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6010                                                   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
6011                 } else {
6012                         if (speed_cap == PCIE_SPEED_32_0GT)
6013                                 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6014                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6015                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6016                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
6017                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
6018                         else if (speed_cap == PCIE_SPEED_16_0GT)
6019                                 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6020                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6021                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6022                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
6023                         else if (speed_cap == PCIE_SPEED_8_0GT)
6024                                 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6025                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6026                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
6027                         else if (speed_cap == PCIE_SPEED_5_0GT)
6028                                 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6029                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
6030                         else
6031                                 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
6032                 }
6033                 /* platform caps */
6034                 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
6035                         adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6036                                                    CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6037                 } else {
6038                         if (platform_speed_cap == PCIE_SPEED_32_0GT)
6039                                 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6040                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6041                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6042                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
6043                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
6044                         else if (platform_speed_cap == PCIE_SPEED_16_0GT)
6045                                 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6046                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6047                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6048                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
6049                         else if (platform_speed_cap == PCIE_SPEED_8_0GT)
6050                                 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6051                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6052                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
6053                         else if (platform_speed_cap == PCIE_SPEED_5_0GT)
6054                                 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6055                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6056                         else
6057                                 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
6058
6059                 }
6060         }
6061         if (adev->pm.pcie_mlw_mask == 0) {
6062                 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
6063                         adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
6064                 } else {
6065                         switch (platform_link_width) {
6066                         case PCIE_LNK_X32:
6067                                 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
6068                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
6069                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6070                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6071                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6072                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6073                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6074                                 break;
6075                         case PCIE_LNK_X16:
6076                                 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
6077                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6078                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6079                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6080                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6081                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6082                                 break;
6083                         case PCIE_LNK_X12:
6084                                 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6085                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6086                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6087                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6088                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6089                                 break;
6090                         case PCIE_LNK_X8:
6091                                 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6092                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6093                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6094                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6095                                 break;
6096                         case PCIE_LNK_X4:
6097                                 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6098                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6099                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6100                                 break;
6101                         case PCIE_LNK_X2:
6102                                 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6103                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6104                                 break;
6105                         case PCIE_LNK_X1:
6106                                 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
6107                                 break;
6108                         default:
6109                                 break;
6110                         }
6111                 }
6112         }
6113 }
6114
6115 /**
6116  * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
6117  *
6118  * @adev: amdgpu_device pointer
6119  * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
6120  *
6121  * Return true if @peer_adev can access (DMA) @adev through the PCIe
6122  * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
6123  * @peer_adev.
6124  */
6125 bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
6126                                       struct amdgpu_device *peer_adev)
6127 {
6128 #ifdef CONFIG_HSA_AMD_P2P
6129         uint64_t address_mask = peer_adev->dev->dma_mask ?
6130                 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
6131         resource_size_t aper_limit =
6132                 adev->gmc.aper_base + adev->gmc.aper_size - 1;
6133         bool p2p_access =
6134                 !adev->gmc.xgmi.connected_to_cpu &&
6135                 !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
6136
6137         return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
6138                 adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
6139                 !(adev->gmc.aper_base & address_mask ||
6140                   aper_limit & address_mask));
6141 #else
6142         return false;
6143 #endif
6144 }
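
/*
 * Worked example (hypothetical numbers) for the check above: a peer with a
 * 44-bit DMA mask yields address_mask = ~DMA_BIT_MASK(44).  A fully visible
 * ("large BAR") aperture based at 0x0800_0000_0000 (2^43) with 32 GiB of
 * VRAM keeps both aper_base and aper_limit below 2^44, so neither
 * intersects address_mask and the peer is reported as accessible.
 */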
6145
6146 int amdgpu_device_baco_enter(struct drm_device *dev)
6147 {
6148         struct amdgpu_device *adev = drm_to_adev(dev);
6149         struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6150
6151         if (!amdgpu_device_supports_baco(dev))
6152                 return -ENOTSUPP;
6153
6154         if (ras && adev->ras_enabled &&
6155             adev->nbio.funcs->enable_doorbell_interrupt)
6156                 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
6157
6158         return amdgpu_dpm_baco_enter(adev);
6159 }
6160
6161 int amdgpu_device_baco_exit(struct drm_device *dev)
6162 {
6163         struct amdgpu_device *adev = drm_to_adev(dev);
6164         struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6165         int ret = 0;
6166
6167         if (!amdgpu_device_supports_baco(dev))
6168                 return -ENOTSUPP;
6169
6170         ret = amdgpu_dpm_baco_exit(adev);
6171         if (ret)
6172                 return ret;
6173
6174         if (ras && adev->ras_enabled &&
6175             adev->nbio.funcs->enable_doorbell_interrupt)
6176                 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
6177
6178         if (amdgpu_passthrough(adev) && adev->nbio.funcs &&
6179             adev->nbio.funcs->clear_doorbell_interrupt)
6180                 adev->nbio.funcs->clear_doorbell_interrupt(adev);
6181
6182         return 0;
6183 }
6184
6185 /**
6186  * amdgpu_pci_error_detected - Called when a PCI error is detected.
6187  * @pdev: PCI device struct
6188  * @state: PCI channel state
6189  *
6190  * Description: Called when a PCI error is detected.
6191  *
6192  * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
6193  */
6194 pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
6195 {
6196         struct drm_device *dev = pci_get_drvdata(pdev);
6197         struct amdgpu_device *adev = drm_to_adev(dev);
6198         int i;
6199
6200         DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
6201
6202         if (adev->gmc.xgmi.num_physical_nodes > 1) {
6203                 DRM_WARN("No support for XGMI hive yet...");
6204                 return PCI_ERS_RESULT_DISCONNECT;
6205         }
6206
6207         adev->pci_channel_state = state;
6208
6209         switch (state) {
6210         case pci_channel_io_normal:
6211                 return PCI_ERS_RESULT_CAN_RECOVER;
6212         /* Fatal error, prepare for slot reset */
6213         case pci_channel_io_frozen:
6214                 /*
6215                  * Locking adev->reset_domain->sem will prevent any external access
6216                  * to GPU during PCI error recovery
6217                  */
6218                 amdgpu_device_lock_reset_domain(adev->reset_domain);
6219                 amdgpu_device_set_mp1_state(adev);
6220
6221                 /*
6222                  * Block any work scheduling as we do for regular GPU reset
6223                  * for the duration of the recovery
6224                  */
6225                 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6226                         struct amdgpu_ring *ring = adev->rings[i];
6227
6228                         if (!amdgpu_ring_sched_ready(ring))
6229                                 continue;
6230
6231                         drm_sched_stop(&ring->sched, NULL);
6232                 }
6233                 atomic_inc(&adev->gpu_reset_counter);
6234                 return PCI_ERS_RESULT_NEED_RESET;
6235         case pci_channel_io_perm_failure:
6236                 /* Permanent error, prepare for device removal */
6237                 return PCI_ERS_RESULT_DISCONNECT;
6238         }
6239
6240         return PCI_ERS_RESULT_NEED_RESET;
6241 }
6242
6243 /**
6244  * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
6245  * @pdev: pointer to PCI device
6246  */
6247 pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
6248 {
6249
6250         DRM_INFO("PCI error: mmio enabled callback!!\n");
6251
6252         /* TODO - dump whatever for debugging purposes */
6253
6254         /* This is called only if amdgpu_pci_error_detected returns
6255          * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
6256          * works, no need to reset slot.
6257          */
6258
6259         return PCI_ERS_RESULT_RECOVERED;
6260 }
6261
6262 /**
6263  * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
6264  * @pdev: PCI device struct
6265  *
6266  * Description: This routine is called by the pci error recovery
6267  * code after the PCI slot has been reset, just before we
6268  * should resume normal operations.
6269  */
6270 pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
6271 {
6272         struct drm_device *dev = pci_get_drvdata(pdev);
6273         struct amdgpu_device *adev = drm_to_adev(dev);
6274         int r, i;
6275         struct amdgpu_reset_context reset_context;
6276         u32 memsize;
6277         struct list_head device_list;
6278         struct amdgpu_hive_info *hive;
6279         int hive_ras_recovery = 0;
6280         struct amdgpu_ras *ras;
6281
6282         /* PCI error slot reset should be skipped during RAS recovery */
6283         hive = amdgpu_get_xgmi_hive(adev);
6284         if (hive) {
6285                 hive_ras_recovery = atomic_read(&hive->ras_recovery);
6286                 amdgpu_put_xgmi_hive(hive);
6287         }
6288         ras = amdgpu_ras_get_context(adev);
6289         if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
6290              amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) &&
6291             ras && (atomic_read(&ras->in_recovery) || hive_ras_recovery))
6292                 return PCI_ERS_RESULT_RECOVERED;
6293
6294         DRM_INFO("PCI error: slot reset callback!!\n");
6295
6296         memset(&reset_context, 0, sizeof(reset_context));
6297
6298         INIT_LIST_HEAD(&device_list);
6299         list_add_tail(&adev->reset_list, &device_list);
6300
6301         /* wait for asic to come out of reset */
6302         msleep(500);
6303
6304         /* Restore PCI config space */
6305         amdgpu_device_load_pci_state(pdev);
6306
6307         /* confirm ASIC came out of reset */
6308         for (i = 0; i < adev->usec_timeout; i++) {
6309                 memsize = amdgpu_asic_get_config_memsize(adev);
6310
6311                 if (memsize != 0xffffffff)
6312                         break;
6313                 udelay(1);
6314         }
6315         if (memsize == 0xffffffff) {
6316                 r = -ETIME;
6317                 goto out;
6318         }
6319
6320         reset_context.method = AMD_RESET_METHOD_NONE;
6321         reset_context.reset_req_dev = adev;
6322         set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
6323         set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
6324
6325         adev->no_hw_access = true;
6326         r = amdgpu_device_pre_asic_reset(adev, &reset_context);
6327         adev->no_hw_access = false;
6328         if (r)
6329                 goto out;
6330
6331         r = amdgpu_do_asic_reset(&device_list, &reset_context);
6332
6333 out:
6334         if (!r) {
6335                 if (amdgpu_device_cache_pci_state(adev->pdev))
6336                         pci_restore_state(adev->pdev);
6337
6338                 DRM_INFO("PCIe error recovery succeeded\n");
6339         } else {
6340                 DRM_ERROR("PCIe error recovery failed, err:%d\n", r);
6341                 amdgpu_device_unset_mp1_state(adev);
6342                 amdgpu_device_unlock_reset_domain(adev->reset_domain);
6343         }
6344
6345         return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
6346 }
6347
6348 /**
6349  * amdgpu_pci_resume() - resume normal ops after PCI reset
6350  * @pdev: pointer to PCI device
6351  *
6352  * Called when the error recovery driver tells us that its
6353  * Called when the error recovery driver tells us that it's
6354  */
6355 void amdgpu_pci_resume(struct pci_dev *pdev)
6356 {
6357         struct drm_device *dev = pci_get_drvdata(pdev);
6358         struct amdgpu_device *adev = drm_to_adev(dev);
6359         int i;
6360
6361
6362         DRM_INFO("PCI error: resume callback!!\n");
6363
6364         /* Only continue execution for the case of pci_channel_io_frozen */
6365         if (adev->pci_channel_state != pci_channel_io_frozen)
6366                 return;
6367
6368         for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6369                 struct amdgpu_ring *ring = adev->rings[i];
6370
6371                 if (!amdgpu_ring_sched_ready(ring))
6372                         continue;
6373
6374                 drm_sched_start(&ring->sched, true);
6375         }
6376
6377         amdgpu_device_unset_mp1_state(adev);
6378         amdgpu_device_unlock_reset_domain(adev->reset_domain);
6379 }
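
/*
 * Illustrative sketch (not part of this file): how the four AER callbacks
 * above are typically wired into the PCI core.  In amdgpu the equivalent
 * table lives in amdgpu_drv.c and is referenced from struct pci_driver.
 */
static const struct pci_error_handlers example_pci_err_handlers = {
        .error_detected = amdgpu_pci_error_detected,
        .mmio_enabled   = amdgpu_pci_mmio_enabled,
        .slot_reset     = amdgpu_pci_slot_reset,
        .resume         = amdgpu_pci_resume,
};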
6380
6381 bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
6382 {
6383         struct drm_device *dev = pci_get_drvdata(pdev);
6384         struct amdgpu_device *adev = drm_to_adev(dev);
6385         int r;
6386
6387         r = pci_save_state(pdev);
6388         if (!r) {
6389                 kfree(adev->pci_state);
6390
6391                 adev->pci_state = pci_store_saved_state(pdev);
6392
6393                 if (!adev->pci_state) {
6394                         DRM_ERROR("Failed to store PCI saved state");
6395                         return false;
6396                 }
6397         } else {
6398                 DRM_WARN("Failed to save PCI state, err:%d\n", r);
6399                 return false;
6400         }
6401
6402         return true;
6403 }
6404
6405 bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
6406 {
6407         struct drm_device *dev = pci_get_drvdata(pdev);
6408         struct amdgpu_device *adev = drm_to_adev(dev);
6409         int r;
6410
6411         if (!adev->pci_state)
6412                 return false;
6413
6414         r = pci_load_saved_state(pdev, adev->pci_state);
6415
6416         if (!r) {
6417                 pci_restore_state(pdev);
6418         } else {
6419                 DRM_WARN("Failed to load PCI state, err:%d\n", r);
6420                 return false;
6421         }
6422
6423         return true;
6424 }
6425
6426 void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
6427                 struct amdgpu_ring *ring)
6428 {
6429 #ifdef CONFIG_X86_64
6430         if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6431                 return;
6432 #endif
6433         if (adev->gmc.xgmi.connected_to_cpu)
6434                 return;
6435
6436         if (ring && ring->funcs->emit_hdp_flush)
6437                 amdgpu_ring_emit_hdp_flush(ring);
6438         else
6439                 amdgpu_asic_flush_hdp(adev, ring);
6440 }
6441
6442 void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
6443                 struct amdgpu_ring *ring)
6444 {
6445 #ifdef CONFIG_X86_64
6446         if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6447                 return;
6448 #endif
6449         if (adev->gmc.xgmi.connected_to_cpu)
6450                 return;
6451
6452         amdgpu_asic_invalidate_hdp(adev, ring);
6453 }
6454
6455 int amdgpu_in_reset(struct amdgpu_device *adev)
6456 {
6457         return atomic_read(&adev->reset_domain->in_gpu_reset);
6458 }
6459
6460 /**
6461  * amdgpu_device_halt() - bring hardware to some kind of halt state
6462  *
6463  * @adev: amdgpu_device pointer
6464  *
6465  * Bring hardware to some kind of halt state so that no one can touch it
6466  * any more. It helps to maintain the error context when an error occurs.
6467  * Compared to a simple hang, the system will remain stable at least for
6468  * SSH access, making it trivial to inspect the hardware state and
6469  * see what's going on. Implemented as follows:
6470  *
6471  * 1. drm_dev_unplug() makes device inaccessible to user space (IOCTLs, etc.),
6472  *    clears all CPU mappings to device, disallows remappings through page faults
6473  * 2. amdgpu_irq_disable_all() disables all interrupts
6474  * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
6475  * 4. set adev->no_hw_access to avoid potential crashes after step 5
6476  * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
6477  * 6. pci_disable_device() and pci_wait_for_pending_transaction()
6478  *    flush any in flight DMA operations
6479  */
6480 void amdgpu_device_halt(struct amdgpu_device *adev)
6481 {
6482         struct pci_dev *pdev = adev->pdev;
6483         struct drm_device *ddev = adev_to_drm(adev);
6484
6485         amdgpu_xcp_dev_unplug(adev);
6486         drm_dev_unplug(ddev);
6487
6488         amdgpu_irq_disable_all(adev);
6489
6490         amdgpu_fence_driver_hw_fini(adev);
6491
6492         adev->no_hw_access = true;
6493
6494         amdgpu_device_unmap_mmio(adev);
6495
6496         pci_disable_device(pdev);
6497         pci_wait_for_pending_transaction(pdev);
6498 }
6499
6500 u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
6501                                 u32 reg)
6502 {
6503         unsigned long flags, address, data;
6504         u32 r;
6505
6506         address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6507         data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6508
6509         spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6510         WREG32(address, reg * 4);
6511         (void)RREG32(address);
6512         r = RREG32(data);
6513         spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6514         return r;
6515 }
6516
6517 void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
6518                                 u32 reg, u32 v)
6519 {
6520         unsigned long flags, address, data;
6521
6522         address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6523         data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6524
6525         spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6526         WREG32(address, reg * 4);
6527         (void)RREG32(address);
6528         WREG32(data, v);
6529         (void)RREG32(data);
6530         spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6531 }
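
/*
 * Illustrative sketch (not part of the driver): a read-modify-write of a
 * PCIe port register built from the index/data helpers above.  Note the
 * two accesses take the pcie_idx_lock independently, so the sequence as a
 * whole is not atomic against other writers.
 */
static void example_pcie_port_rmw(struct amdgpu_device *adev,
                                  u32 reg, u32 mask, u32 value)
{
        u32 tmp = amdgpu_device_pcie_port_rreg(adev, reg);

        tmp = (tmp & ~mask) | (value & mask);
        amdgpu_device_pcie_port_wreg(adev, reg, tmp);
}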
6532
6533 /**
6534  * amdgpu_device_switch_gang - switch to a new gang
6535  * @adev: amdgpu_device pointer
6536  * @gang: the gang to switch to
6537  *
6538  * Try to switch to a new gang.
6539  * Returns: NULL if we switched to the new gang or a reference to the current
6540  * gang leader.
6541  */
6542 struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6543                                             struct dma_fence *gang)
6544 {
6545         struct dma_fence *old = NULL;
6546
6547         do {
6548                 dma_fence_put(old);
6549                 rcu_read_lock();
6550                 old = dma_fence_get_rcu_safe(&adev->gang_submit);
6551                 rcu_read_unlock();
6552
6553                 if (old == gang)
6554                         break;
6555
6556                 if (!dma_fence_is_signaled(old))
6557                         return old;
6558
6559         } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6560                          old, gang) != old);
6561
6562         dma_fence_put(old);
6563         return NULL;
6564 }
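
/*
 * Illustrative sketch (not part of the driver): a submission path defers
 * on the old gang leader, treating a non-NULL return as a fence that must
 * signal before the gang switch can succeed on a later attempt.
 */
static struct dma_fence *example_prepare_gang_submit(struct amdgpu_device *adev,
                                                     struct dma_fence *gang)
{
        struct dma_fence *leader = amdgpu_device_switch_gang(adev, gang);

        /* NULL: we now own the gang; otherwise wait on the old leader first */
        return leader;
}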
6565
6566 bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6567 {
6568         switch (adev->asic_type) {
6569 #ifdef CONFIG_DRM_AMDGPU_SI
6570         case CHIP_HAINAN:
6571 #endif
6572         case CHIP_TOPAZ:
6573                 /* chips with no display hardware */
6574                 return false;
6575 #ifdef CONFIG_DRM_AMDGPU_SI
6576         case CHIP_TAHITI:
6577         case CHIP_PITCAIRN:
6578         case CHIP_VERDE:
6579         case CHIP_OLAND:
6580 #endif
6581 #ifdef CONFIG_DRM_AMDGPU_CIK
6582         case CHIP_BONAIRE:
6583         case CHIP_HAWAII:
6584         case CHIP_KAVERI:
6585         case CHIP_KABINI:
6586         case CHIP_MULLINS:
6587 #endif
6588         case CHIP_TONGA:
6589         case CHIP_FIJI:
6590         case CHIP_POLARIS10:
6591         case CHIP_POLARIS11:
6592         case CHIP_POLARIS12:
6593         case CHIP_VEGAM:
6594         case CHIP_CARRIZO:
6595         case CHIP_STONEY:
6596                 /* chips with display hardware */
6597                 return true;
6598         default:
6599                 /* IP discovery */
6600                 if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
6601                     (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6602                         return false;
6603                 return true;
6604         }
6605 }
6606
6607 uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
6608                 uint32_t inst, uint32_t reg_addr, char reg_name[],
6609                 uint32_t expected_value, uint32_t mask)
6610 {
6611         uint32_t ret = 0;
6612         uint32_t old_ = 0;
6613         uint32_t tmp_ = RREG32(reg_addr);
6614         uint32_t loop = adev->usec_timeout;
6615
6616         while ((tmp_ & (mask)) != (expected_value)) {
6617                 if (old_ != tmp_) {
6618                         loop = adev->usec_timeout;
6619                         old_ = tmp_;
6620                 } else
6621                         udelay(1);
6622                 tmp_ = RREG32(reg_addr);
6623                 loop--;
6624                 if (!loop) {
6625                         DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n",
6626                                   inst, reg_name, (uint32_t)expected_value,
6627                                   (uint32_t)(tmp_ & (mask)));
6628                         ret = -ETIMEDOUT;
6629                         break;
6630                 }
6631         }
6632         return ret;
6633 }
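
/*
 * Illustrative sketch (not part of the driver): polling a status register
 * until a READY bit is set, using the helper above.  The register offset
 * and bit below are hypothetical.
 */
static int example_wait_for_ready(struct amdgpu_device *adev)
{
        const uint32_t example_status_reg = 0x1234; /* hypothetical offset */
        const uint32_t example_ready_bit = 0x1;     /* hypothetical bit */

        return amdgpu_device_wait_on_rreg(adev, 0, example_status_reg,
                                          "EXAMPLE_STATUS",
                                          example_ready_bit,
                                          example_ready_bit);
}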