drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

   1 /*
   2  * Copyright 2008 Advanced Micro Devices, Inc.
   3  * Copyright 2008 Red Hat Inc.
   4  * Copyright 2009 Jerome Glisse.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the "Software"),
   8  * to deal in the Software without restriction, including without limitation
   9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10  * and/or sell copies of the Software, and to permit persons to whom the
  11  * Software is furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice shall be included in
  14  * all copies or substantial portions of the Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  22  * OTHER DEALINGS IN THE SOFTWARE.
  23  *
  24  * Authors: Dave Airlie
  25  *          Alex Deucher
  26  *          Jerome Glisse
  27  */
  28 #include <linux/power_supply.h>
  29 #include <linux/kthread.h>
  30 #include <linux/module.h>
  31 #include <linux/console.h>
  32 #include <linux/slab.h>
  33 #include <linux/iommu.h>
  34 #include <linux/pci.h>
  35 #include <linux/pci-p2pdma.h>
  36 #include <linux/apple-gmux.h>
  37
  38 #include <drm/drm_aperture.h>
  39 #include <drm/drm_atomic_helper.h>
  40 #include <drm/drm_crtc_helper.h>
  41 #include <drm/drm_fb_helper.h>
  42 #include <drm/drm_probe_helper.h>
  43 #include <drm/amdgpu_drm.h>
  44 #include <linux/device.h>
  45 #include <linux/vgaarb.h>
  46 #include <linux/vga_switcheroo.h>
  47 #include <linux/efi.h>
  48 #include "amdgpu.h"
  49 #include "amdgpu_trace.h"
  50 #include "amdgpu_i2c.h"
  51 #include "atom.h"
  52 #include "amdgpu_atombios.h"
  53 #include "amdgpu_atomfirmware.h"
  54 #include "amd_pcie.h"
  55 #ifdef CONFIG_DRM_AMDGPU_SI
  56 #include "si.h"
  57 #endif
  58 #ifdef CONFIG_DRM_AMDGPU_CIK
  59 #include "cik.h"
  60 #endif
  61 #include "vi.h"
  62 #include "soc15.h"
  63 #include "nv.h"
  64 #include "bif/bif_4_1_d.h"
  65 #include <linux/firmware.h>
  66 #include "amdgpu_vf_error.h"
  67
  68 #include "amdgpu_amdkfd.h"
  69 #include "amdgpu_pm.h"
  70
  71 #include "amdgpu_xgmi.h"
  72 #include "amdgpu_ras.h"
  73 #include "amdgpu_pmu.h"
  74 #include "amdgpu_fru_eeprom.h"
  75 #include "amdgpu_reset.h"
  76 #include "amdgpu_virt.h"
  77 #include "amdgpu_dev_coredump.h"
  78
  79 #include <linux/suspend.h>
  80 #include <drm/task_barrier.h>
  81 #include <linux/pm_runtime.h>
  82
  83 #include <drm/drm_drv.h>
  84
  85 #if IS_ENABLED(CONFIG_X86)
  86 #include <asm/intel-family.h>
  87 #endif
  88
/*
 * Firmware images announced through MODULE_FIRMWARE(). Every entry here is a
 * "<asic>_gpu_info.bin" file below the "amdgpu/" firmware directory.
 */
MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
  96
/* NOTE(review): presumably a resume delay/timeout in milliseconds - confirm at the use site. */
#define AMDGPU_RESUME_MS                2000
/* NOTE(review): looks like an upper bound on retry attempts - confirm at the use site. */
#define AMDGPU_MAX_RETRY_LIMIT          2
/*
 * Evaluates to true when error code @r is -EBUSY, -ETIMEDOUT or -EINVAL.
 * The argument is evaluated up to three times, so pass a plain variable.
 */
#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
/*
 * Fallback PCIE index, index-high and data register offsets, stored as dword
 * indices (byte offset >> 2). amdgpu_device_indirect_rreg_ext() falls back to
 * them when adev->nbio.funcs has not been set.
 */
#define AMDGPU_PCIE_INDEX_FALLBACK (0x38 >> 2)
#define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2)
#define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2)

/* Forward declaration; the initializer is not part of this section of the file. */
static const struct drm_driver amdgpu_kms_driver;
 105
/*
 * Human readable ASIC names.
 * NOTE(review): presumably indexed by the ASIC type enum (the type of
 * adev->asic_type), with "LAST" matching the enum's end marker - confirm
 * against the enum definition before adding or reordering entries.
 */
const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"ARCTURUS",
	"RENOIR",
	"ALDEBARAN",
	"NAVI10",
	"CYAN_SKILLFISH",
	"NAVI14",
	"NAVI12",
	"SIENNA_CICHLID",
	"NAVY_FLOUNDER",
	"VANGOGH",
	"DIMGREY_CAVEFISH",
	"BEIGE_GOBY",
	"YELLOW_CARP",
	"IP DISCOVERY",
	"LAST",
};

/* Forward declaration; the body is not part of this section of the file. */
static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev);
 148
 149 /**
 150  * DOC: pcie_replay_count
 151  *
 152  * The amdgpu driver provides a sysfs API for reporting the total number
 153  * of PCIe replays (NAKs)
 154  * The file pcie_replay_count is used for this and returns the total
 155  * number of replays as a sum of the NAKs generated and NAKs received
 156  */
 157
 158 static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
 159                 struct device_attribute *attr, char *buf)
 160 {
 161         struct drm_device *ddev = dev_get_drvdata(dev);
 162         struct amdgpu_device *adev = drm_to_adev(ddev);
 163         uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
 164
 165         return sysfs_emit(buf, "%llu\n", cnt);
 166 }
 167
 168 static DEVICE_ATTR(pcie_replay_count, 0444,
 169                 amdgpu_device_get_pcie_replay_count, NULL);
 170
 171 static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
 172                                           struct bin_attribute *attr, char *buf,
 173                                           loff_t ppos, size_t count)
 174 {
 175         struct device *dev = kobj_to_dev(kobj);
 176         struct drm_device *ddev = dev_get_drvdata(dev);
 177         struct amdgpu_device *adev = drm_to_adev(ddev);
 178         ssize_t bytes_read;
 179
 180         switch (ppos) {
 181         case AMDGPU_SYS_REG_STATE_XGMI:
 182                 bytes_read = amdgpu_asic_get_reg_state(
 183                         adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count);
 184                 break;
 185         case AMDGPU_SYS_REG_STATE_WAFL:
 186                 bytes_read = amdgpu_asic_get_reg_state(
 187                         adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count);
 188                 break;
 189         case AMDGPU_SYS_REG_STATE_PCIE:
 190                 bytes_read = amdgpu_asic_get_reg_state(
 191                         adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count);
 192                 break;
 193         case AMDGPU_SYS_REG_STATE_USR:
 194                 bytes_read = amdgpu_asic_get_reg_state(
 195                         adev, AMDGPU_REG_STATE_TYPE_USR, buf, count);
 196                 break;
 197         case AMDGPU_SYS_REG_STATE_USR_1:
 198                 bytes_read = amdgpu_asic_get_reg_state(
 199                         adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count);
 200                 break;
 201         default:
 202                 return -EINVAL;
 203         }
 204
 205         return bytes_read;
 206 }
 207
 208 BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL,
 209          AMDGPU_SYS_REG_STATE_END);
 210
 211 int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev)
 212 {
 213         int ret;
 214
 215         if (!amdgpu_asic_get_reg_state_supported(adev))
 216                 return 0;
 217
 218         ret = sysfs_create_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
 219
 220         return ret;
 221 }
 222
 223 void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
 224 {
 225         if (!amdgpu_asic_get_reg_state_supported(adev))
 226                 return;
 227         sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
 228 }
 229
 230 /**
 231  * DOC: board_info
 232  *
 233  * The amdgpu driver provides a sysfs API for giving board related information.
 234  * It provides the form factor information in the format
 235  *
 236  *   type : form factor
 237  *
 238  * Possible form factor values
 239  *
 240  * - "cem"              - PCIE CEM card
 241  * - "oam"              - Open Compute Accelerator Module
 242  * - "unknown"  - Not known
 243  *
 244  */
 245
 246 static ssize_t amdgpu_device_get_board_info(struct device *dev,
 247                                             struct device_attribute *attr,
 248                                             char *buf)
 249 {
 250         struct drm_device *ddev = dev_get_drvdata(dev);
 251         struct amdgpu_device *adev = drm_to_adev(ddev);
 252         enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
 253         const char *pkg;
 254
 255         if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
 256                 pkg_type = adev->smuio.funcs->get_pkg_type(adev);
 257
 258         switch (pkg_type) {
 259         case AMDGPU_PKG_TYPE_CEM:
 260                 pkg = "cem";
 261                 break;
 262         case AMDGPU_PKG_TYPE_OAM:
 263                 pkg = "oam";
 264                 break;
 265         default:
 266                 pkg = "unknown";
 267                 break;
 268         }
 269
 270         return sysfs_emit(buf, "%s : %s\n", "type", pkg);
 271 }
 272
 273 static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);
 274
 275 static struct attribute *amdgpu_board_attrs[] = {
 276         &dev_attr_board_info.attr,
 277         NULL,
 278 };
 279
 280 static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
 281                                              struct attribute *attr, int n)
 282 {
 283         struct device *dev = kobj_to_dev(kobj);
 284         struct drm_device *ddev = dev_get_drvdata(dev);
 285         struct amdgpu_device *adev = drm_to_adev(ddev);
 286
 287         if (adev->flags & AMD_IS_APU)
 288                 return 0;
 289
 290         return attr->mode;
 291 }
 292
 293 static const struct attribute_group amdgpu_board_attrs_group = {
 294         .attrs = amdgpu_board_attrs,
 295         .is_visible = amdgpu_board_attrs_is_visible
 296 };
 297
 298 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
 299
 300
 301 /**
 302  * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
 303  *
 304  * @dev: drm_device pointer
 305  *
 306  * Returns true if the device is a dGPU with ATPX power control,
 307  * otherwise return false.
 308  */
 309 bool amdgpu_device_supports_px(struct drm_device *dev)
 310 {
 311         struct amdgpu_device *adev = drm_to_adev(dev);
 312
 313         if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
 314                 return true;
 315         return false;
 316 }
 317
 318 /**
 319  * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
 320  *
 321  * @dev: drm_device pointer
 322  *
 323  * Returns true if the device is a dGPU with ACPI power control,
 324  * otherwise return false.
 325  */
 326 bool amdgpu_device_supports_boco(struct drm_device *dev)
 327 {
 328         struct amdgpu_device *adev = drm_to_adev(dev);
 329
 330         if (adev->has_pr3 ||
 331             ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
 332                 return true;
 333         return false;
 334 }
 335
 336 /**
 337  * amdgpu_device_supports_baco - Does the device support BACO
 338  *
 339  * @dev: drm_device pointer
 340  *
 341  * Return:
 342  * 1 if the device supporte BACO;
 343  * 3 if the device support MACO (only works if BACO is supported)
 344  * otherwise return 0.
 345  */
 346 int amdgpu_device_supports_baco(struct drm_device *dev)
 347 {
 348         struct amdgpu_device *adev = drm_to_adev(dev);
 349
 350         return amdgpu_asic_supports_baco(adev);
 351 }
 352
 353 void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev)
 354 {
 355         struct drm_device *dev;
 356         int bamaco_support;
 357
 358         dev = adev_to_drm(adev);
 359
 360         adev->pm.rpm_mode = AMDGPU_RUNPM_NONE;
 361         bamaco_support = amdgpu_device_supports_baco(dev);
 362
 363         switch (amdgpu_runtime_pm) {
 364         case 2:
 365                 if (bamaco_support & MACO_SUPPORT) {
 366                         adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
 367                         dev_info(adev->dev, "Forcing BAMACO for runtime pm\n");
 368                 } else if (bamaco_support == BACO_SUPPORT) {
 369                         adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
 370                         dev_info(adev->dev, "Requested mode BAMACO not available,fallback to use BACO\n");
 371                 }
 372                 break;
 373         case 1:
 374                 if (bamaco_support & BACO_SUPPORT) {
 375                         adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
 376                         dev_info(adev->dev, "Forcing BACO for runtime pm\n");
 377                 }
 378                 break;
 379         case -1:
 380         case -2:
 381                 if (amdgpu_device_supports_px(dev)) { /* enable PX as runtime mode */
 382                         adev->pm.rpm_mode = AMDGPU_RUNPM_PX;
 383                         dev_info(adev->dev, "Using ATPX for runtime pm\n");
 384                 } else if (amdgpu_device_supports_boco(dev)) { /* enable boco as runtime mode */
 385                         adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO;
 386                         dev_info(adev->dev, "Using BOCO for runtime pm\n");
 387                 } else {
 388                         if (!bamaco_support)
 389                                 goto no_runtime_pm;
 390
 391                         switch (adev->asic_type) {
 392                         case CHIP_VEGA20:
 393                         case CHIP_ARCTURUS:
 394                                 /* BACO are not supported on vega20 and arctrus */
 395                                 break;
 396                         case CHIP_VEGA10:
 397                                 /* enable BACO as runpm mode if noretry=0 */
 398                                 if (!adev->gmc.noretry)
 399                                         adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
 400                                 break;
 401                         default:
 402                                 /* enable BACO as runpm mode on CI+ */
 403                                 adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
 404                                 break;
 405                         }
 406
 407                         if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) {
 408                                 if (bamaco_support & MACO_SUPPORT) {
 409                                         adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
 410                                         dev_info(adev->dev, "Using BAMACO for runtime pm\n");
 411                                 } else {
 412                                         dev_info(adev->dev, "Using BACO for runtime pm\n");
 413                                 }
 414                         }
 415                 }
 416                 break;
 417         case 0:
 418                 dev_info(adev->dev, "runtime pm is manually disabled\n");
 419                 break;
 420         default:
 421                 break;
 422         }
 423
 424 no_runtime_pm:
 425         if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE)
 426                 dev_info(adev->dev, "Runtime PM not available\n");
 427 }
 428 /**
 429  * amdgpu_device_supports_smart_shift - Is the device dGPU with
 430  * smart shift support
 431  *
 432  * @dev: drm_device pointer
 433  *
 434  * Returns true if the device is a dGPU with Smart Shift support,
 435  * otherwise returns false.
 436  */
 437 bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
 438 {
 439         return (amdgpu_device_supports_boco(dev) &&
 440                 amdgpu_acpi_is_power_shift_control_supported());
 441 }
 442
 443 /*
 444  * VRAM access helper functions
 445  */
 446
 447 /**
 448  * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
 449  *
 450  * @adev: amdgpu_device pointer
 451  * @pos: offset of the buffer in vram
 452  * @buf: virtual address of the buffer in system memory
 453  * @size: read/write size, sizeof(@buf) must > @size
 454  * @write: true - write to vram, otherwise - read from vram
 455  */
 456 void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
 457                              void *buf, size_t size, bool write)
 458 {
 459         unsigned long flags;
 460         uint32_t hi = ~0, tmp = 0;
 461         uint32_t *data = buf;
 462         uint64_t last;
 463         int idx;
 464
 465         if (!drm_dev_enter(adev_to_drm(adev), &idx))
 466                 return;
 467
 468         BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
 469
 470         spin_lock_irqsave(&adev->mmio_idx_lock, flags);
 471         for (last = pos + size; pos < last; pos += 4) {
 472                 tmp = pos >> 31;
 473
 474                 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
 475                 if (tmp != hi) {
 476                         WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
 477                         hi = tmp;
 478                 }
 479                 if (write)
 480                         WREG32_NO_KIQ(mmMM_DATA, *data++);
 481                 else
 482                         *data++ = RREG32_NO_KIQ(mmMM_DATA);
 483         }
 484
 485         spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
 486         drm_dev_exit(idx);
 487 }
 488
 489 /**
 490  * amdgpu_device_aper_access - access vram by vram aperature
 491  *
 492  * @adev: amdgpu_device pointer
 493  * @pos: offset of the buffer in vram
 494  * @buf: virtual address of the buffer in system memory
 495  * @size: read/write size, sizeof(@buf) must > @size
 496  * @write: true - write to vram, otherwise - read from vram
 497  *
 498  * The return value means how many bytes have been transferred.
 499  */
 500 size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
 501                                  void *buf, size_t size, bool write)
 502 {
 503 #ifdef CONFIG_64BIT
 504         void __iomem *addr;
 505         size_t count = 0;
 506         uint64_t last;
 507
 508         if (!adev->mman.aper_base_kaddr)
 509                 return 0;
 510
 511         last = min(pos + size, adev->gmc.visible_vram_size);
 512         if (last > pos) {
 513                 addr = adev->mman.aper_base_kaddr + pos;
 514                 count = last - pos;
 515
 516                 if (write) {
 517                         memcpy_toio(addr, buf, count);
 518                         /* Make sure HDP write cache flush happens without any reordering
 519                          * after the system memory contents are sent over PCIe device
 520                          */
 521                         mb();
 522                         amdgpu_device_flush_hdp(adev, NULL);
 523                 } else {
 524                         amdgpu_device_invalidate_hdp(adev, NULL);
 525                         /* Make sure HDP read cache is invalidated before issuing a read
 526                          * to the PCIe device
 527                          */
 528                         mb();
 529                         memcpy_fromio(buf, addr, count);
 530                 }
 531
 532         }
 533
 534         return count;
 535 #else
 536         return 0;
 537 #endif
 538 }
 539
 540 /**
 541  * amdgpu_device_vram_access - read/write a buffer in vram
 542  *
 543  * @adev: amdgpu_device pointer
 544  * @pos: offset of the buffer in vram
 545  * @buf: virtual address of the buffer in system memory
 546  * @size: read/write size, sizeof(@buf) must > @size
 547  * @write: true - write to vram, otherwise - read from vram
 548  */
 549 void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
 550                                void *buf, size_t size, bool write)
 551 {
 552         size_t count;
 553
 554         /* try to using vram apreature to access vram first */
 555         count = amdgpu_device_aper_access(adev, pos, buf, size, write);
 556         size -= count;
 557         if (size) {
 558                 /* using MM to access rest vram */
 559                 pos += count;
 560                 buf += count;
 561                 amdgpu_device_mm_access(adev, pos, buf, size, write);
 562         }
 563 }
 564
 565 /*
 566  * register access helper functions.
 567  */
 568
 569 /* Check if hw access should be skipped because of hotplug or device error */
 570 bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
 571 {
 572         if (adev->no_hw_access)
 573                 return true;
 574
 575 #ifdef CONFIG_LOCKDEP
 576         /*
 577          * This is a bit complicated to understand, so worth a comment. What we assert
 578          * here is that the GPU reset is not running on another thread in parallel.
 579          *
 580          * For this we trylock the read side of the reset semaphore, if that succeeds
 581          * we know that the reset is not running in paralell.
 582          *
 583          * If the trylock fails we assert that we are either already holding the read
 584          * side of the lock or are the reset thread itself and hold the write side of
 585          * the lock.
 586          */
 587         if (in_task()) {
 588                 if (down_read_trylock(&adev->reset_domain->sem))
 589                         up_read(&adev->reset_domain->sem);
 590                 else
 591                         lockdep_assert_held(&adev->reset_domain->sem);
 592         }
 593 #endif
 594         return false;
 595 }
 596
 597 /**
 598  * amdgpu_device_rreg - read a memory mapped IO or indirect register
 599  *
 600  * @adev: amdgpu_device pointer
 601  * @reg: dword aligned register offset
 602  * @acc_flags: access flags which require special behavior
 603  *
 604  * Returns the 32 bit value from the offset specified.
 605  */
 606 uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
 607                             uint32_t reg, uint32_t acc_flags)
 608 {
 609         uint32_t ret;
 610
 611         if (amdgpu_device_skip_hw_access(adev))
 612                 return 0;
 613
 614         if ((reg * 4) < adev->rmmio_size) {
 615                 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
 616                     amdgpu_sriov_runtime(adev) &&
 617                     down_read_trylock(&adev->reset_domain->sem)) {
 618                         ret = amdgpu_kiq_rreg(adev, reg, 0);
 619                         up_read(&adev->reset_domain->sem);
 620                 } else {
 621                         ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
 622                 }
 623         } else {
 624                 ret = adev->pcie_rreg(adev, reg * 4);
 625         }
 626
 627         trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
 628
 629         return ret;
 630 }
 631
/*
 * Byte-wide MMIO register read helper.
 * @offset: byte offset from the start of the MMIO region
 */
 636
 637 /**
 638  * amdgpu_mm_rreg8 - read a memory mapped IO register
 639  *
 640  * @adev: amdgpu_device pointer
 641  * @offset: byte aligned register offset
 642  *
 643  * Returns the 8 bit value from the offset specified.
 644  */
 645 uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
 646 {
 647         if (amdgpu_device_skip_hw_access(adev))
 648                 return 0;
 649
 650         if (offset < adev->rmmio_size)
 651                 return (readb(adev->rmmio + offset));
 652         BUG();
 653 }
 654
 655
 656 /**
 657  * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC
 658  *
 659  * @adev: amdgpu_device pointer
 660  * @reg: dword aligned register offset
 661  * @acc_flags: access flags which require special behavior
 662  * @xcc_id: xcc accelerated compute core id
 663  *
 664  * Returns the 32 bit value from the offset specified.
 665  */
 666 uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
 667                                 uint32_t reg, uint32_t acc_flags,
 668                                 uint32_t xcc_id)
 669 {
 670         uint32_t ret, rlcg_flag;
 671
 672         if (amdgpu_device_skip_hw_access(adev))
 673                 return 0;
 674
 675         if ((reg * 4) < adev->rmmio_size) {
 676                 if (amdgpu_sriov_vf(adev) &&
 677                     !amdgpu_sriov_runtime(adev) &&
 678                     adev->gfx.rlc.rlcg_reg_access_supported &&
 679                     amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
 680                                                          GC_HWIP, false,
 681                                                          &rlcg_flag)) {
 682                         ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, GET_INST(GC, xcc_id));
 683                 } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
 684                     amdgpu_sriov_runtime(adev) &&
 685                     down_read_trylock(&adev->reset_domain->sem)) {
 686                         ret = amdgpu_kiq_rreg(adev, reg, xcc_id);
 687                         up_read(&adev->reset_domain->sem);
 688                 } else {
 689                         ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
 690                 }
 691         } else {
 692                 ret = adev->pcie_rreg(adev, reg * 4);
 693         }
 694
 695         return ret;
 696 }
 697
/*
 * Byte-wide MMIO register write helper.
 * @offset: byte offset from the start of the MMIO region
 * @value: the value to be written to the register
 */
 703
 704 /**
 705  * amdgpu_mm_wreg8 - read a memory mapped IO register
 706  *
 707  * @adev: amdgpu_device pointer
 708  * @offset: byte aligned register offset
 709  * @value: 8 bit value to write
 710  *
 711  * Writes the value specified to the offset specified.
 712  */
 713 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
 714 {
 715         if (amdgpu_device_skip_hw_access(adev))
 716                 return;
 717
 718         if (offset < adev->rmmio_size)
 719                 writeb(value, adev->rmmio + offset);
 720         else
 721                 BUG();
 722 }
 723
 724 /**
 725  * amdgpu_device_wreg - write to a memory mapped IO or indirect register
 726  *
 727  * @adev: amdgpu_device pointer
 728  * @reg: dword aligned register offset
 729  * @v: 32 bit value to write to the register
 730  * @acc_flags: access flags which require special behavior
 731  *
 732  * Writes the value specified to the offset specified.
 733  */
 734 void amdgpu_device_wreg(struct amdgpu_device *adev,
 735                         uint32_t reg, uint32_t v,
 736                         uint32_t acc_flags)
 737 {
 738         if (amdgpu_device_skip_hw_access(adev))
 739                 return;
 740
 741         if ((reg * 4) < adev->rmmio_size) {
 742                 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
 743                     amdgpu_sriov_runtime(adev) &&
 744                     down_read_trylock(&adev->reset_domain->sem)) {
 745                         amdgpu_kiq_wreg(adev, reg, v, 0);
 746                         up_read(&adev->reset_domain->sem);
 747                 } else {
 748                         writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
 749                 }
 750         } else {
 751                 adev->pcie_wreg(adev, reg * 4, v);
 752         }
 753
 754         trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
 755 }
 756
 757 /**
 758  * amdgpu_mm_wreg_mmio_rlc -  write register either with direct/indirect mmio or with RLC path if in range
 759  *
 760  * @adev: amdgpu_device pointer
 761  * @reg: mmio/rlc register
 762  * @v: value to write
 763  * @xcc_id: xcc accelerated compute core id
 764  *
 765  * this function is invoked only for the debugfs register access
 766  */
 767 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
 768                              uint32_t reg, uint32_t v,
 769                              uint32_t xcc_id)
 770 {
 771         if (amdgpu_device_skip_hw_access(adev))
 772                 return;
 773
 774         if (amdgpu_sriov_fullaccess(adev) &&
 775             adev->gfx.rlc.funcs &&
 776             adev->gfx.rlc.funcs->is_rlcg_access_range) {
 777                 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
 778                         return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
 779         } else if ((reg * 4) >= adev->rmmio_size) {
 780                 adev->pcie_wreg(adev, reg * 4, v);
 781         } else {
 782                 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
 783         }
 784 }
 785
 786 /**
 787  * amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC
 788  *
 789  * @adev: amdgpu_device pointer
 790  * @reg: dword aligned register offset
 791  * @v: 32 bit value to write to the register
 792  * @acc_flags: access flags which require special behavior
 793  * @xcc_id: xcc accelerated compute core id
 794  *
 795  * Writes the value specified to the offset specified.
 796  */
 797 void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
 798                         uint32_t reg, uint32_t v,
 799                         uint32_t acc_flags, uint32_t xcc_id)
 800 {
 801         uint32_t rlcg_flag;
 802
 803         if (amdgpu_device_skip_hw_access(adev))
 804                 return;
 805
 806         if ((reg * 4) < adev->rmmio_size) {
 807                 if (amdgpu_sriov_vf(adev) &&
 808                     !amdgpu_sriov_runtime(adev) &&
 809                     adev->gfx.rlc.rlcg_reg_access_supported &&
 810                     amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
 811                                                          GC_HWIP, true,
 812                                                          &rlcg_flag)) {
 813                         amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, GET_INST(GC, xcc_id));
 814                 } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
 815                     amdgpu_sriov_runtime(adev) &&
 816                     down_read_trylock(&adev->reset_domain->sem)) {
 817                         amdgpu_kiq_wreg(adev, reg, v, xcc_id);
 818                         up_read(&adev->reset_domain->sem);
 819                 } else {
 820                         writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
 821                 }
 822         } else {
 823                 adev->pcie_wreg(adev, reg * 4, v);
 824         }
 825 }
 826
 827 /**
 828  * amdgpu_device_indirect_rreg - read an indirect register
 829  *
 830  * @adev: amdgpu_device pointer
 831  * @reg_addr: indirect register address to read from
 832  *
 833  * Returns the value of indirect register @reg_addr
 834  */
 835 u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
 836                                 u32 reg_addr)
 837 {
 838         unsigned long flags, pcie_index, pcie_data;
 839         void __iomem *pcie_index_offset;
 840         void __iomem *pcie_data_offset;
 841         u32 r;
 842
 843         pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
 844         pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
 845
 846         spin_lock_irqsave(&adev->pcie_idx_lock, flags);
 847         pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
 848         pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
 849
 850         writel(reg_addr, pcie_index_offset);
 851         readl(pcie_index_offset);
 852         r = readl(pcie_data_offset);
 853         spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
 854
 855         return r;
 856 }
 857
 858 u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
 859                                     u64 reg_addr)
 860 {
 861         unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
 862         u32 r;
 863         void __iomem *pcie_index_offset;
 864         void __iomem *pcie_index_hi_offset;
 865         void __iomem *pcie_data_offset;
 866
 867         if (unlikely(!adev->nbio.funcs)) {
 868                 pcie_index = AMDGPU_PCIE_INDEX_FALLBACK;
 869                 pcie_data = AMDGPU_PCIE_DATA_FALLBACK;
 870         } else {
 871                 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
 872                 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
 873         }
 874
 875         if (reg_addr >> 32) {
 876                 if (unlikely(!adev->nbio.funcs))
 877                         pcie_index_hi = AMDGPU_PCIE_INDEX_HI_FALLBACK;
 878                 else
 879                         pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
 880         } else {
 881                 pcie_index_hi = 0;
 882         }
 883
 884         spin_lock_irqsave(&adev->pcie_idx_lock, flags);
 885         pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
 886         pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
 887         if (pcie_index_hi != 0)
 888                 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
 889                                 pcie_index_hi * 4;
 890
 891         writel(reg_addr, pcie_index_offset);
 892         readl(pcie_index_offset);
 893         if (pcie_index_hi != 0) {
 894                 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
 895                 readl(pcie_index_hi_offset);
 896         }
 897         r = readl(pcie_data_offset);
 898
 899         /* clear the high bits */
 900         if (pcie_index_hi != 0) {
 901                 writel(0, pcie_index_hi_offset);
 902                 readl(pcie_index_hi_offset);
 903         }
 904
 905         spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
 906
 907         return r;
 908 }
 909
 910 /**
 911  * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
 912  *
 913  * @adev: amdgpu_device pointer
 914  * @reg_addr: indirect register address to read from
 915  *
 916  * Returns the value of indirect register @reg_addr
 917  */
 918 u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
 919                                   u32 reg_addr)
 920 {
 921         unsigned long flags, pcie_index, pcie_data;
 922         void __iomem *pcie_index_offset;
 923         void __iomem *pcie_data_offset;
 924         u64 r;
 925
 926         pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
 927         pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
 928
 929         spin_lock_irqsave(&adev->pcie_idx_lock, flags);
 930         pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
 931         pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
 932
 933         /* read low 32 bits */
 934         writel(reg_addr, pcie_index_offset);
 935         readl(pcie_index_offset);
 936         r = readl(pcie_data_offset);
 937         /* read high 32 bits */
 938         writel(reg_addr + 4, pcie_index_offset);
 939         readl(pcie_index_offset);
 940         r |= ((u64)readl(pcie_data_offset) << 32);
 941         spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
 942
 943         return r;
 944 }
 945
 946 u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
 947                                   u64 reg_addr)
 948 {
 949         unsigned long flags, pcie_index, pcie_data;
 950         unsigned long pcie_index_hi = 0;
 951         void __iomem *pcie_index_offset;
 952         void __iomem *pcie_index_hi_offset;
 953         void __iomem *pcie_data_offset;
 954         u64 r;
 955
 956         pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
 957         pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
 958         if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
 959                 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
 960
 961         spin_lock_irqsave(&adev->pcie_idx_lock, flags);
 962         pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
 963         pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
 964         if (pcie_index_hi != 0)
 965                 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
 966                         pcie_index_hi * 4;
 967
 968         /* read low 32 bits */
 969         writel(reg_addr, pcie_index_offset);
 970         readl(pcie_index_offset);
 971         if (pcie_index_hi != 0) {
 972                 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
 973                 readl(pcie_index_hi_offset);
 974         }
 975         r = readl(pcie_data_offset);
 976         /* read high 32 bits */
 977         writel(reg_addr + 4, pcie_index_offset);
 978         readl(pcie_index_offset);
 979         if (pcie_index_hi != 0) {
 980                 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
 981                 readl(pcie_index_hi_offset);
 982         }
 983         r |= ((u64)readl(pcie_data_offset) << 32);
 984
 985         /* clear the high bits */
 986         if (pcie_index_hi != 0) {
 987                 writel(0, pcie_index_hi_offset);
 988                 readl(pcie_index_hi_offset);
 989         }
 990
 991         spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
 992
 993         return r;
 994 }
 995
 996 /**
 997  * amdgpu_device_indirect_wreg - write an indirect register address
 998  *
 999  * @adev: amdgpu_device pointer
1000  * @reg_addr: indirect register offset
1001  * @reg_data: indirect register data
1002  *
1003  */
1004 void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
1005                                  u32 reg_addr, u32 reg_data)
1006 {
1007         unsigned long flags, pcie_index, pcie_data;
1008         void __iomem *pcie_index_offset;
1009         void __iomem *pcie_data_offset;
1010
1011         pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1012         pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1013
1014         spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1015         pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1016         pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1017
1018         writel(reg_addr, pcie_index_offset);
1019         readl(pcie_index_offset);
1020         writel(reg_data, pcie_data_offset);
1021         readl(pcie_data_offset);
1022         spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1023 }
1024
1025 void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
1026                                      u64 reg_addr, u32 reg_data)
1027 {
1028         unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
1029         void __iomem *pcie_index_offset;
1030         void __iomem *pcie_index_hi_offset;
1031         void __iomem *pcie_data_offset;
1032
1033         pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1034         pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1035         if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
1036                 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
1037         else
1038                 pcie_index_hi = 0;
1039
1040         spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1041         pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1042         pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1043         if (pcie_index_hi != 0)
1044                 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
1045                                 pcie_index_hi * 4;
1046
1047         writel(reg_addr, pcie_index_offset);
1048         readl(pcie_index_offset);
1049         if (pcie_index_hi != 0) {
1050                 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1051                 readl(pcie_index_hi_offset);
1052         }
1053         writel(reg_data, pcie_data_offset);
1054         readl(pcie_data_offset);
1055
1056         /* clear the high bits */
1057         if (pcie_index_hi != 0) {
1058                 writel(0, pcie_index_hi_offset);
1059                 readl(pcie_index_hi_offset);
1060         }
1061
1062         spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1063 }
1064
1065 /**
1066  * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
1067  *
1068  * @adev: amdgpu_device pointer
1069  * @reg_addr: indirect register offset
1070  * @reg_data: indirect register data
1071  *
1072  */
1073 void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
1074                                    u32 reg_addr, u64 reg_data)
1075 {
1076         unsigned long flags, pcie_index, pcie_data;
1077         void __iomem *pcie_index_offset;
1078         void __iomem *pcie_data_offset;
1079
1080         pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1081         pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1082
1083         spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1084         pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1085         pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1086
1087         /* write low 32 bits */
1088         writel(reg_addr, pcie_index_offset);
1089         readl(pcie_index_offset);
1090         writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
1091         readl(pcie_data_offset);
1092         /* write high 32 bits */
1093         writel(reg_addr + 4, pcie_index_offset);
1094         readl(pcie_index_offset);
1095         writel((u32)(reg_data >> 32), pcie_data_offset);
1096         readl(pcie_data_offset);
1097         spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1098 }
1099
1100 void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
1101                                    u64 reg_addr, u64 reg_data)
1102 {
1103         unsigned long flags, pcie_index, pcie_data;
1104         unsigned long pcie_index_hi = 0;
1105         void __iomem *pcie_index_offset;
1106         void __iomem *pcie_index_hi_offset;
1107         void __iomem *pcie_data_offset;
1108
1109         pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1110         pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1111         if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
1112                 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
1113
1114         spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1115         pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1116         pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1117         if (pcie_index_hi != 0)
1118                 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
1119                                 pcie_index_hi * 4;
1120
1121         /* write low 32 bits */
1122         writel(reg_addr, pcie_index_offset);
1123         readl(pcie_index_offset);
1124         if (pcie_index_hi != 0) {
1125                 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1126                 readl(pcie_index_hi_offset);
1127         }
1128         writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
1129         readl(pcie_data_offset);
1130         /* write high 32 bits */
1131         writel(reg_addr + 4, pcie_index_offset);
1132         readl(pcie_index_offset);
1133         if (pcie_index_hi != 0) {
1134                 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1135                 readl(pcie_index_hi_offset);
1136         }
1137         writel((u32)(reg_data >> 32), pcie_data_offset);
1138         readl(pcie_data_offset);
1139
1140         /* clear the high bits */
1141         if (pcie_index_hi != 0) {
1142                 writel(0, pcie_index_hi_offset);
1143                 readl(pcie_index_hi_offset);
1144         }
1145
1146         spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1147 }
1148
1149 /**
1150  * amdgpu_device_get_rev_id - query device rev_id
1151  *
1152  * @adev: amdgpu_device pointer
1153  *
1154  * Return device rev_id
1155  */
1156 u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
1157 {
1158         return adev->nbio.funcs->get_rev_id(adev);
1159 }
1160
1161 /**
1162  * amdgpu_invalid_rreg - dummy reg read function
1163  *
1164  * @adev: amdgpu_device pointer
1165  * @reg: offset of register
1166  *
1167  * Dummy register read function.  Used for register blocks
1168  * that certain asics don't have (all asics).
1169  * Returns the value in the register.
1170  */
1171 static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
1172 {
1173         DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
1174         BUG();
1175         return 0;
1176 }
1177
1178 static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
1179 {
1180         DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
1181         BUG();
1182         return 0;
1183 }
1184
1185 /**
1186  * amdgpu_invalid_wreg - dummy reg write function
1187  *
1188  * @adev: amdgpu_device pointer
1189  * @reg: offset of register
1190  * @v: value to write to the register
1191  *
1192  * Dummy register read function.  Used for register blocks
1193  * that certain asics don't have (all asics).
1194  */
1195 static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
1196 {
1197         DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
1198                   reg, v);
1199         BUG();
1200 }
1201
1202 static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
1203 {
1204         DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
1205                   reg, v);
1206         BUG();
1207 }
1208
1209 /**
1210  * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
1211  *
1212  * @adev: amdgpu_device pointer
1213  * @reg: offset of register
1214  *
1215  * Dummy register read function.  Used for register blocks
1216  * that certain asics don't have (all asics).
1217  * Returns the value in the register.
1218  */
1219 static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
1220 {
1221         DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
1222         BUG();
1223         return 0;
1224 }
1225
1226 static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
1227 {
1228         DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
1229         BUG();
1230         return 0;
1231 }
1232
1233 /**
1234  * amdgpu_invalid_wreg64 - dummy reg write function
1235  *
1236  * @adev: amdgpu_device pointer
1237  * @reg: offset of register
1238  * @v: value to write to the register
1239  *
1240  * Dummy register read function.  Used for register blocks
1241  * that certain asics don't have (all asics).
1242  */
1243 static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
1244 {
1245         DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
1246                   reg, v);
1247         BUG();
1248 }
1249
1250 static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
1251 {
1252         DRM_ERROR("Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
1253                   reg, v);
1254         BUG();
1255 }
1256
1257 /**
1258  * amdgpu_block_invalid_rreg - dummy reg read function
1259  *
1260  * @adev: amdgpu_device pointer
1261  * @block: offset of instance
1262  * @reg: offset of register
1263  *
1264  * Dummy register read function.  Used for register blocks
1265  * that certain asics don't have (all asics).
1266  * Returns the value in the register.
1267  */
1268 static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
1269                                           uint32_t block, uint32_t reg)
1270 {
1271         DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
1272                   reg, block);
1273         BUG();
1274         return 0;
1275 }
1276
1277 /**
1278  * amdgpu_block_invalid_wreg - dummy reg write function
1279  *
1280  * @adev: amdgpu_device pointer
1281  * @block: offset of instance
1282  * @reg: offset of register
1283  * @v: value to write to the register
1284  *
1285  * Dummy register read function.  Used for register blocks
1286  * that certain asics don't have (all asics).
1287  */
1288 static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
1289                                       uint32_t block,
1290                                       uint32_t reg, uint32_t v)
1291 {
1292         DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
1293                   reg, block, v);
1294         BUG();
1295 }
1296
1297 /**
1298  * amdgpu_device_asic_init - Wrapper for atom asic_init
1299  *
1300  * @adev: amdgpu_device pointer
1301  *
1302  * Does any asic specific work and then calls atom asic init.
1303  */
1304 static int amdgpu_device_asic_init(struct amdgpu_device *adev)
1305 {
1306         int ret;
1307
1308         amdgpu_asic_pre_asic_init(adev);
1309
1310         if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
1311             amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
1312             amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
1313                 amdgpu_psp_wait_for_bootloader(adev);
1314                 ret = amdgpu_atomfirmware_asic_init(adev, true);
1315                 return ret;
1316         } else {
1317                 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
1318         }
1319
1320         return 0;
1321 }
1322
1323 /**
1324  * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
1325  *
1326  * @adev: amdgpu_device pointer
1327  *
1328  * Allocates a scratch page of VRAM for use by various things in the
1329  * driver.
1330  */
1331 static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
1332 {
1333         return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
1334                                        AMDGPU_GEM_DOMAIN_VRAM |
1335                                        AMDGPU_GEM_DOMAIN_GTT,
1336                                        &adev->mem_scratch.robj,
1337                                        &adev->mem_scratch.gpu_addr,
1338                                        (void **)&adev->mem_scratch.ptr);
1339 }
1340
1341 /**
1342  * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
1343  *
1344  * @adev: amdgpu_device pointer
1345  *
1346  * Frees the VRAM scratch page.
1347  */
1348 static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
1349 {
1350         amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
1351 }
1352
1353 /**
1354  * amdgpu_device_program_register_sequence - program an array of registers.
1355  *
1356  * @adev: amdgpu_device pointer
1357  * @registers: pointer to the register array
1358  * @array_size: size of the register array
1359  *
1360  * Programs an array or registers with and or masks.
1361  * This is a helper for setting golden registers.
1362  */
1363 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
1364                                              const u32 *registers,
1365                                              const u32 array_size)
1366 {
1367         u32 tmp, reg, and_mask, or_mask;
1368         int i;
1369
1370         if (array_size % 3)
1371                 return;
1372
1373         for (i = 0; i < array_size; i += 3) {
1374                 reg = registers[i + 0];
1375                 and_mask = registers[i + 1];
1376                 or_mask = registers[i + 2];
1377
1378                 if (and_mask == 0xffffffff) {
1379                         tmp = or_mask;
1380                 } else {
1381                         tmp = RREG32(reg);
1382                         tmp &= ~and_mask;
1383                         if (adev->family >= AMDGPU_FAMILY_AI)
1384                                 tmp |= (or_mask & and_mask);
1385                         else
1386                                 tmp |= or_mask;
1387                 }
1388                 WREG32(reg, tmp);
1389         }
1390 }
1391
1392 /**
1393  * amdgpu_device_pci_config_reset - reset the GPU
1394  *
1395  * @adev: amdgpu_device pointer
1396  *
1397  * Resets the GPU using the pci config reset sequence.
1398  * Only applicable to asics prior to vega10.
1399  */
1400 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
1401 {
1402         pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1403 }
1404
1405 /**
1406  * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1407  *
1408  * @adev: amdgpu_device pointer
1409  *
1410  * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1411  */
1412 int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1413 {
1414         return pci_reset_function(adev->pdev);
1415 }
1416
/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */
1422
1423 /**
1424  * amdgpu_device_wb_fini - Disable Writeback and free memory
1425  *
1426  * @adev: amdgpu_device pointer
1427  *
1428  * Disables Writeback and frees the Writeback memory (all asics).
1429  * Used at driver shutdown.
1430  */
1431 static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
1432 {
1433         if (adev->wb.wb_obj) {
1434                 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1435                                       &adev->wb.gpu_addr,
1436                                       (void **)&adev->wb.wb);
1437                 adev->wb.wb_obj = NULL;
1438         }
1439 }
1440
1441 /**
1442  * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
1443  *
1444  * @adev: amdgpu_device pointer
1445  *
1446  * Initializes writeback and allocates writeback memory (all asics).
1447  * Used at driver startup.
1448  * Returns 0 on success or an -error on failure.
1449  */
1450 static int amdgpu_device_wb_init(struct amdgpu_device *adev)
1451 {
1452         int r;
1453
1454         if (adev->wb.wb_obj == NULL) {
1455                 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1456                 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
1457                                             PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1458                                             &adev->wb.wb_obj, &adev->wb.gpu_addr,
1459                                             (void **)&adev->wb.wb);
1460                 if (r) {
1461                         dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1462                         return r;
1463                 }
1464
1465                 adev->wb.num_wb = AMDGPU_MAX_WB;
1466                 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1467
1468                 /* clear wb memory */
1469                 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
1470         }
1471
1472         return 0;
1473 }
1474
1475 /**
1476  * amdgpu_device_wb_get - Allocate a wb entry
1477  *
1478  * @adev: amdgpu_device pointer
1479  * @wb: wb index
1480  *
1481  * Allocate a wb slot for use by the driver (all asics).
1482  * Returns 0 on success or -EINVAL on failure.
1483  */
1484 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
1485 {
1486         unsigned long flags, offset;
1487
1488         spin_lock_irqsave(&adev->wb.lock, flags);
1489         offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
1490         if (offset < adev->wb.num_wb) {
1491                 __set_bit(offset, adev->wb.used);
1492                 spin_unlock_irqrestore(&adev->wb.lock, flags);
1493                 *wb = offset << 3; /* convert to dw offset */
1494                 return 0;
1495         } else {
1496                 spin_unlock_irqrestore(&adev->wb.lock, flags);
1497                 return -EINVAL;
1498         }
1499 }
1500
1501 /**
1502  * amdgpu_device_wb_free - Free a wb entry
1503  *
1504  * @adev: amdgpu_device pointer
1505  * @wb: wb index
1506  *
1507  * Free a wb slot allocated for use by the driver (all asics)
1508  */
1509 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
1510 {
1511         unsigned long flags;
1512
1513         wb >>= 3;
1514         spin_lock_irqsave(&adev->wb.lock, flags);
1515         if (wb < adev->wb.num_wb)
1516                 __clear_bit(wb, adev->wb.used);
1517         spin_unlock_irqrestore(&adev->wb.lock, flags);
1518 }
1519
1520 /**
1521  * amdgpu_device_resize_fb_bar - try to resize FB BAR
1522  *
1523  * @adev: amdgpu_device pointer
1524  *
1525  * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1526  * to fail, but if any of the BARs is not accessible after the size we abort
1527  * driver loading by returning -ENODEV.
1528  */
1529 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1530 {
1531         int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
1532         struct pci_bus *root;
1533         struct resource *res;
1534         unsigned int i;
1535         u16 cmd;
1536         int r;
1537
1538         if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1539                 return 0;
1540
1541         /* Bypass for VF */
1542         if (amdgpu_sriov_vf(adev))
1543                 return 0;
1544
1545         /* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */
1546         if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR))
1547                 DRM_WARN("System can't access extended configuration space, please check!!\n");
1548
1549         /* skip if the bios has already enabled large BAR */
1550         if (adev->gmc.real_vram_size &&
1551             (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1552                 return 0;
1553
1554         /* Check if the root BUS has 64bit memory resources */
1555         root = adev->pdev->bus;
1556         while (root->parent)
1557                 root = root->parent;
1558
1559         pci_bus_for_each_resource(root, res, i) {
1560                 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
1561                     res->start > 0x100000000ull)
1562                         break;
1563         }
1564
1565         /* Trying to resize is pointless without a root hub window above 4GB */
1566         if (!res)
1567                 return 0;
1568
1569         /* Limit the BAR size to what is available */
1570         rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1571                         rbar_size);
1572
1573         /* Disable memory decoding while we change the BAR addresses and size */
1574         pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1575         pci_write_config_word(adev->pdev, PCI_COMMAND,
1576                               cmd & ~PCI_COMMAND_MEMORY);
1577
1578         /* Free the VRAM and doorbell BAR, we most likely need to move both. */
1579         amdgpu_doorbell_fini(adev);
1580         if (adev->asic_type >= CHIP_BONAIRE)
1581                 pci_release_resource(adev->pdev, 2);
1582
1583         pci_release_resource(adev->pdev, 0);
1584
1585         r = pci_resize_resource(adev->pdev, 0, rbar_size);
1586         if (r == -ENOSPC)
1587                 DRM_INFO("Not enough PCI address space for a large BAR.");
1588         else if (r && r != -ENOTSUPP)
1589                 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1590
1591         pci_assign_unassigned_bus_resources(adev->pdev->bus);
1592
1593         /* When the doorbell or fb BAR isn't available we have no chance of
1594          * using the device.
1595          */
1596         r = amdgpu_doorbell_init(adev);
1597         if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1598                 return -ENODEV;
1599
1600         pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1601
1602         return 0;
1603 }
1604
1605 static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
1606 {
1607         if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
1608                 return false;
1609
1610         return true;
1611 }
1612
/*
 * GPU helper functions.
 */
1616 /**
1617  * amdgpu_device_need_post - check if the hw need post or not
1618  *
1619  * @adev: amdgpu_device pointer
1620  *
1621  * Check if the asic has been initialized (all asics) at driver startup
1622  * or post is needed if  hw reset is performed.
1623  * Returns true if need or false if not.
1624  */
1625 bool amdgpu_device_need_post(struct amdgpu_device *adev)
1626 {
1627         uint32_t reg;
1628
1629         if (amdgpu_sriov_vf(adev))
1630                 return false;
1631
1632         if (!amdgpu_device_read_bios(adev))
1633                 return false;
1634
1635         if (amdgpu_passthrough(adev)) {
1636                 /* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
1637                  * some old smc fw still need driver do vPost otherwise gpu hang, while
1638                  * those smc fw version above 22.15 doesn't have this flaw, so we force
1639                  * vpost executed for smc version below 22.15
1640                  */
1641                 if (adev->asic_type == CHIP_FIJI) {
1642                         int err;
1643                         uint32_t fw_ver;
1644
1645                         err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1646                         /* force vPost if error occured */
1647                         if (err)
1648                                 return true;
1649
1650                         fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1651                         release_firmware(adev->pm.fw);
1652                         if (fw_ver < 0x00160e00)
1653                                 return true;
1654                 }
1655         }
1656
1657         /* Don't post if we need to reset whole hive on init */
1658         if (adev->gmc.xgmi.pending_reset)
1659                 return false;
1660
1661         if (adev->has_hw_reset) {
1662                 adev->has_hw_reset = false;
1663                 return true;
1664         }
1665
1666         /* bios scratch used on CIK+ */
1667         if (adev->asic_type >= CHIP_BONAIRE)
1668                 return amdgpu_atombios_scratch_need_asic_init(adev);
1669
1670         /* check MEM_SIZE for older asics */
1671         reg = amdgpu_asic_get_config_memsize(adev);
1672
1673         if ((reg != 0) && (reg != 0xffffffff))
1674                 return false;
1675
1676         return true;
1677 }
1678
1679 /*
1680  * Check whether seamless boot is supported.
1681  *
1682  * So far we only support seamless boot on DCE 3.0 or later.
1683  * If users report that it works on older ASICS as well, we may
1684  * loosen this.
1685  */
1686 bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1687 {
1688         switch (amdgpu_seamless) {
1689         case -1:
1690                 break;
1691         case 1:
1692                 return true;
1693         case 0:
1694                 return false;
1695         default:
1696                 DRM_ERROR("Invalid value for amdgpu.seamless: %d\n",
1697                           amdgpu_seamless);
1698                 return false;
1699         }
1700
1701         if (!(adev->flags & AMD_IS_APU))
1702                 return false;
1703
1704         if (adev->mman.keep_stolen_vga_memory)
1705                 return false;
1706
1707         return amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0);
1708 }
1709
1710 /*
1711  * Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids
1712  * don't support dynamic speed switching. Until we have confirmation from Intel
1713  * that a specific host supports it, it's safer that we keep it disabled for all.
1714  *
1715  * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1716  * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1717  */
1718 static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev)
1719 {
1720 #if IS_ENABLED(CONFIG_X86)
1721         struct cpuinfo_x86 *c = &cpu_data(0);
1722
1723         /* eGPU change speeds based on USB4 fabric conditions */
1724         if (dev_is_removable(adev->dev))
1725                 return true;
1726
1727         if (c->x86_vendor == X86_VENDOR_INTEL)
1728                 return false;
1729 #endif
1730         return true;
1731 }
1732
1733 /**
1734  * amdgpu_device_should_use_aspm - check if the device should program ASPM
1735  *
1736  * @adev: amdgpu_device pointer
1737  *
1738  * Confirm whether the module parameter and pcie bridge agree that ASPM should
1739  * be set for this device.
1740  *
1741  * Returns true if it should be used or false if not.
1742  */
1743 bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1744 {
1745         switch (amdgpu_aspm) {
1746         case -1:
1747                 break;
1748         case 0:
1749                 return false;
1750         case 1:
1751                 return true;
1752         default:
1753                 return false;
1754         }
1755         if (adev->flags & AMD_IS_APU)
1756                 return false;
1757         if (!(adev->pm.pp_feature & PP_PCIE_DPM_MASK))
1758                 return false;
1759         return pcie_aspm_enabled(adev->pdev);
1760 }
1761
1762 /* if we get transitioned to only one device, take VGA back */
1763 /**
1764  * amdgpu_device_vga_set_decode - enable/disable vga decode
1765  *
1766  * @pdev: PCI device pointer
1767  * @state: enable/disable vga decode
1768  *
1769  * Enable/disable vga decode (all asics).
1770  * Returns VGA resource flags.
1771  */
1772 static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1773                 bool state)
1774 {
1775         struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
1776
1777         amdgpu_asic_set_vga_state(adev, state);
1778         if (state)
1779                 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1780                        VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1781         else
1782                 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1783 }
1784
1785 /**
1786  * amdgpu_device_check_block_size - validate the vm block size
1787  *
1788  * @adev: amdgpu_device pointer
1789  *
1790  * Validates the vm block size specified via module parameter.
1791  * The vm block size defines number of bits in page table versus page directory,
1792  * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1793  * page table and the remaining bits are in the page directory.
1794  */
1795 static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1796 {
1797         /* defines number of bits in page table versus page directory,
1798          * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1799          * page table and the remaining bits are in the page directory
1800          */
1801         if (amdgpu_vm_block_size == -1)
1802                 return;
1803
1804         if (amdgpu_vm_block_size < 9) {
1805                 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1806                          amdgpu_vm_block_size);
1807                 amdgpu_vm_block_size = -1;
1808         }
1809 }
1810
1811 /**
1812  * amdgpu_device_check_vm_size - validate the vm size
1813  *
1814  * @adev: amdgpu_device pointer
1815  *
1816  * Validates the vm size in GB specified via module parameter.
1817  * The VM size is the size of the GPU virtual memory space in GB.
1818  */
1819 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1820 {
1821         /* no need to check the default value */
1822         if (amdgpu_vm_size == -1)
1823                 return;
1824
1825         if (amdgpu_vm_size < 1) {
1826                 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1827                          amdgpu_vm_size);
1828                 amdgpu_vm_size = -1;
1829         }
1830 }
1831
1832 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1833 {
1834         struct sysinfo si;
1835         bool is_os_64 = (sizeof(void *) == 8);
1836         uint64_t total_memory;
1837         uint64_t dram_size_seven_GB = 0x1B8000000;
1838         uint64_t dram_size_three_GB = 0xB8000000;
1839
1840         if (amdgpu_smu_memory_pool_size == 0)
1841                 return;
1842
1843         if (!is_os_64) {
1844                 DRM_WARN("Not 64-bit OS, feature not supported\n");
1845                 goto def_value;
1846         }
1847         si_meminfo(&si);
1848         total_memory = (uint64_t)si.totalram * si.mem_unit;
1849
1850         if ((amdgpu_smu_memory_pool_size == 1) ||
1851                 (amdgpu_smu_memory_pool_size == 2)) {
1852                 if (total_memory < dram_size_three_GB)
1853                         goto def_value1;
1854         } else if ((amdgpu_smu_memory_pool_size == 4) ||
1855                 (amdgpu_smu_memory_pool_size == 8)) {
1856                 if (total_memory < dram_size_seven_GB)
1857                         goto def_value1;
1858         } else {
1859                 DRM_WARN("Smu memory pool size not supported\n");
1860                 goto def_value;
1861         }
1862         adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1863
1864         return;
1865
1866 def_value1:
1867         DRM_WARN("No enough system memory\n");
1868 def_value:
1869         adev->pm.smu_prv_buffer_size = 0;
1870 }
1871
1872 static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1873 {
1874         if (!(adev->flags & AMD_IS_APU) ||
1875             adev->asic_type < CHIP_RAVEN)
1876                 return 0;
1877
1878         switch (adev->asic_type) {
1879         case CHIP_RAVEN:
1880                 if (adev->pdev->device == 0x15dd)
1881                         adev->apu_flags |= AMD_APU_IS_RAVEN;
1882                 if (adev->pdev->device == 0x15d8)
1883                         adev->apu_flags |= AMD_APU_IS_PICASSO;
1884                 break;
1885         case CHIP_RENOIR:
1886                 if ((adev->pdev->device == 0x1636) ||
1887                     (adev->pdev->device == 0x164c))
1888                         adev->apu_flags |= AMD_APU_IS_RENOIR;
1889                 else
1890                         adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1891                 break;
1892         case CHIP_VANGOGH:
1893                 adev->apu_flags |= AMD_APU_IS_VANGOGH;
1894                 break;
1895         case CHIP_YELLOW_CARP:
1896                 break;
1897         case CHIP_CYAN_SKILLFISH:
1898                 if ((adev->pdev->device == 0x13FE) ||
1899                     (adev->pdev->device == 0x143F))
1900                         adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1901                 break;
1902         default:
1903                 break;
1904         }
1905
1906         return 0;
1907 }
1908
1909 /**
1910  * amdgpu_device_check_arguments - validate module params
1911  *
1912  * @adev: amdgpu_device pointer
1913  *
1914  * Validates certain module parameters and updates
1915  * the associated values used by the driver (all asics).
1916  */
1917 static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
1918 {
1919         if (amdgpu_sched_jobs < 4) {
1920                 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1921                          amdgpu_sched_jobs);
1922                 amdgpu_sched_jobs = 4;
1923         } else if (!is_power_of_2(amdgpu_sched_jobs)) {
1924                 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1925                          amdgpu_sched_jobs);
1926                 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1927         }
1928
1929         if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1930                 /* gart size must be greater or equal to 32M */
1931                 dev_warn(adev->dev, "gart size (%d) too small\n",
1932                          amdgpu_gart_size);
1933                 amdgpu_gart_size = -1;
1934         }
1935
1936         if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1937                 /* gtt size must be greater or equal to 32M */
1938                 dev_warn(adev->dev, "gtt size (%d) too small\n",
1939                                  amdgpu_gtt_size);
1940                 amdgpu_gtt_size = -1;
1941         }
1942
1943         /* valid range is between 4 and 9 inclusive */
1944         if (amdgpu_vm_fragment_size != -1 &&
1945             (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1946                 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1947                 amdgpu_vm_fragment_size = -1;
1948         }
1949
1950         if (amdgpu_sched_hw_submission < 2) {
1951                 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1952                          amdgpu_sched_hw_submission);
1953                 amdgpu_sched_hw_submission = 2;
1954         } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1955                 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1956                          amdgpu_sched_hw_submission);
1957                 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1958         }
1959
1960         if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1961                 dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1962                 amdgpu_reset_method = -1;
1963         }
1964
1965         amdgpu_device_check_smu_prv_buffer_size(adev);
1966
1967         amdgpu_device_check_vm_size(adev);
1968
1969         amdgpu_device_check_block_size(adev);
1970
1971         adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1972
1973         return 0;
1974 }
1975
/**
 * amdgpu_switcheroo_set_state - set switcheroo state
 *
 * @pdev: pci dev pointer
 * @state: vga_switcheroo state
 *
 * Callback for the switcheroo driver.  Suspends or resumes
 * the asics before or after it is powered up using ACPI methods.
 *
 * For VGA_SWITCHEROO_ON the device is put into D0, its cached PCI state is
 * reloaded, the PCI device is re-enabled and the asic is resumed.  For any
 * other state the asic is prepared and suspended, its PCI state is cached,
 * and the PCI device is disabled and put into D3cold.  A switch-off request
 * is ignored when amdgpu_device_supports_px() reports PX support.
 *
 * The return values of the prepare/suspend/resume calls are not checked
 * here; only a pci_enable_device() failure is logged.
 */
static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
                                        enum vga_switcheroo_state state)
{
        struct drm_device *dev = pci_get_drvdata(pdev);
        int r;

        /*
         * NOTE(review): presumably PX platforms power the GPU down through
         * another path (e.g. runtime PM), so OFF is a no-op here - confirm.
         */
        if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
                return;

        if (state == VGA_SWITCHEROO_ON) {
                pr_info("switched on\n");
                /* don't suspend or resume card normally */
                dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;

                /* power up and restore PCI config before touching the asic */
                pci_set_power_state(pdev, PCI_D0);
                amdgpu_device_load_pci_state(pdev);
                r = pci_enable_device(pdev);
                if (r)
                        DRM_WARN("pci_enable_device failed (%d)\n", r);
                /* resume is attempted even if enabling the device failed */
                amdgpu_device_resume(dev, true);

                dev->switch_power_state = DRM_SWITCH_POWER_ON;
        } else {
                pr_info("switched off\n");
                dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
                amdgpu_device_prepare(dev);
                amdgpu_device_suspend(dev, true);
                /* save the PCI state so the ON path can reload it */
                amdgpu_device_cache_pci_state(pdev);
                /* Shut down the device */
                pci_disable_device(pdev);
                pci_set_power_state(pdev, PCI_D3cold);
                dev->switch_power_state = DRM_SWITCH_POWER_OFF;
        }
}
2019
2020 /**
2021  * amdgpu_switcheroo_can_switch - see if switcheroo state can change
2022  *
2023  * @pdev: pci dev pointer
2024  *
2025  * Callback for the switcheroo driver.  Check of the switcheroo
2026  * state can be changed.
2027  * Returns true if the state can be changed, false if not.
2028  */
2029 static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
2030 {
2031         struct drm_device *dev = pci_get_drvdata(pdev);
2032
2033        /*
2034         * FIXME: open_count is protected by drm_global_mutex but that would lead to
2035         * locking inversion with the driver load path. And the access here is
2036         * completely racy anyway. So don't bother with locking for now.
2037         */
2038         return atomic_read(&dev->open_count) == 0;
2039 }
2040
/*
 * vga_switcheroo client callbacks for amdgpu: power state changes go
 * through amdgpu_switcheroo_set_state(), switch permission is decided by
 * amdgpu_switcheroo_can_switch(), and no reprobe callback is provided.
 */
static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
        .set_gpu_state = amdgpu_switcheroo_set_state,
        .reprobe = NULL,
        .can_switch = amdgpu_switcheroo_can_switch,
};
2046
2047 /**
2048  * amdgpu_device_ip_set_clockgating_state - set the CG state
2049  *
2050  * @dev: amdgpu_device pointer
2051  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2052  * @state: clockgating state (gate or ungate)
2053  *
2054  * Sets the requested clockgating state for all instances of
2055  * the hardware IP specified.
2056  * Returns the error code from the last instance.
2057  */
2058 int amdgpu_device_ip_set_clockgating_state(void *dev,
2059                                            enum amd_ip_block_type block_type,
2060                                            enum amd_clockgating_state state)
2061 {
2062         struct amdgpu_device *adev = dev;
2063         int i, r = 0;
2064
2065         for (i = 0; i < adev->num_ip_blocks; i++) {
2066                 if (!adev->ip_blocks[i].status.valid)
2067                         continue;
2068                 if (adev->ip_blocks[i].version->type != block_type)
2069                         continue;
2070                 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
2071                         continue;
2072                 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
2073                         (void *)adev, state);
2074                 if (r)
2075                         DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
2076                                   adev->ip_blocks[i].version->funcs->name, r);
2077         }
2078         return r;
2079 }
2080
2081 /**
2082  * amdgpu_device_ip_set_powergating_state - set the PG state
2083  *
2084  * @dev: amdgpu_device pointer
2085  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2086  * @state: powergating state (gate or ungate)
2087  *
2088  * Sets the requested powergating state for all instances of
2089  * the hardware IP specified.
2090  * Returns the error code from the last instance.
2091  */
2092 int amdgpu_device_ip_set_powergating_state(void *dev,
2093                                            enum amd_ip_block_type block_type,
2094                                            enum amd_powergating_state state)
2095 {
2096         struct amdgpu_device *adev = dev;
2097         int i, r = 0;
2098
2099         for (i = 0; i < adev->num_ip_blocks; i++) {
2100                 if (!adev->ip_blocks[i].status.valid)
2101                         continue;
2102                 if (adev->ip_blocks[i].version->type != block_type)
2103                         continue;
2104                 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
2105                         continue;
2106                 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
2107                         (void *)adev, state);
2108                 if (r)
2109                         DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
2110                                   adev->ip_blocks[i].version->funcs->name, r);
2111         }
2112         return r;
2113 }
2114
2115 /**
2116  * amdgpu_device_ip_get_clockgating_state - get the CG state
2117  *
2118  * @adev: amdgpu_device pointer
2119  * @flags: clockgating feature flags
2120  *
2121  * Walks the list of IPs on the device and updates the clockgating
2122  * flags for each IP.
2123  * Updates @flags with the feature flags for each hardware IP where
2124  * clockgating is enabled.
2125  */
2126 void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
2127                                             u64 *flags)
2128 {
2129         int i;
2130
2131         for (i = 0; i < adev->num_ip_blocks; i++) {
2132                 if (!adev->ip_blocks[i].status.valid)
2133                         continue;
2134                 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
2135                         adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
2136         }
2137 }
2138
2139 /**
2140  * amdgpu_device_ip_wait_for_idle - wait for idle
2141  *
2142  * @adev: amdgpu_device pointer
2143  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2144  *
2145  * Waits for the request hardware IP to be idle.
2146  * Returns 0 for success or a negative error code on failure.
2147  */
2148 int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
2149                                    enum amd_ip_block_type block_type)
2150 {
2151         int i, r;
2152
2153         for (i = 0; i < adev->num_ip_blocks; i++) {
2154                 if (!adev->ip_blocks[i].status.valid)
2155                         continue;
2156                 if (adev->ip_blocks[i].version->type == block_type) {
2157                         r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
2158                         if (r)
2159                                 return r;
2160                         break;
2161                 }
2162         }
2163         return 0;
2164
2165 }
2166
2167 /**
2168  * amdgpu_device_ip_is_idle - is the hardware IP idle
2169  *
2170  * @adev: amdgpu_device pointer
2171  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2172  *
2173  * Check if the hardware IP is idle or not.
2174  * Returns true if it the IP is idle, false if not.
2175  */
2176 bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
2177                               enum amd_ip_block_type block_type)
2178 {
2179         int i;
2180
2181         for (i = 0; i < adev->num_ip_blocks; i++) {
2182                 if (!adev->ip_blocks[i].status.valid)
2183                         continue;
2184                 if (adev->ip_blocks[i].version->type == block_type)
2185                         return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
2186         }
2187         return true;
2188
2189 }
2190
2191 /**
2192  * amdgpu_device_ip_get_ip_block - get a hw IP pointer
2193  *
2194  * @adev: amdgpu_device pointer
2195  * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
2196  *
2197  * Returns a pointer to the hardware IP block structure
2198  * if it exists for the asic, otherwise NULL.
2199  */
2200 struct amdgpu_ip_block *
2201 amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
2202                               enum amd_ip_block_type type)
2203 {
2204         int i;
2205
2206         for (i = 0; i < adev->num_ip_blocks; i++)
2207                 if (adev->ip_blocks[i].version->type == type)
2208                         return &adev->ip_blocks[i];
2209
2210         return NULL;
2211 }
2212
2213 /**
2214  * amdgpu_device_ip_block_version_cmp
2215  *
2216  * @adev: amdgpu_device pointer
2217  * @type: enum amd_ip_block_type
2218  * @major: major version
2219  * @minor: minor version
2220  *
2221  * return 0 if equal or greater
2222  * return 1 if smaller or the ip_block doesn't exist
2223  */
2224 int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
2225                                        enum amd_ip_block_type type,
2226                                        u32 major, u32 minor)
2227 {
2228         struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
2229
2230         if (ip_block && ((ip_block->version->major > major) ||
2231                         ((ip_block->version->major == major) &&
2232                         (ip_block->version->minor >= minor))))
2233                 return 0;
2234
2235         return 1;
2236 }
2237
2238 /**
2239  * amdgpu_device_ip_block_add
2240  *
2241  * @adev: amdgpu_device pointer
2242  * @ip_block_version: pointer to the IP to add
2243  *
2244  * Adds the IP block driver information to the collection of IPs
2245  * on the asic.
2246  */
2247 int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
2248                                const struct amdgpu_ip_block_version *ip_block_version)
2249 {
2250         if (!ip_block_version)
2251                 return -EINVAL;
2252
2253         switch (ip_block_version->type) {
2254         case AMD_IP_BLOCK_TYPE_VCN:
2255                 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
2256                         return 0;
2257                 break;
2258         case AMD_IP_BLOCK_TYPE_JPEG:
2259                 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
2260                         return 0;
2261                 break;
2262         default:
2263                 break;
2264         }
2265
2266         DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
2267                   ip_block_version->funcs->name);
2268
2269         adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
2270
2271         return 0;
2272 }
2273
2274 /**
2275  * amdgpu_device_enable_virtual_display - enable virtual display feature
2276  *
2277  * @adev: amdgpu_device pointer
2278  *
2279  * Enabled the virtual display feature if the user has enabled it via
2280  * the module parameter virtual_display.  This feature provides a virtual
2281  * display hardware on headless boards or in virtualized environments.
2282  * This function parses and validates the configuration string specified by
2283  * the user and configues the virtual display configuration (number of
2284  * virtual connectors, crtcs, etc.) specified.
2285  */
2286 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
2287 {
2288         adev->enable_virtual_display = false;
2289
2290         if (amdgpu_virtual_display) {
2291                 const char *pci_address_name = pci_name(adev->pdev);
2292                 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
2293
2294                 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
2295                 pciaddstr_tmp = pciaddstr;
2296                 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
2297                         pciaddname = strsep(&pciaddname_tmp, ",");
2298                         if (!strcmp("all", pciaddname)
2299                             || !strcmp(pci_address_name, pciaddname)) {
2300                                 long num_crtc;
2301                                 int res = -1;
2302
2303                                 adev->enable_virtual_display = true;
2304
2305                                 if (pciaddname_tmp)
2306                                         res = kstrtol(pciaddname_tmp, 10,
2307                                                       &num_crtc);
2308
2309                                 if (!res) {
2310                                         if (num_crtc < 1)
2311                                                 num_crtc = 1;
2312                                         if (num_crtc > 6)
2313                                                 num_crtc = 6;
2314                                         adev->mode_info.num_crtc = num_crtc;
2315                                 } else {
2316                                         adev->mode_info.num_crtc = 1;
2317                                 }
2318                                 break;
2319                         }
2320                 }
2321
2322                 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
2323                          amdgpu_virtual_display, pci_address_name,
2324                          adev->enable_virtual_display, adev->mode_info.num_crtc);
2325
2326                 kfree(pciaddstr);
2327         }
2328 }
2329
2330 void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
2331 {
2332         if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
2333                 adev->mode_info.num_crtc = 1;
2334                 adev->enable_virtual_display = true;
2335                 DRM_INFO("virtual_display:%d, num_crtc:%d\n",
2336                          adev->enable_virtual_display, adev->mode_info.num_crtc);
2337         }
2338 }
2339
/**
 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the asic configuration parameters specified in the gpu info
 * firmware and makes them available to the driver for use in configuring
 * the asic.  Nothing is loaded when an IP discovery binary is present or
 * when the asic type has no gpu_info firmware.
 *
 * Returns 0 on success or when no firmware is needed, the error from
 * amdgpu_ucode_request() if the firmware cannot be loaded, or -EINVAL if
 * the table's major version is not supported.
 */
static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
{
        const char *chip_name;
        int err;
        const struct gpu_info_firmware_header_v1_0 *hdr;

        adev->firmware.gpu_info_fw = NULL;

        /* with a discovery binary the gpu_info firmware is not consulted */
        if (adev->mman.discovery_bin)
                return 0;

        /* only the asics listed here ship a gpu_info firmware */
        switch (adev->asic_type) {
        default:
                return 0;
        case CHIP_VEGA10:
                chip_name = "vega10";
                break;
        case CHIP_VEGA12:
                chip_name = "vega12";
                break;
        case CHIP_RAVEN:
                if (adev->apu_flags & AMD_APU_IS_RAVEN2)
                        chip_name = "raven2";
                else if (adev->apu_flags & AMD_APU_IS_PICASSO)
                        chip_name = "picasso";
                else
                        chip_name = "raven";
                break;
        case CHIP_ARCTURUS:
                chip_name = "arcturus";
                break;
        case CHIP_NAVI12:
                chip_name = "navi12";
                break;
        }

        err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw,
                                   "amdgpu/%s_gpu_info.bin", chip_name);
        if (err) {
                dev_err(adev->dev,
                        "Failed to get gpu_info firmware \"%s_gpu_info.bin\"\n",
                        chip_name);
                goto out;
        }

        hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
        amdgpu_ucode_print_gpu_info_hdr(&hdr->header);

        switch (hdr->version_major) {
        case 1:
        {
                /* the table payload starts at ucode_array_offset_bytes */
                const struct gpu_info_firmware_v1_0 *gpu_info_fw =
                        (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
                                                                le32_to_cpu(hdr->header.ucode_array_offset_bytes));

                /*
                 * Navi12 only takes the soc bounding box from this firmware
                 * and skips the gfx configuration below.
                 * Should be dropped when DAL no longer needs it.
                 */
                if (adev->asic_type == CHIP_NAVI12)
                        goto parse_soc_bounding_box;

                adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
                adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
                adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
                adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
                adev->gfx.config.max_texture_channel_caches =
                        le32_to_cpu(gpu_info_fw->gc_num_tccs);
                adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
                adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
                adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
                adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
                adev->gfx.config.double_offchip_lds_buf =
                        le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
                adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
                adev->gfx.cu_info.max_waves_per_simd =
                        le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
                adev->gfx.cu_info.max_scratch_slots_per_cu =
                        le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
                adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
                /* minor version 1 and later add the sc/packer counts */
                if (hdr->version_minor >= 1) {
                        /* same payload, reinterpreted as the v1.1 layout */
                        const struct gpu_info_firmware_v1_1 *gpu_info_fw =
                                (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
                                                                        le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                        adev->gfx.config.num_sc_per_sh =
                                le32_to_cpu(gpu_info_fw->num_sc_per_sh);
                        adev->gfx.config.num_packer_per_sc =
                                le32_to_cpu(gpu_info_fw->num_packer_per_sc);
                }

parse_soc_bounding_box:
                /*
                 * soc bounding box info is not integrated in discovery table,
                 * we always need to parse it from gpu info firmware if needed.
                 */
                if (hdr->version_minor == 2) {
                        /*
                         * The stored pointer refers into the firmware image, so
                         * the image has to stay loaded while it is in use.
                         */
                        const struct gpu_info_firmware_v1_2 *gpu_info_fw =
                                (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
                                                                        le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                        adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
                }
                break;
        }
        default:
                dev_err(adev->dev,
                        "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
                err = -EINVAL;
                goto out;
        }
out:
        /*
         * NOTE(review): the firmware is not released on the error path here;
         * presumably the caller or device teardown does that - confirm.
         */
        return err;
}
2461
2462 /**
2463  * amdgpu_device_ip_early_init - run early init for hardware IPs
2464  *
2465  * @adev: amdgpu_device pointer
2466  *
 * Early initialization pass for hardware IPs.  The hardware IPs that make
 * up each asic are discovered and each IP's early_init callback is run.  This
2469  * is the first stage in initializing the asic.
2470  * Returns 0 on success, negative error code on failure.
2471  */
2472 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
2473 {
2474         struct pci_dev *parent;
2475         int i, r;
2476         bool total;
2477
2478         amdgpu_device_enable_virtual_display(adev);
2479
2480         if (amdgpu_sriov_vf(adev)) {
2481                 r = amdgpu_virt_request_full_gpu(adev, true);
2482                 if (r)
2483                         return r;
2484         }
2485
2486         switch (adev->asic_type) {
2487 #ifdef CONFIG_DRM_AMDGPU_SI
2488         case CHIP_VERDE:
2489         case CHIP_TAHITI:
2490         case CHIP_PITCAIRN:
2491         case CHIP_OLAND:
2492         case CHIP_HAINAN:
2493                 adev->family = AMDGPU_FAMILY_SI;
2494                 r = si_set_ip_blocks(adev);
2495                 if (r)
2496                         return r;
2497                 break;
2498 #endif
2499 #ifdef CONFIG_DRM_AMDGPU_CIK
2500         case CHIP_BONAIRE:
2501         case CHIP_HAWAII:
2502         case CHIP_KAVERI:
2503         case CHIP_KABINI:
2504         case CHIP_MULLINS:
2505                 if (adev->flags & AMD_IS_APU)
2506                         adev->family = AMDGPU_FAMILY_KV;
2507                 else
2508                         adev->family = AMDGPU_FAMILY_CI;
2509
2510                 r = cik_set_ip_blocks(adev);
2511                 if (r)
2512                         return r;
2513                 break;
2514 #endif
2515         case CHIP_TOPAZ:
2516         case CHIP_TONGA:
2517         case CHIP_FIJI:
2518         case CHIP_POLARIS10:
2519         case CHIP_POLARIS11:
2520         case CHIP_POLARIS12:
2521         case CHIP_VEGAM:
2522         case CHIP_CARRIZO:
2523         case CHIP_STONEY:
2524                 if (adev->flags & AMD_IS_APU)
2525                         adev->family = AMDGPU_FAMILY_CZ;
2526                 else
2527                         adev->family = AMDGPU_FAMILY_VI;
2528
2529                 r = vi_set_ip_blocks(adev);
2530                 if (r)
2531                         return r;
2532                 break;
2533         default:
2534                 r = amdgpu_discovery_set_ip_blocks(adev);
2535                 if (r)
2536                         return r;
2537                 break;
2538         }
2539
2540         if (amdgpu_has_atpx() &&
2541             (amdgpu_is_atpx_hybrid() ||
2542              amdgpu_has_atpx_dgpu_power_cntl()) &&
2543             ((adev->flags & AMD_IS_APU) == 0) &&
2544             !dev_is_removable(&adev->pdev->dev))
2545                 adev->flags |= AMD_IS_PX;
2546
2547         if (!(adev->flags & AMD_IS_APU)) {
2548                 parent = pcie_find_root_port(adev->pdev);
2549                 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2550         }
2551
2552
2553         adev->pm.pp_feature = amdgpu_pp_feature_mask;
2554         if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
2555                 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
2556         if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2557                 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
2558         if (!amdgpu_device_pcie_dynamic_switching_supported(adev))
2559                 adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
2560
2561         total = true;
2562         for (i = 0; i < adev->num_ip_blocks; i++) {
2563                 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
2564                         DRM_WARN("disabled ip block: %d <%s>\n",
2565                                   i, adev->ip_blocks[i].version->funcs->name);
2566                         adev->ip_blocks[i].status.valid = false;
2567                 } else {
2568                         if (adev->ip_blocks[i].version->funcs->early_init) {
2569                                 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2570                                 if (r == -ENOENT) {
2571                                         adev->ip_blocks[i].status.valid = false;
2572                                 } else if (r) {
2573                                         DRM_ERROR("early_init of IP block <%s> failed %d\n",
2574                                                   adev->ip_blocks[i].version->funcs->name, r);
2575                                         total = false;
2576                                 } else {
2577                                         adev->ip_blocks[i].status.valid = true;
2578                                 }
2579                         } else {
2580                                 adev->ip_blocks[i].status.valid = true;
2581                         }
2582                 }
2583                 /* get the vbios after the asic_funcs are set up */
2584                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2585                         r = amdgpu_device_parse_gpu_info_fw(adev);
2586                         if (r)
2587                                 return r;
2588
2589                         /* Read BIOS */
2590                         if (amdgpu_device_read_bios(adev)) {
2591                                 if (!amdgpu_get_bios(adev))
2592                                         return -EINVAL;
2593
2594                                 r = amdgpu_atombios_init(adev);
2595                                 if (r) {
2596                                         dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2597                                         amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2598                                         return r;
2599                                 }
2600                         }
2601
2602                         /*get pf2vf msg info at it's earliest time*/
2603                         if (amdgpu_sriov_vf(adev))
2604                                 amdgpu_virt_init_data_exchange(adev);
2605
2606                 }
2607         }
2608         if (!total)
2609                 return -ENODEV;
2610
2611         amdgpu_amdkfd_device_probe(adev);
2612         adev->cg_flags &= amdgpu_cg_mask;
2613         adev->pg_flags &= amdgpu_pg_mask;
2614
2615         return 0;
2616 }
2617
2618 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2619 {
2620         int i, r;
2621
2622         for (i = 0; i < adev->num_ip_blocks; i++) {
2623                 if (!adev->ip_blocks[i].status.sw)
2624                         continue;
2625                 if (adev->ip_blocks[i].status.hw)
2626                         continue;
2627                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2628                     (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
2629                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2630                         r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2631                         if (r) {
2632                                 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2633                                           adev->ip_blocks[i].version->funcs->name, r);
2634                                 return r;
2635                         }
2636                         adev->ip_blocks[i].status.hw = true;
2637                 }
2638         }
2639
2640         return 0;
2641 }
2642
2643 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2644 {
2645         int i, r;
2646
2647         for (i = 0; i < adev->num_ip_blocks; i++) {
2648                 if (!adev->ip_blocks[i].status.sw)
2649                         continue;
2650                 if (adev->ip_blocks[i].status.hw)
2651                         continue;
2652                 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2653                 if (r) {
2654                         DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2655                                   adev->ip_blocks[i].version->funcs->name, r);
2656                         return r;
2657                 }
2658                 adev->ip_blocks[i].status.hw = true;
2659         }
2660
2661         return 0;
2662 }
2663
2664 static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2665 {
2666         int r = 0;
2667         int i;
2668         uint32_t smu_version;
2669
2670         if (adev->asic_type >= CHIP_VEGA10) {
2671                 for (i = 0; i < adev->num_ip_blocks; i++) {
2672                         if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2673                                 continue;
2674
2675                         if (!adev->ip_blocks[i].status.sw)
2676                                 continue;
2677
2678                         /* no need to do the fw loading again if already done*/
2679                         if (adev->ip_blocks[i].status.hw == true)
2680                                 break;
2681
2682                         if (amdgpu_in_reset(adev) || adev->in_suspend) {
2683                                 r = adev->ip_blocks[i].version->funcs->resume(adev);
2684                                 if (r) {
2685                                         DRM_ERROR("resume of IP block <%s> failed %d\n",
2686                                                           adev->ip_blocks[i].version->funcs->name, r);
2687                                         return r;
2688                                 }
2689                         } else {
2690                                 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2691                                 if (r) {
2692                                         DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2693                                                           adev->ip_blocks[i].version->funcs->name, r);
2694                                         return r;
2695                                 }
2696                         }
2697
2698                         adev->ip_blocks[i].status.hw = true;
2699                         break;
2700                 }
2701         }
2702
2703         if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2704                 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
2705
2706         return r;
2707 }
2708
2709 static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2710 {
2711         long timeout;
2712         int r, i;
2713
2714         for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2715                 struct amdgpu_ring *ring = adev->rings[i];
2716
2717                 /* No need to setup the GPU scheduler for rings that don't need it */
2718                 if (!ring || ring->no_scheduler)
2719                         continue;
2720
2721                 switch (ring->funcs->type) {
2722                 case AMDGPU_RING_TYPE_GFX:
2723                         timeout = adev->gfx_timeout;
2724                         break;
2725                 case AMDGPU_RING_TYPE_COMPUTE:
2726                         timeout = adev->compute_timeout;
2727                         break;
2728                 case AMDGPU_RING_TYPE_SDMA:
2729                         timeout = adev->sdma_timeout;
2730                         break;
2731                 default:
2732                         timeout = adev->video_timeout;
2733                         break;
2734                 }
2735
2736                 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL,
2737                                    DRM_SCHED_PRIORITY_COUNT,
2738                                    ring->num_hw_submission, 0,
2739                                    timeout, adev->reset_domain->wq,
2740                                    ring->sched_score, ring->name,
2741                                    adev->dev);
2742                 if (r) {
2743                         DRM_ERROR("Failed to create scheduler on ring %s.\n",
2744                                   ring->name);
2745                         return r;
2746                 }
2747                 r = amdgpu_uvd_entity_init(adev, ring);
2748                 if (r) {
2749                         DRM_ERROR("Failed to create UVD scheduling entity on ring %s.\n",
2750                                   ring->name);
2751                         return r;
2752                 }
2753                 r = amdgpu_vce_entity_init(adev, ring);
2754                 if (r) {
2755                         DRM_ERROR("Failed to create VCE scheduling entity on ring %s.\n",
2756                                   ring->name);
2757                         return r;
2758                 }
2759         }
2760
2761         amdgpu_xcp_update_partition_sched_list(adev);
2762
2763         return 0;
2764 }
2765
2766
2767 /**
2768  * amdgpu_device_ip_init - run init for hardware IPs
2769  *
2770  * @adev: amdgpu_device pointer
2771  *
2772  * Main initialization pass for hardware IPs.  The list of all the hardware
2773  * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2774  * are run.  sw_init initializes the software state associated with each IP
2775  * and hw_init initializes the hardware associated with each IP.
2776  * Returns 0 on success, negative error code on failure.
2777  */
2778 static int amdgpu_device_ip_init(struct amdgpu_device *adev)
2779 {
2780         int i, r;
2781
2782         r = amdgpu_ras_init(adev);
2783         if (r)
2784                 return r;
2785
2786         for (i = 0; i < adev->num_ip_blocks; i++) {
2787                 if (!adev->ip_blocks[i].status.valid)
2788                         continue;
2789                 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2790                 if (r) {
2791                         DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2792                                   adev->ip_blocks[i].version->funcs->name, r);
2793                         goto init_failed;
2794                 }
2795                 adev->ip_blocks[i].status.sw = true;
2796
2797                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2798                         /* need to do common hw init early so everything is set up for gmc */
2799                         r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2800                         if (r) {
2801                                 DRM_ERROR("hw_init %d failed %d\n", i, r);
2802                                 goto init_failed;
2803                         }
2804                         adev->ip_blocks[i].status.hw = true;
2805                 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2806                         /* need to do gmc hw init early so we can allocate gpu mem */
2807                         /* Try to reserve bad pages early */
2808                         if (amdgpu_sriov_vf(adev))
2809                                 amdgpu_virt_exchange_data(adev);
2810
2811                         r = amdgpu_device_mem_scratch_init(adev);
2812                         if (r) {
2813                                 DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
2814                                 goto init_failed;
2815                         }
2816                         r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2817                         if (r) {
2818                                 DRM_ERROR("hw_init %d failed %d\n", i, r);
2819                                 goto init_failed;
2820                         }
2821                         r = amdgpu_device_wb_init(adev);
2822                         if (r) {
2823                                 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
2824                                 goto init_failed;
2825                         }
2826                         adev->ip_blocks[i].status.hw = true;
2827
2828                         /* right after GMC hw init, we create CSA */
2829                         if (adev->gfx.mcbp) {
2830                                 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2831                                                                AMDGPU_GEM_DOMAIN_VRAM |
2832                                                                AMDGPU_GEM_DOMAIN_GTT,
2833                                                                AMDGPU_CSA_SIZE);
2834                                 if (r) {
2835                                         DRM_ERROR("allocate CSA failed %d\n", r);
2836                                         goto init_failed;
2837                                 }
2838                         }
2839
2840                         r = amdgpu_seq64_init(adev);
2841                         if (r) {
2842                                 DRM_ERROR("allocate seq64 failed %d\n", r);
2843                                 goto init_failed;
2844                         }
2845                 }
2846         }
2847
2848         if (amdgpu_sriov_vf(adev))
2849                 amdgpu_virt_init_data_exchange(adev);
2850
2851         r = amdgpu_ib_pool_init(adev);
2852         if (r) {
2853                 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2854                 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2855                 goto init_failed;
2856         }
2857
2858         r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2859         if (r)
2860                 goto init_failed;
2861
2862         r = amdgpu_device_ip_hw_init_phase1(adev);
2863         if (r)
2864                 goto init_failed;
2865
2866         r = amdgpu_device_fw_loading(adev);
2867         if (r)
2868                 goto init_failed;
2869
2870         r = amdgpu_device_ip_hw_init_phase2(adev);
2871         if (r)
2872                 goto init_failed;
2873
2874         /*
2875          * retired pages will be loaded from eeprom and reserved here,
2876          * it should be called after amdgpu_device_ip_hw_init_phase2  since
2877          * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2878          * for I2C communication which only true at this point.
2879          *
2880          * amdgpu_ras_recovery_init may fail, but the upper only cares the
2881          * failure from bad gpu situation and stop amdgpu init process
2882          * accordingly. For other failed cases, it will still release all
2883          * the resource and print error message, rather than returning one
2884          * negative value to upper level.
2885          *
2886          * Note: theoretically, this should be called before all vram allocations
2887          * to protect retired page from abusing
2888          */
2889         r = amdgpu_ras_recovery_init(adev);
2890         if (r)
2891                 goto init_failed;
2892
2893         /**
2894          * In case of XGMI grab extra reference for reset domain for this device
2895          */
2896         if (adev->gmc.xgmi.num_physical_nodes > 1) {
2897                 if (amdgpu_xgmi_add_device(adev) == 0) {
2898                         if (!amdgpu_sriov_vf(adev)) {
2899                                 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2900
2901                                 if (WARN_ON(!hive)) {
2902                                         r = -ENOENT;
2903                                         goto init_failed;
2904                                 }
2905
2906                                 if (!hive->reset_domain ||
2907                                     !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2908                                         r = -ENOENT;
2909                                         amdgpu_put_xgmi_hive(hive);
2910                                         goto init_failed;
2911                                 }
2912
2913                                 /* Drop the early temporary reset domain we created for device */
2914                                 amdgpu_reset_put_reset_domain(adev->reset_domain);
2915                                 adev->reset_domain = hive->reset_domain;
2916                                 amdgpu_put_xgmi_hive(hive);
2917                         }
2918                 }
2919         }
2920
2921         r = amdgpu_device_init_schedulers(adev);
2922         if (r)
2923                 goto init_failed;
2924
2925         if (adev->mman.buffer_funcs_ring->sched.ready)
2926                 amdgpu_ttm_set_buffer_funcs_status(adev, true);
2927
2928         /* Don't init kfd if whole hive need to be reset during init */
2929         if (!adev->gmc.xgmi.pending_reset) {
2930                 kgd2kfd_init_zone_device(adev);
2931                 amdgpu_amdkfd_device_init(adev);
2932         }
2933
2934         amdgpu_fru_get_product_info(adev);
2935
2936 init_failed:
2937
2938         return r;
2939 }
2940
2941 /**
2942  * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2943  *
2944  * @adev: amdgpu_device pointer
2945  *
2946  * Writes a reset magic value to the gart pointer in VRAM.  The driver calls
2947  * this function before a GPU reset.  If the value is retained after a
2948  * GPU reset, VRAM has not been lost.  Some GPU resets may destry VRAM contents.
2949  */
2950 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
2951 {
2952         memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2953 }
2954
2955 /**
2956  * amdgpu_device_check_vram_lost - check if vram is valid
2957  *
2958  * @adev: amdgpu_device pointer
2959  *
2960  * Checks the reset magic value written to the gart pointer in VRAM.
2961  * The driver calls this after a GPU reset to see if the contents of
2962  * VRAM is lost or now.
2963  * returns true if vram is lost, false if not.
2964  */
2965 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
2966 {
2967         if (memcmp(adev->gart.ptr, adev->reset_magic,
2968                         AMDGPU_RESET_MAGIC_NUM))
2969                 return true;
2970
2971         if (!amdgpu_in_reset(adev))
2972                 return false;
2973
2974         /*
2975          * For all ASICs with baco/mode1 reset, the VRAM is
2976          * always assumed to be lost.
2977          */
2978         switch (amdgpu_asic_reset_method(adev)) {
2979         case AMD_RESET_METHOD_BACO:
2980         case AMD_RESET_METHOD_MODE1:
2981                 return true;
2982         default:
2983                 return false;
2984         }
2985 }
2986
2987 /**
2988  * amdgpu_device_set_cg_state - set clockgating for amdgpu device
2989  *
2990  * @adev: amdgpu_device pointer
2991  * @state: clockgating state (gate or ungate)
2992  *
2993  * The list of all the hardware IPs that make up the asic is walked and the
2994  * set_clockgating_state callbacks are run.
2995  * Late initialization pass enabling clockgating for hardware IPs.
2996  * Fini or suspend, pass disabling clockgating for hardware IPs.
2997  * Returns 0 on success, negative error code on failure.
2998  */
2999
3000 int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
3001                                enum amd_clockgating_state state)
3002 {
3003         int i, j, r;
3004
3005         if (amdgpu_emu_mode == 1)
3006                 return 0;
3007
3008         for (j = 0; j < adev->num_ip_blocks; j++) {
3009                 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
3010                 if (!adev->ip_blocks[i].status.late_initialized)
3011                         continue;
3012                 /* skip CG for GFX, SDMA on S0ix */
3013                 if (adev->in_s0ix &&
3014                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3015                      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3016                         continue;
3017                 /* skip CG for VCE/UVD, it's handled specially */
3018                 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
3019                     adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
3020                     adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
3021                     adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
3022                     adev->ip_blocks[i].version->funcs->set_clockgating_state) {
3023                         /* enable clockgating to save power */
3024                         r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
3025                                                                                      state);
3026                         if (r) {
3027                                 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
3028                                           adev->ip_blocks[i].version->funcs->name, r);
3029                                 return r;
3030                         }
3031                 }
3032         }
3033
3034         return 0;
3035 }
3036
3037 int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
3038                                enum amd_powergating_state state)
3039 {
3040         int i, j, r;
3041
3042         if (amdgpu_emu_mode == 1)
3043                 return 0;
3044
3045         for (j = 0; j < adev->num_ip_blocks; j++) {
3046                 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
3047                 if (!adev->ip_blocks[i].status.late_initialized)
3048                         continue;
3049                 /* skip PG for GFX, SDMA on S0ix */
3050                 if (adev->in_s0ix &&
3051                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3052                      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3053                         continue;
3054                 /* skip CG for VCE/UVD, it's handled specially */
3055                 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
3056                     adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
3057                     adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
3058                     adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
3059                     adev->ip_blocks[i].version->funcs->set_powergating_state) {
3060                         /* enable powergating to save power */
3061                         r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
3062                                                                                         state);
3063                         if (r) {
3064                                 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
3065                                           adev->ip_blocks[i].version->funcs->name, r);
3066                                 return r;
3067                         }
3068                 }
3069         }
3070         return 0;
3071 }
3072
3073 static int amdgpu_device_enable_mgpu_fan_boost(void)
3074 {
3075         struct amdgpu_gpu_instance *gpu_ins;
3076         struct amdgpu_device *adev;
3077         int i, ret = 0;
3078
3079         mutex_lock(&mgpu_info.mutex);
3080
3081         /*
3082          * MGPU fan boost feature should be enabled
3083          * only when there are two or more dGPUs in
3084          * the system
3085          */
3086         if (mgpu_info.num_dgpu < 2)
3087                 goto out;
3088
3089         for (i = 0; i < mgpu_info.num_dgpu; i++) {
3090                 gpu_ins = &(mgpu_info.gpu_ins[i]);
3091                 adev = gpu_ins->adev;
3092                 if (!(adev->flags & AMD_IS_APU) &&
3093                     !gpu_ins->mgpu_fan_enabled) {
3094                         ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
3095                         if (ret)
3096                                 break;
3097
3098                         gpu_ins->mgpu_fan_enabled = 1;
3099                 }
3100         }
3101
3102 out:
3103         mutex_unlock(&mgpu_info.mutex);
3104
3105         return ret;
3106 }
3107
3108 /**
3109  * amdgpu_device_ip_late_init - run late init for hardware IPs
3110  *
3111  * @adev: amdgpu_device pointer
3112  *
3113  * Late initialization pass for hardware IPs.  The list of all the hardware
3114  * IPs that make up the asic is walked and the late_init callbacks are run.
3115  * late_init covers any special initialization that an IP requires
3116  * after all of the have been initialized or something that needs to happen
3117  * late in the init process.
3118  * Returns 0 on success, negative error code on failure.
3119  */
3120 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
3121 {
3122         struct amdgpu_gpu_instance *gpu_instance;
3123         int i = 0, r;
3124
3125         for (i = 0; i < adev->num_ip_blocks; i++) {
3126                 if (!adev->ip_blocks[i].status.hw)
3127                         continue;
3128                 if (adev->ip_blocks[i].version->funcs->late_init) {
3129                         r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
3130                         if (r) {
3131                                 DRM_ERROR("late_init of IP block <%s> failed %d\n",
3132                                           adev->ip_blocks[i].version->funcs->name, r);
3133                                 return r;
3134                         }
3135                 }
3136                 adev->ip_blocks[i].status.late_initialized = true;
3137         }
3138
3139         r = amdgpu_ras_late_init(adev);
3140         if (r) {
3141                 DRM_ERROR("amdgpu_ras_late_init failed %d", r);
3142                 return r;
3143         }
3144
3145         if (!amdgpu_in_reset(adev))
3146                 amdgpu_ras_set_error_query_ready(adev, true);
3147
3148         amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
3149         amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
3150
3151         amdgpu_device_fill_reset_magic(adev);
3152
3153         r = amdgpu_device_enable_mgpu_fan_boost();
3154         if (r)
3155                 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
3156
3157         /* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
3158         if (amdgpu_passthrough(adev) &&
3159             ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
3160              adev->asic_type == CHIP_ALDEBARAN))
3161                 amdgpu_dpm_handle_passthrough_sbr(adev, true);
3162
3163         if (adev->gmc.xgmi.num_physical_nodes > 1) {
3164                 mutex_lock(&mgpu_info.mutex);
3165
3166                 /*
3167                  * Reset device p-state to low as this was booted with high.
3168                  *
3169                  * This should be performed only after all devices from the same
3170                  * hive get initialized.
3171                  *
3172                  * However, it's unknown how many device in the hive in advance.
3173                  * As this is counted one by one during devices initializations.
3174                  *
3175                  * So, we wait for all XGMI interlinked devices initialized.
3176                  * This may bring some delays as those devices may come from
3177                  * different hives. But that should be OK.
3178                  */
3179                 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
3180                         for (i = 0; i < mgpu_info.num_gpu; i++) {
3181                                 gpu_instance = &(mgpu_info.gpu_ins[i]);
3182                                 if (gpu_instance->adev->flags & AMD_IS_APU)
3183                                         continue;
3184
3185                                 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
3186                                                 AMDGPU_XGMI_PSTATE_MIN);
3187                                 if (r) {
3188                                         DRM_ERROR("pstate setting failed (%d).\n", r);
3189                                         break;
3190                                 }
3191                         }
3192                 }
3193
3194                 mutex_unlock(&mgpu_info.mutex);
3195         }
3196
3197         return 0;
3198 }
3199
3200 /**
3201  * amdgpu_device_smu_fini_early - smu hw_fini wrapper
3202  *
3203  * @adev: amdgpu_device pointer
3204  *
3205  * For ASICs need to disable SMC first
3206  */
3207 static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
3208 {
3209         int i, r;
3210
3211         if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
3212                 return;
3213
3214         for (i = 0; i < adev->num_ip_blocks; i++) {
3215                 if (!adev->ip_blocks[i].status.hw)
3216                         continue;
3217                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3218                         r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3219                         /* XXX handle errors */
3220                         if (r) {
3221                                 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
3222                                           adev->ip_blocks[i].version->funcs->name, r);
3223                         }
3224                         adev->ip_blocks[i].status.hw = false;
3225                         break;
3226                 }
3227         }
3228 }
3229
3230 static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
3231 {
3232         int i, r;
3233
3234         for (i = 0; i < adev->num_ip_blocks; i++) {
3235                 if (!adev->ip_blocks[i].version->funcs->early_fini)
3236                         continue;
3237
3238                 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
3239                 if (r) {
3240                         DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
3241                                   adev->ip_blocks[i].version->funcs->name, r);
3242                 }
3243         }
3244
3245         amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3246         amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3247
3248         amdgpu_amdkfd_suspend(adev, false);
3249
3250         /* Workaroud for ASICs need to disable SMC first */
3251         amdgpu_device_smu_fini_early(adev);
3252
3253         for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3254                 if (!adev->ip_blocks[i].status.hw)
3255                         continue;
3256
3257                 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3258                 /* XXX handle errors */
3259                 if (r) {
3260                         DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
3261                                   adev->ip_blocks[i].version->funcs->name, r);
3262                 }
3263
3264                 adev->ip_blocks[i].status.hw = false;
3265         }
3266
3267         if (amdgpu_sriov_vf(adev)) {
3268                 if (amdgpu_virt_release_full_gpu(adev, false))
3269                         DRM_ERROR("failed to release exclusive mode on fini\n");
3270         }
3271
3272         return 0;
3273 }
3274
3275 /**
3276  * amdgpu_device_ip_fini - run fini for hardware IPs
3277  *
3278  * @adev: amdgpu_device pointer
3279  *
3280  * Main teardown pass for hardware IPs.  The list of all the hardware
3281  * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
3282  * are run.  hw_fini tears down the hardware associated with each IP
3283  * and sw_fini tears down any software state associated with each IP.
3284  * Returns 0 on success, negative error code on failure.
3285  */
3286 static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
3287 {
3288         int i, r;
3289
3290         if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
3291                 amdgpu_virt_release_ras_err_handler_data(adev);
3292
3293         if (adev->gmc.xgmi.num_physical_nodes > 1)
3294                 amdgpu_xgmi_remove_device(adev);
3295
3296         amdgpu_amdkfd_device_fini_sw(adev);
3297
3298         for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3299                 if (!adev->ip_blocks[i].status.sw)
3300                         continue;
3301
3302                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
3303                         amdgpu_ucode_free_bo(adev);
3304                         amdgpu_free_static_csa(&adev->virt.csa_obj);
3305                         amdgpu_device_wb_fini(adev);
3306                         amdgpu_device_mem_scratch_fini(adev);
3307                         amdgpu_ib_pool_fini(adev);
3308                         amdgpu_seq64_fini(adev);
3309                 }
3310
3311                 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
3312                 /* XXX handle errors */
3313                 if (r) {
3314                         DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
3315                                   adev->ip_blocks[i].version->funcs->name, r);
3316                 }
3317                 adev->ip_blocks[i].status.sw = false;
3318                 adev->ip_blocks[i].status.valid = false;
3319         }
3320
3321         for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3322                 if (!adev->ip_blocks[i].status.late_initialized)
3323                         continue;
3324                 if (adev->ip_blocks[i].version->funcs->late_fini)
3325                         adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
3326                 adev->ip_blocks[i].status.late_initialized = false;
3327         }
3328
3329         amdgpu_ras_fini(adev);
3330
3331         return 0;
3332 }
3333
3334 /**
3335  * amdgpu_device_delayed_init_work_handler - work handler for IB tests
3336  *
3337  * @work: work_struct.
3338  */
3339 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
3340 {
3341         struct amdgpu_device *adev =
3342                 container_of(work, struct amdgpu_device, delayed_init_work.work);
3343         int r;
3344
3345         r = amdgpu_ib_ring_tests(adev);
3346         if (r)
3347                 DRM_ERROR("ib ring test failed (%d).\n", r);
3348 }
3349
3350 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
3351 {
3352         struct amdgpu_device *adev =
3353                 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
3354
3355         WARN_ON_ONCE(adev->gfx.gfx_off_state);
3356         WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
3357
3358         if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
3359                 adev->gfx.gfx_off_state = true;
3360 }
3361
3362 /**
3363  * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
3364  *
3365  * @adev: amdgpu_device pointer
3366  *
3367  * Main suspend function for hardware IPs.  The list of all the hardware
3368  * IPs that make up the asic is walked, clockgating is disabled and the
3369  * suspend callbacks are run.  suspend puts the hardware and software state
3370  * in each IP into a state suitable for suspend.
3371  * Returns 0 on success, negative error code on failure.
3372  */
3373 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
3374 {
3375         int i, r;
3376
3377         amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3378         amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3379
3380         /*
3381          * Per PMFW team's suggestion, driver needs to handle gfxoff
3382          * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
3383          * scenario. Add the missing df cstate disablement here.
3384          */
3385         if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
3386                 dev_warn(adev->dev, "Failed to disallow df cstate");
3387
3388         for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3389                 if (!adev->ip_blocks[i].status.valid)
3390                         continue;
3391
3392                 /* displays are handled separately */
3393                 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
3394                         continue;
3395
3396                 /* XXX handle errors */
3397                 r = adev->ip_blocks[i].version->funcs->suspend(adev);
3398                 /* XXX handle errors */
3399                 if (r) {
3400                         DRM_ERROR("suspend of IP block <%s> failed %d\n",
3401                                   adev->ip_blocks[i].version->funcs->name, r);
3402                         return r;
3403                 }
3404
3405                 adev->ip_blocks[i].status.hw = false;
3406         }
3407
3408         return 0;
3409 }
3410
3411 /**
3412  * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3413  *
3414  * @adev: amdgpu_device pointer
3415  *
3416  * Main suspend function for hardware IPs.  The list of all the hardware
3417  * IPs that make up the asic is walked, clockgating is disabled and the
3418  * suspend callbacks are run.  suspend puts the hardware and software state
3419  * in each IP into a state suitable for suspend.
3420  * Returns 0 on success, negative error code on failure.
3421  */
3422 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
3423 {
3424         int i, r;
3425
3426         if (adev->in_s0ix)
3427                 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
3428
3429         for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3430                 if (!adev->ip_blocks[i].status.valid)
3431                         continue;
3432                 /* displays are handled in phase1 */
3433                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3434                         continue;
3435                 /* PSP lost connection when err_event_athub occurs */
3436                 if (amdgpu_ras_intr_triggered() &&
3437                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3438                         adev->ip_blocks[i].status.hw = false;
3439                         continue;
3440                 }
3441
3442                 /* skip unnecessary suspend if we do not initialize them yet */
3443                 if (adev->gmc.xgmi.pending_reset &&
3444                     !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3445                       adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3446                       adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3447                       adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3448                         adev->ip_blocks[i].status.hw = false;
3449                         continue;
3450                 }
3451
3452                 /* skip suspend of gfx/mes and psp for S0ix
3453                  * gfx is in gfxoff state, so on resume it will exit gfxoff just
3454                  * like at runtime. PSP is also part of the always on hardware
3455                  * so no need to suspend it.
3456                  */
3457                 if (adev->in_s0ix &&
3458                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
3459                      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3460                      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
3461                         continue;
3462
3463                 /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3464                 if (adev->in_s0ix &&
3465                     (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
3466                      IP_VERSION(5, 0, 0)) &&
3467                     (adev->ip_blocks[i].version->type ==
3468                      AMD_IP_BLOCK_TYPE_SDMA))
3469                         continue;
3470
3471                 /* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot.
3472                  * These are in TMR, hence are expected to be reused by PSP-TOS to reload
3473                  * from this location and RLC Autoload automatically also gets loaded
3474                  * from here based on PMFW -> PSP message during re-init sequence.
3475                  * Therefore, the psp suspend & resume should be skipped to avoid destroy
3476                  * the TMR and reload FWs again for IMU enabled APU ASICs.
3477                  */
3478                 if (amdgpu_in_reset(adev) &&
3479                     (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3480                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3481                         continue;
3482
3483                 /* XXX handle errors */
3484                 r = adev->ip_blocks[i].version->funcs->suspend(adev);
3485                 /* XXX handle errors */
3486                 if (r) {
3487                         DRM_ERROR("suspend of IP block <%s> failed %d\n",
3488                                   adev->ip_blocks[i].version->funcs->name, r);
3489                 }
3490                 adev->ip_blocks[i].status.hw = false;
3491                 /* handle putting the SMC in the appropriate state */
3492                 if (!amdgpu_sriov_vf(adev)) {
3493                         if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3494                                 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3495                                 if (r) {
3496                                         DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3497                                                         adev->mp1_state, r);
3498                                         return r;
3499                                 }
3500                         }
3501                 }
3502         }
3503
3504         return 0;
3505 }
3506
3507 /**
3508  * amdgpu_device_ip_suspend - run suspend for hardware IPs
3509  *
3510  * @adev: amdgpu_device pointer
3511  *
3512  * Main suspend function for hardware IPs.  The list of all the hardware
3513  * IPs that make up the asic is walked, clockgating is disabled and the
3514  * suspend callbacks are run.  suspend puts the hardware and software state
3515  * in each IP into a state suitable for suspend.
3516  * Returns 0 on success, negative error code on failure.
3517  */
3518 int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3519 {
3520         int r;
3521
3522         if (amdgpu_sriov_vf(adev)) {
3523                 amdgpu_virt_fini_data_exchange(adev);
3524                 amdgpu_virt_request_full_gpu(adev, false);
3525         }
3526
3527         amdgpu_ttm_set_buffer_funcs_status(adev, false);
3528
3529         r = amdgpu_device_ip_suspend_phase1(adev);
3530         if (r)
3531                 return r;
3532         r = amdgpu_device_ip_suspend_phase2(adev);
3533
3534         if (amdgpu_sriov_vf(adev))
3535                 amdgpu_virt_release_full_gpu(adev, false);
3536
3537         return r;
3538 }
3539
3540 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
3541 {
3542         int i, r;
3543
3544         static enum amd_ip_block_type ip_order[] = {
3545                 AMD_IP_BLOCK_TYPE_COMMON,
3546                 AMD_IP_BLOCK_TYPE_GMC,
3547                 AMD_IP_BLOCK_TYPE_PSP,
3548                 AMD_IP_BLOCK_TYPE_IH,
3549         };
3550
3551         for (i = 0; i < adev->num_ip_blocks; i++) {
3552                 int j;
3553                 struct amdgpu_ip_block *block;
3554
3555                 block = &adev->ip_blocks[i];
3556                 block->status.hw = false;
3557
3558                 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
3559
3560                         if (block->version->type != ip_order[j] ||
3561                                 !block->status.valid)
3562                                 continue;
3563
3564                         r = block->version->funcs->hw_init(adev);
3565                         DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
3566                         if (r)
3567                                 return r;
3568                         block->status.hw = true;
3569                 }
3570         }
3571
3572         return 0;
3573 }
3574
3575 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
3576 {
3577         int i, r;
3578
3579         static enum amd_ip_block_type ip_order[] = {
3580                 AMD_IP_BLOCK_TYPE_SMC,
3581                 AMD_IP_BLOCK_TYPE_DCE,
3582                 AMD_IP_BLOCK_TYPE_GFX,
3583                 AMD_IP_BLOCK_TYPE_SDMA,
3584                 AMD_IP_BLOCK_TYPE_MES,
3585                 AMD_IP_BLOCK_TYPE_UVD,
3586                 AMD_IP_BLOCK_TYPE_VCE,
3587                 AMD_IP_BLOCK_TYPE_VCN,
3588                 AMD_IP_BLOCK_TYPE_JPEG
3589         };
3590
3591         for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3592                 int j;
3593                 struct amdgpu_ip_block *block;
3594
3595                 for (j = 0; j < adev->num_ip_blocks; j++) {
3596                         block = &adev->ip_blocks[j];
3597
3598                         if (block->version->type != ip_order[i] ||
3599                                 !block->status.valid ||
3600                                 block->status.hw)
3601                                 continue;
3602
3603                         if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3604                                 r = block->version->funcs->resume(adev);
3605                         else
3606                                 r = block->version->funcs->hw_init(adev);
3607
3608                         DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
3609                         if (r)
3610                                 return r;
3611                         block->status.hw = true;
3612                 }
3613         }
3614
3615         return 0;
3616 }
3617
3618 /**
3619  * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3620  *
3621  * @adev: amdgpu_device pointer
3622  *
3623  * First resume function for hardware IPs.  The list of all the hardware
3624  * IPs that make up the asic is walked and the resume callbacks are run for
3625  * COMMON, GMC, and IH.  resume puts the hardware into a functional state
3626  * after a suspend and updates the software state as necessary.  This
3627  * function is also used for restoring the GPU after a GPU reset.
3628  * Returns 0 on success, negative error code on failure.
3629  */
3630 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
3631 {
3632         int i, r;
3633
3634         for (i = 0; i < adev->num_ip_blocks; i++) {
3635                 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3636                         continue;
3637                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3638                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3639                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3640                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
3641
3642                         r = adev->ip_blocks[i].version->funcs->resume(adev);
3643                         if (r) {
3644                                 DRM_ERROR("resume of IP block <%s> failed %d\n",
3645                                           adev->ip_blocks[i].version->funcs->name, r);
3646                                 return r;
3647                         }
3648                         adev->ip_blocks[i].status.hw = true;
3649                 }
3650         }
3651
3652         return 0;
3653 }
3654
3655 /**
3656  * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3657  *
3658  * @adev: amdgpu_device pointer
3659  *
3660  * First resume function for hardware IPs.  The list of all the hardware
3661  * IPs that make up the asic is walked and the resume callbacks are run for
3662  * all blocks except COMMON, GMC, and IH.  resume puts the hardware into a
3663  * functional state after a suspend and updates the software state as
3664  * necessary.  This function is also used for restoring the GPU after a GPU
3665  * reset.
3666  * Returns 0 on success, negative error code on failure.
3667  */
3668 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
3669 {
3670         int i, r;
3671
3672         for (i = 0; i < adev->num_ip_blocks; i++) {
3673                 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3674                         continue;
3675                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3676                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3677                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3678                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3679                         continue;
3680                 r = adev->ip_blocks[i].version->funcs->resume(adev);
3681                 if (r) {
3682                         DRM_ERROR("resume of IP block <%s> failed %d\n",
3683                                   adev->ip_blocks[i].version->funcs->name, r);
3684                         return r;
3685                 }
3686                 adev->ip_blocks[i].status.hw = true;
3687         }
3688
3689         return 0;
3690 }
3691
3692 /**
3693  * amdgpu_device_ip_resume - run resume for hardware IPs
3694  *
3695  * @adev: amdgpu_device pointer
3696  *
3697  * Main resume function for hardware IPs.  The hardware IPs
3698  * are split into two resume functions because they are
3699  * also used in recovering from a GPU reset and some additional
3700  * steps need to be take between them.  In this case (S3/S4) they are
3701  * run sequentially.
3702  * Returns 0 on success, negative error code on failure.
3703  */
3704 static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
3705 {
3706         int r;
3707
3708         r = amdgpu_device_ip_resume_phase1(adev);
3709         if (r)
3710                 return r;
3711
3712         r = amdgpu_device_fw_loading(adev);
3713         if (r)
3714                 return r;
3715
3716         r = amdgpu_device_ip_resume_phase2(adev);
3717
3718         if (adev->mman.buffer_funcs_ring->sched.ready)
3719                 amdgpu_ttm_set_buffer_funcs_status(adev, true);
3720
3721         return r;
3722 }
3723
3724 /**
3725  * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3726  *
3727  * @adev: amdgpu_device pointer
3728  *
3729  * Query the VBIOS data tables to determine if the board supports SR-IOV.
3730  */
3731 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
3732 {
3733         if (amdgpu_sriov_vf(adev)) {
3734                 if (adev->is_atom_fw) {
3735                         if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
3736                                 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3737                 } else {
3738                         if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3739                                 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3740                 }
3741
3742                 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3743                         amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
3744         }
3745 }
3746
3747 /**
3748  * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3749  *
3750  * @asic_type: AMD asic type
3751  *
3752  * Check if there is DC (new modesetting infrastructre) support for an asic.
3753  * returns true if DC has support, false if not.
3754  */
3755 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3756 {
3757         switch (asic_type) {
3758 #ifdef CONFIG_DRM_AMDGPU_SI
3759         case CHIP_HAINAN:
3760 #endif
3761         case CHIP_TOPAZ:
3762                 /* chips with no display hardware */
3763                 return false;
3764 #if defined(CONFIG_DRM_AMD_DC)
3765         case CHIP_TAHITI:
3766         case CHIP_PITCAIRN:
3767         case CHIP_VERDE:
3768         case CHIP_OLAND:
3769                 /*
3770                  * We have systems in the wild with these ASICs that require
3771                  * LVDS and VGA support which is not supported with DC.
3772                  *
3773                  * Fallback to the non-DC driver here by default so as not to
3774                  * cause regressions.
3775                  */
3776 #if defined(CONFIG_DRM_AMD_DC_SI)
3777                 return amdgpu_dc > 0;
3778 #else
3779                 return false;
3780 #endif
3781         case CHIP_BONAIRE:
3782         case CHIP_KAVERI:
3783         case CHIP_KABINI:
3784         case CHIP_MULLINS:
3785                 /*
3786                  * We have systems in the wild with these ASICs that require
3787                  * VGA support which is not supported with DC.
3788                  *
3789                  * Fallback to the non-DC driver here by default so as not to
3790                  * cause regressions.
3791                  */
3792                 return amdgpu_dc > 0;
3793         default:
3794                 return amdgpu_dc != 0;
3795 #else
3796         default:
3797                 if (amdgpu_dc > 0)
3798                         DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
3799                 return false;
3800 #endif
3801         }
3802 }
3803
3804 /**
3805  * amdgpu_device_has_dc_support - check if dc is supported
3806  *
3807  * @adev: amdgpu_device pointer
3808  *
3809  * Returns true for supported, false for not supported
3810  */
3811 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3812 {
3813         if (adev->enable_virtual_display ||
3814             (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
3815                 return false;
3816
3817         return amdgpu_device_asic_has_dc_support(adev->asic_type);
3818 }
3819
3820 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3821 {
3822         struct amdgpu_device *adev =
3823                 container_of(__work, struct amdgpu_device, xgmi_reset_work);
3824         struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
3825
3826         /* It's a bug to not have a hive within this function */
3827         if (WARN_ON(!hive))
3828                 return;
3829
3830         /*
3831          * Use task barrier to synchronize all xgmi reset works across the
3832          * hive. task_barrier_enter and task_barrier_exit will block
3833          * until all the threads running the xgmi reset works reach
3834          * those points. task_barrier_full will do both blocks.
3835          */
3836         if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3837
3838                 task_barrier_enter(&hive->tb);
3839                 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
3840
3841                 if (adev->asic_reset_res)
3842                         goto fail;
3843
3844                 task_barrier_exit(&hive->tb);
3845                 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
3846
3847                 if (adev->asic_reset_res)
3848                         goto fail;
3849
3850                 amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
3851         } else {
3852
3853                 task_barrier_full(&hive->tb);
3854                 adev->asic_reset_res =  amdgpu_asic_reset(adev);
3855         }
3856
3857 fail:
3858         if (adev->asic_reset_res)
3859                 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
3860                          adev->asic_reset_res, adev_to_drm(adev)->unique);
3861         amdgpu_put_xgmi_hive(hive);
3862 }
3863
3864 static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3865 {
3866         char *input = amdgpu_lockup_timeout;
3867         char *timeout_setting = NULL;
3868         int index = 0;
3869         long timeout;
3870         int ret = 0;
3871
3872         /*
3873          * By default timeout for non compute jobs is 10000
3874          * and 60000 for compute jobs.
3875          * In SR-IOV or passthrough mode, timeout for compute
3876          * jobs are 60000 by default.
3877          */
3878         adev->gfx_timeout = msecs_to_jiffies(10000);
3879         adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3880         if (amdgpu_sriov_vf(adev))
3881                 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3882                                         msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
3883         else
3884                 adev->compute_timeout =  msecs_to_jiffies(60000);
3885
3886         if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3887                 while ((timeout_setting = strsep(&input, ",")) &&
3888                                 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3889                         ret = kstrtol(timeout_setting, 0, &timeout);
3890                         if (ret)
3891                                 return ret;
3892
3893                         if (timeout == 0) {
3894                                 index++;
3895                                 continue;
3896                         } else if (timeout < 0) {
3897                                 timeout = MAX_SCHEDULE_TIMEOUT;
3898                                 dev_warn(adev->dev, "lockup timeout disabled");
3899                                 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
3900                         } else {
3901                                 timeout = msecs_to_jiffies(timeout);
3902                         }
3903
3904                         switch (index++) {
3905                         case 0:
3906                                 adev->gfx_timeout = timeout;
3907                                 break;
3908                         case 1:
3909                                 adev->compute_timeout = timeout;
3910                                 break;
3911                         case 2:
3912                                 adev->sdma_timeout = timeout;
3913                                 break;
3914                         case 3:
3915                                 adev->video_timeout = timeout;
3916                                 break;
3917                         default:
3918                                 break;
3919                         }
3920                 }
3921                 /*
3922                  * There is only one value specified and
3923                  * it should apply to all non-compute jobs.
3924                  */
3925                 if (index == 1) {
3926                         adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3927                         if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3928                                 adev->compute_timeout = adev->gfx_timeout;
3929                 }
3930         }
3931
3932         return ret;
3933 }
3934
3935 /**
3936  * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3937  *
3938  * @adev: amdgpu_device pointer
3939  *
3940  * RAM direct mapped to GPU if IOMMU is not enabled or is pass through mode
3941  */
3942 static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3943 {
3944         struct iommu_domain *domain;
3945
3946         domain = iommu_get_domain_for_dev(adev->dev);
3947         if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3948                 adev->ram_is_direct_mapped = true;
3949 }
3950
3951 static const struct attribute *amdgpu_dev_attributes[] = {
3952         &dev_attr_pcie_replay_count.attr,
3953         NULL
3954 };
3955
3956 static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
3957 {
3958         if (amdgpu_mcbp == 1)
3959                 adev->gfx.mcbp = true;
3960         else if (amdgpu_mcbp == 0)
3961                 adev->gfx.mcbp = false;
3962
3963         if (amdgpu_sriov_vf(adev))
3964                 adev->gfx.mcbp = true;
3965
3966         if (adev->gfx.mcbp)
3967                 DRM_INFO("MCBP is enabled\n");
3968 }
3969
3970 /**
3971  * amdgpu_device_init - initialize the driver
3972  *
3973  * @adev: amdgpu_device pointer
3974  * @flags: driver flags
3975  *
3976  * Initializes the driver info and hw (all asics).
      *
      * Sequence as visible in this function: set software defaults and the
      * "invalid" register accessors, initialize locks, lists and work items,
      * map the register BAR (BAR 5, or BAR 2 before CHIP_BONAIRE), create the
      * reset domain, detect virtualization, run IP early init, remove
      * conflicting framebuffers, optionally reset and post the ASIC, read the
      * clock info, then run fence driver sw init, IP init, late init and the
      * sysfs / PMU / VGA / switcheroo registration.
      *
      * Failures of the sysfs and PMU registration steps are only logged; they
      * do not fail the init.
      *
3977  * Returns 0 for success or an error on failure.
      * Errors produced directly here are -ENOMEM (register ioremap or reset
      * domain creation failed), -EINVAL (posting is needed but no vBIOS was
      * found) and -EAGAIN (VF exclusive mode timeout); otherwise the error of
      * the failing helper is returned.
3978  * Called at driver startup.
3979  */
3980 int amdgpu_device_init(struct amdgpu_device *adev,
3981                        uint32_t flags)
3982 {
3983         struct drm_device *ddev = adev_to_drm(adev);
3984         struct pci_dev *pdev = adev->pdev;
3985         int r, i;
3986         bool px = false;
3987         u32 max_MBps;
3988         int tmp;
3989
3990         adev->shutdown = false;
3991         adev->flags = flags;
3992
             /* amdgpu_force_asic_type, when in range, overrides the type encoded in flags */
3993         if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3994                 adev->asic_type = amdgpu_force_asic_type;
3995         else
3996                 adev->asic_type = flags & AMD_ASIC_MASK;
3997
3998         adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
             /* emulation mode gets a 10x longer register timeout */
3999         if (amdgpu_emu_mode == 1)
4000                 adev->usec_timeout *= 10;
4001         adev->gmc.gart_size = 512 * 1024 * 1024;
4002         adev->accel_working = false;
4003         adev->num_rings = 0;
4004         RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
4005         adev->mman.buffer_funcs = NULL;
4006         adev->mman.buffer_funcs_ring = NULL;
4007         adev->vm_manager.vm_pte_funcs = NULL;
4008         adev->vm_manager.vm_pte_num_scheds = 0;
4009         adev->gmc.gmc_funcs = NULL;
4010         adev->harvest_ip_mask = 0x0;
4011         adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
4012         bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
4013
             /*
              * Point every indirect register accessor at an "invalid" handler
              * until something else installs a real one.
              */
4014         adev->smc_rreg = &amdgpu_invalid_rreg;
4015         adev->smc_wreg = &amdgpu_invalid_wreg;
4016         adev->pcie_rreg = &amdgpu_invalid_rreg;
4017         adev->pcie_wreg = &amdgpu_invalid_wreg;
4018         adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
4019         adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
4020         adev->pciep_rreg = &amdgpu_invalid_rreg;
4021         adev->pciep_wreg = &amdgpu_invalid_wreg;
4022         adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
4023         adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
4024         adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
4025         adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
4026         adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
4027         adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
4028         adev->didt_rreg = &amdgpu_invalid_rreg;
4029         adev->didt_wreg = &amdgpu_invalid_wreg;
4030         adev->gc_cac_rreg = &amdgpu_invalid_rreg;
4031         adev->gc_cac_wreg = &amdgpu_invalid_wreg;
4032         adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
4033         adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
4034
4035         DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
4036                  amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
4037                  pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
4038
4039         /* mutex initialization are all done here so we
4040          * can recall function without having locking issues
4041          */
4042         mutex_init(&adev->firmware.mutex);
4043         mutex_init(&adev->pm.mutex);
4044         mutex_init(&adev->gfx.gpu_clock_mutex);
4045         mutex_init(&adev->srbm_mutex);
4046         mutex_init(&adev->gfx.pipe_reserve_mutex);
4047         mutex_init(&adev->gfx.gfx_off_mutex);
4048         mutex_init(&adev->gfx.partition_mutex);
4049         mutex_init(&adev->grbm_idx_mutex);
4050         mutex_init(&adev->mn_lock);
4051         mutex_init(&adev->virt.vf_errors.lock);
4052         mutex_init(&adev->virt.rlcg_reg_lock);
4053         hash_init(adev->mn_hash);
4054         mutex_init(&adev->psp.mutex);
4055         mutex_init(&adev->notifier_lock);
4056         mutex_init(&adev->pm.stable_pstate_ctx_lock);
4057         mutex_init(&adev->benchmark_mutex);
4058
4059         amdgpu_device_init_apu_flags(adev);
4060
4061         r = amdgpu_device_check_arguments(adev);
4062         if (r)
4063                 return r;
4064
4065         spin_lock_init(&adev->mmio_idx_lock);
4066         spin_lock_init(&adev->smc_idx_lock);
4067         spin_lock_init(&adev->pcie_idx_lock);
4068         spin_lock_init(&adev->uvd_ctx_idx_lock);
4069         spin_lock_init(&adev->didt_idx_lock);
4070         spin_lock_init(&adev->gc_cac_idx_lock);
4071         spin_lock_init(&adev->se_cac_idx_lock);
4072         spin_lock_init(&adev->audio_endpt_idx_lock);
4073         spin_lock_init(&adev->mm_stats.lock);
4074         spin_lock_init(&adev->wb.lock);
4075
4076         INIT_LIST_HEAD(&adev->shadow_list);
4077         mutex_init(&adev->shadow_list_lock);
4078
4079         INIT_LIST_HEAD(&adev->reset_list);
4080
4081         INIT_LIST_HEAD(&adev->ras_list);
4082
4083         INIT_LIST_HEAD(&adev->pm.od_kobj_list);
4084
4085         INIT_DELAYED_WORK(&adev->delayed_init_work,
4086                           amdgpu_device_delayed_init_work_handler);
4087         INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
4088                           amdgpu_device_delay_enable_gfx_off);
4089
4090         INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
4091
4092         adev->gfx.gfx_off_req_count = 1;
4093         adev->gfx.gfx_off_residency = 0;
4094         adev->gfx.gfx_off_entrycount = 0;
4095         adev->pm.ac_power = power_supply_is_system_supplied() > 0;
4096
4097         atomic_set(&adev->throttling_logging_enabled, 1);
4098         /*
4099          * If throttling continues, logging will be performed every minute
4100          * to avoid log flooding. "-1" is subtracted since the thermal
4101          * throttling interrupt comes every second. Thus, the total logging
4102          * interval is 59 seconds(ratelimited printk interval) + 1(waiting
4103          * for throttling interrupt) = 60 seconds.
4104          */
4105         ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
4106         ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
4107
4108         /* Registers mapping */
4109         /* TODO: block userspace mapping of io register */
4110         if (adev->asic_type >= CHIP_BONAIRE) {
4111                 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
4112                 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
4113         } else {
4114                 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
4115                 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
4116         }
4117
4118         for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
4119                 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
4120
4121         adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
4122         if (!adev->rmmio)
4123                 return -ENOMEM;
4124
4125         DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
4126         DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);
4127
4128         /*
4129          * Reset domain needs to be present early, before XGMI hive discovered
4130          * (if any) and initialized to use reset sem and in_gpu reset flag
4131          * early on during init and before calling to RREG32.
4132          */
4133         adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
             /*
              * NOTE(review): from here on the plain "return" error paths leave
              * adev->rmmio mapped (and, further down, the reset domain allocated);
              * nothing in this function undoes them. Presumably the caller's
              * teardown path does -- confirm.
              */
4134         if (!adev->reset_domain)
4135                 return -ENOMEM;
4136
4137         /* detect hw virtualization here */
4138         amdgpu_detect_virtualization(adev);
4139
4140         amdgpu_device_get_pcie_info(adev);
4141
4142         r = amdgpu_device_get_job_timeout_settings(adev);
4143         if (r) {
4144                 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4145                 return r;
4146         }
4147
4148         amdgpu_device_set_mcbp(adev);
4149
4150         /* early init functions */
4151         r = amdgpu_device_ip_early_init(adev);
4152         if (r)
4153                 return r;
4154
4155         /* Get rid of things like offb */
4156         r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
4157         if (r)
4158                 return r;
4159
4160         /* Enable TMZ based on IP_VERSION */
4161         amdgpu_gmc_tmz_set(adev);
4162
4163         if (amdgpu_sriov_vf(adev) &&
4164             amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0))
4165                 /* VF MMIO access (except mailbox range) from CPU
4166                  * will be blocked during sriov runtime
4167                  */
4168                 adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT;
4169
4170         amdgpu_gmc_noretry_set(adev);
4171         /* Need to get xgmi info early to decide the reset behavior*/
4172         if (adev->gmc.xgmi.supported) {
4173                 r = adev->gfxhub.funcs->get_xgmi_info(adev);
4174                 if (r)
4175                         return r;
4176         }
4177
4178         /* enable PCIE atomic ops */
4179         if (amdgpu_sriov_vf(adev)) {
                     /*
                      * VF: take the answer from the pf2vf message; both the 32 and
                      * 64 bit completer flags must be set. have_atomics_support is
                      * left unchanged when no pf2vf data is available.
                      */
4180                 if (adev->virt.fw_reserve.p_pf2vf)
4181                         adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
4182                                                       adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
4183                                 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
4184         /* APUs w/ gfx9 onwards don't rely on PCIe atomics; rather, an
4185          * internal path natively supports atomics, so set have_atomics_support to true.
4186          */
4187         } else if ((adev->flags & AMD_IS_APU) &&
4188                    (amdgpu_ip_version(adev, GC_HWIP, 0) >
4189                     IP_VERSION(9, 0, 0))) {
4190                 adev->have_atomics_support = true;
4191         } else {
                     /* pci_enable_atomic_ops_to_root() returning 0 means supported */
4192                 adev->have_atomics_support =
4193                         !pci_enable_atomic_ops_to_root(adev->pdev,
4194                                           PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
4195                                           PCI_EXP_DEVCAP2_ATOMIC_COMP64);
4196         }
4197
4198         if (!adev->have_atomics_support)
4199                 dev_info(adev->dev, "PCIE atomic ops is not supported\n");
4200
4201         /* doorbell bar mapping and doorbell index init*/
4202         amdgpu_doorbell_init(adev);
4203
             /*
              * Emulation mode jumps straight to fence_driver_init, skipping
              * amdgpu_reset_init(), the reset-on-init check, vBIOS posting and
              * the clock info setup below.
              */
4204         if (amdgpu_emu_mode == 1) {
4205                 /* post the asic on emulation mode */
4206                 emu_soc_asic_init(adev);
4207                 goto fence_driver_init;
4208         }
4209
4210         amdgpu_reset_init(adev);
4211
4212         /* detect if we are with an SRIOV vbios */
4213         if (adev->bios)
4214                 amdgpu_device_detect_sriov_bios(adev);
4215
4216         /* check if we need to reset the asic
4217          *  E.g., driver was not cleanly unloaded previously, etc.
4218          */
4219         if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
4220                 if (adev->gmc.xgmi.num_physical_nodes) {
                             /*
                              * XGMI: no reset is issued here; it is only marked as
                              * pending (r keeps the 0 left by the earlier checks).
                              */
4221                         dev_info(adev->dev, "Pending hive reset.\n");
4222                         adev->gmc.xgmi.pending_reset = true;
4223                         /* Only need to init necessary block for SMU to handle the reset */
4224                         for (i = 0; i < adev->num_ip_blocks; i++) {
4225                                 if (!adev->ip_blocks[i].status.valid)
4226                                         continue;
                                     /*
                                      * Every valid block other than GMC, COMMON, IH
                                      * and SMC gets status.hw set here -- presumably
                                      * so that hw_init skips it; confirm.
                                      */
4227                                 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
4228                                       adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
4229                                       adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
4230                                       adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
4231                                         DRM_DEBUG("IP %s disabled for hw_init.\n",
4232                                                 adev->ip_blocks[i].version->funcs->name);
4233                                         adev->ip_blocks[i].status.hw = true;
4234                                 }
4235                         }
4236                 } else if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) &&
4237                                    !amdgpu_device_has_display_hardware(adev)) {
4238                                         r = psp_gpu_reset(adev);
4239                 } else {
4240                                 tmp = amdgpu_reset_method;
4241                                 /* It should do a default reset when loading or reloading the driver,
4242                                  * regardless of the module parameter reset_method.
4243                                  */
4244                                 amdgpu_reset_method = AMD_RESET_METHOD_NONE;
4245                                 r = amdgpu_asic_reset(adev);
4246                                 amdgpu_reset_method = tmp;
4247                 }
4248
4249                 if (r) {
4250                   dev_err(adev->dev, "asic reset on init failed\n");
4251                   goto failed;
4252                 }
4253         }
4254
4255         /* Post card if necessary */
4256         if (amdgpu_device_need_post(adev)) {
4257                 if (!adev->bios) {
4258                         dev_err(adev->dev, "no vBIOS found\n");
4259                         r = -EINVAL;
4260                         goto failed;
4261                 }
4262                 DRM_INFO("GPU posting now...\n");
4263                 r = amdgpu_device_asic_init(adev);
4264                 if (r) {
4265                         dev_err(adev->dev, "gpu post error!\n");
4266                         goto failed;
4267                 }
4268         }
4269
4270         if (adev->bios) {
4271                 if (adev->is_atom_fw) {
4272                         /* Initialize clocks */
4273                         r = amdgpu_atomfirmware_get_clock_info(adev);
4274                         if (r) {
4275                                 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
4276                                 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4277                                 goto failed;
4278                         }
4279                 } else {
4280                         /* Initialize clocks */
4281                         r = amdgpu_atombios_get_clock_info(adev);
4282                         if (r) {
4283                                 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
4284                                 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4285                                 goto failed;
4286                         }
4287                         /* init i2c buses */
4288                         if (!amdgpu_device_has_dc_support(adev))
4289                                 amdgpu_atombios_i2c_init(adev);
4290                 }
4291         }
4292
4293 fence_driver_init:
4294         /* Fence driver */
4295         r = amdgpu_fence_driver_sw_init(adev);
4296         if (r) {
4297                 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
4298                 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
4299                 goto failed;
4300         }
4301
4302         /* init the mode config */
4303         drm_mode_config_init(adev_to_drm(adev));
4304
4305         r = amdgpu_device_ip_init(adev);
4306         if (r) {
4307                 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
4308                 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
4309                 goto release_ras_con;
4310         }
4311
4312         amdgpu_fence_driver_hw_init(adev);
4313
4314         dev_info(adev->dev,
4315                 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
4316                         adev->gfx.config.max_shader_engines,
4317                         adev->gfx.config.max_sh_per_se,
4318                         adev->gfx.config.max_cu_per_sh,
4319                         adev->gfx.cu_info.number);
4320
4321         adev->accel_working = true;
4322
4323         amdgpu_vm_check_compute_bug(adev);
4324
4325         /* Initialize the buffer migration limit. */
4326         if (amdgpu_moverate >= 0)
4327                 max_MBps = amdgpu_moverate;
4328         else
4329                 max_MBps = 8; /* Allow 8 MB/s. */
4330         /* Get a log2 for easy divisions. */
4331         adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
4332
4333         /*
4334          * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
4335          * Otherwise the mgpu fan boost feature will be skipped due to the
4336          * gpu instance is counted less.
4337          */
4338         amdgpu_register_gpu_instance(adev);
4339
4340         /* enable clockgating, etc. after ib tests, etc. since some blocks require
4341          * explicit gating rather than handling it automatically.
4342          */
             /* Late init is skipped while an XGMI hive reset is still pending. */
4343         if (!adev->gmc.xgmi.pending_reset) {
4344                 r = amdgpu_device_ip_late_init(adev);
4345                 if (r) {
4346                         dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4347                         amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
4348                         goto release_ras_con;
4349                 }
4350                 /* must succeed. */
4351                 amdgpu_ras_resume(adev);
4352                 queue_delayed_work(system_wq, &adev->delayed_init_work,
4353                                    msecs_to_jiffies(AMDGPU_RESUME_MS));
4354         }
4355
4356         if (amdgpu_sriov_vf(adev)) {
4357                 amdgpu_virt_release_full_gpu(adev, true);
4358                 flush_delayed_work(&adev->delayed_init_work);
4359         }
4360
4361         /*
4362          * Place those sysfs registering after `late_init`. As some of those
4363          * operations performed in `late_init` might affect the sysfs
4364          * interfaces creation.
              *
              * None of the failures below aborts the init; they are only logged.
4365          */
4366         r = amdgpu_atombios_sysfs_init(adev);
4367         if (r)
4368                 drm_err(&adev->ddev,
4369                         "registering atombios sysfs failed (%d).\n", r);
4370
4371         r = amdgpu_pm_sysfs_init(adev);
4372         if (r)
4373                 DRM_ERROR("registering pm sysfs failed (%d).\n", r);
4374
4375         r = amdgpu_ucode_sysfs_init(adev);
4376         if (r) {
4377                 adev->ucode_sysfs_en = false;
4378                 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
4379         } else
4380                 adev->ucode_sysfs_en = true;
4381
4382         r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
4383         if (r)
4384                 dev_err(adev->dev, "Could not create amdgpu device attr\n");
4385
4386         r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
4387         if (r)
4388                 dev_err(adev->dev,
4389                         "Could not create amdgpu board attributes\n");
4390
4391         amdgpu_fru_sysfs_init(adev);
4392         amdgpu_reg_state_sysfs_init(adev);
4393
             /*
              * NOTE(review): when CONFIG_PERF_EVENTS is disabled, r still holds
              * the result of devm_device_add_group() above, so the check below
              * can report "amdgpu_pmu_init failed" for an unrelated error.
              */
4394         if (IS_ENABLED(CONFIG_PERF_EVENTS))
4395                 r = amdgpu_pmu_init(adev);
4396         if (r)
4397                 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
4398
4399         /* Have stored pci confspace at hand for restore in sudden PCI error */
4400         if (amdgpu_device_cache_pci_state(adev->pdev))
4401                 pci_restore_state(pdev);
4402
4403         /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
4404         /* this will fail for cards that aren't VGA class devices, just
4405          * ignore it
4406          */
4407         if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4408                 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
4409
4410         px = amdgpu_device_supports_px(ddev);
4411
             /*
              * Same condition as the unregister side in amdgpu_device_fini_sw():
              * PX support, or a non-removable device with an apple-gmux present.
              */
4412         if (px || (!dev_is_removable(&adev->pdev->dev) &&
4413                                 apple_gmux_detect(NULL, NULL)))
4414                 vga_switcheroo_register_client(adev->pdev,
4415                                                &amdgpu_switcheroo_ops, px);
4416
4417         if (px)
4418                 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
4419
             /* Hand the pending XGMI reset (flagged above) to the mgpu delayed work. */
4420         if (adev->gmc.xgmi.pending_reset)
4421                 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
4422                                    msecs_to_jiffies(AMDGPU_RESUME_MS));
4423
4424         amdgpu_device_check_iommu_direct_map(adev);
4425
4426         return 0;
4427
             /* Error exit for failures in IP init / late init; falls through to "failed". */
4428 release_ras_con:
4429         if (amdgpu_sriov_vf(adev))
4430                 amdgpu_virt_release_full_gpu(adev, true);
4431
4432         /* failed in exclusive mode due to timeout */
4433         if (amdgpu_sriov_vf(adev) &&
4434                 !amdgpu_sriov_runtime(adev) &&
4435                 amdgpu_virt_mmio_blocked(adev) &&
4436                 !amdgpu_virt_wait_reset(adev)) {
4437                 dev_err(adev->dev, "VF exclusive mode timeout\n");
4438                 /* Don't send request since VF is inactive. */
4439                 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4440                 adev->virt.ops = NULL;
                     /* replaces the original error code with -EAGAIN */
4441                 r = -EAGAIN;
4442         }
4443         amdgpu_release_ras_context(adev);
4444
4445 failed:
4446         amdgpu_vf_error_trans_all(adev);
4447
4448         return r;
4449 }
4450
/*
 * amdgpu_device_unmap_mmio - drop every CPU mapping of the device
 *
 * @adev: amdgpu_device pointer
 *
 * Zaps the userspace mappings of the DRM anon inode, then unmaps the
 * doorbells, the register mapping (adev->rmmio) and, when present, the
 * aperture mapping (adev->mman.aper_base_kaddr), clearing the two pointers.
 * Called from amdgpu_device_fini_hw() when the DRM device is unplugged.
 */
4451 static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4452 {
4453
4454         /* Clear all CPU mappings pointing to this device */
4455         unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4456
4457         /* Unmap all mapped bars - Doorbell, registers and VRAM */
4458         amdgpu_doorbell_fini(adev);
4459
4460         iounmap(adev->rmmio);
4461         adev->rmmio = NULL;
4462         if (adev->mman.aper_base_kaddr)
4463                 iounmap(adev->mman.aper_base_kaddr);
4464         adev->mman.aper_base_kaddr = NULL;
4465
4466         /* Memory manager related */
             /*
              * Release the write-combining setup of the aperture; skipped when
              * the GPU is connected to the CPU over XGMI or is an app APU.
              */
4467         if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
4468                 arch_phys_wc_del(adev->gmc.vram_mtrr);
4469                 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4470         }
4471 }
4472
4473 /**
4474  * amdgpu_device_fini_hw - tear down the driver
4475  *
4476  * @adev: amdgpu_device pointer
4477  *
4478  * Tear down the driver info (all asics).
      *
      * Hardware facing half of the teardown: waits for the delayed init work,
      * marks the device as shutting down, disables interrupts and display,
      * stops the fence driver hardware side, removes the sysfs entries and runs
      * the early IP fini. When the DRM device is already unplugged, all MMIO
      * mappings are dropped at the end.
      *
4479  * Called at driver shutdown.
4480  */
4481 void amdgpu_device_fini_hw(struct amdgpu_device *adev)
4482 {
4483         dev_info(adev->dev, "amdgpu: finishing device.\n");
4484         flush_delayed_work(&adev->delayed_init_work);
4485         adev->shutdown = true;
4486
4487         /* make sure IB test finished before entering exclusive mode
4488          * to avoid preemption on IB test
4489          */
             /*
              * NOTE(review): the return value of amdgpu_virt_request_full_gpu()
              * is ignored here, whereas amdgpu_device_suspend() checks it --
              * confirm this is intentional for the teardown path.
              */
4490         if (amdgpu_sriov_vf(adev)) {
4491                 amdgpu_virt_request_full_gpu(adev, false);
4492                 amdgpu_virt_fini_data_exchange(adev);
4493         }
4494
4495         /* disable all interrupts */
4496         amdgpu_irq_disable_all(adev);
             /* shut the display down through the legacy or the atomic helper */
4497         if (adev->mode_info.mode_config_initialized) {
4498                 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4499                         drm_helper_force_disable_all(adev_to_drm(adev));
4500                 else
4501                         drm_atomic_helper_shutdown(adev_to_drm(adev));
4502         }
4503         amdgpu_fence_driver_hw_fini(adev);
4504
4505         if (adev->mman.initialized)
4506                 drain_workqueue(adev->mman.bdev.wq);
4507
             /* remove the sysfs entries that amdgpu_device_init() registered */
4508         if (adev->pm.sysfs_initialized)
4509                 amdgpu_pm_sysfs_fini(adev);
4510         if (adev->ucode_sysfs_en)
4511                 amdgpu_ucode_sysfs_fini(adev);
4512         sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
4513         amdgpu_fru_sysfs_fini(adev);
4514
4515         amdgpu_reg_state_sysfs_fini(adev);
4516
4517         /* disable ras feature must before hw fini */
4518         amdgpu_ras_pre_fini(adev);
4519
4520         amdgpu_ttm_set_buffer_funcs_status(adev, false);
4521
4522         amdgpu_device_ip_fini_early(adev);
4523
4524         amdgpu_irq_fini_hw(adev);
4525
4526         if (adev->mman.initialized)
4527                 ttm_device_clear_dma_mappings(&adev->mman.bdev);
4528
4529         amdgpu_gart_dummy_page_fini(adev);
4530
             /*
              * Unplugged device: drop the MMIO mappings now. The regular case is
              * handled later in amdgpu_device_fini_sw().
              */
4531         if (drm_dev_is_unplugged(adev_to_drm(adev)))
4532                 amdgpu_device_unmap_mmio(adev);
4533
4534 }
4535
/*
 * amdgpu_device_fini_sw - release the software state of the device
 *
 * @adev: amdgpu_device pointer
 *
 * Software half of the teardown: finishes the fence driver and IP blocks,
 * releases firmware, the gang submit fence, the vBIOS and FRU copies,
 * unregisters from vga_switcheroo and the VGA arbiter, unmaps the registers
 * and doorbells, and drops the reset domain and the cached PCI state.
 */
4536 void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4537 {
4538         int idx;
4539         bool px;
4540
4541         amdgpu_fence_driver_sw_fini(adev);
4542         amdgpu_device_ip_fini(adev);
4543         amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
4544         adev->accel_working = false;
             /* drop the reference installed with RCU_INIT_POINTER() at init time */
4545         dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
4546
4547         amdgpu_reset_fini(adev);
4548
4549         /* free i2c buses */
4550         if (!amdgpu_device_has_dc_support(adev))
4551                 amdgpu_i2c_fini(adev);
4552
4553         if (amdgpu_emu_mode != 1)
4554                 amdgpu_atombios_fini(adev);
4555
4556         kfree(adev->bios);
4557         adev->bios = NULL;
4558
4559         kfree(adev->fru_info);
4560         adev->fru_info = NULL;
4561
4562         px = amdgpu_device_supports_px(adev_to_drm(adev));
4563
             /* mirrors the registration condition used in amdgpu_device_init() */
4564         if (px || (!dev_is_removable(&adev->pdev->dev) &&
4565                                 apple_gmux_detect(NULL, NULL)))
4566                 vga_switcheroo_unregister_client(adev->pdev);
4567
4568         if (px)
4569                 vga_switcheroo_fini_domain_pm_ops(adev->dev);
4570
4571         if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4572                 vga_client_unregister(adev->pdev);
4573
             /*
              * Registers and doorbells are only unmapped here while
              * drm_dev_enter() succeeds. For an unplugged device
              * amdgpu_device_fini_hw() already called amdgpu_device_unmap_mmio();
              * presumably drm_dev_enter() fails in that case -- confirm.
              */
4574         if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4575
4576                 iounmap(adev->rmmio);
4577                 adev->rmmio = NULL;
4578                 amdgpu_doorbell_fini(adev);
4579                 drm_dev_exit(idx);
4580         }
4581
4582         if (IS_ENABLED(CONFIG_PERF_EVENTS))
4583                 amdgpu_pmu_fini(adev);
4584         if (adev->mman.discovery_bin)
4585                 amdgpu_discovery_fini(adev);
4586
4587         amdgpu_reset_put_reset_domain(adev->reset_domain);
4588         adev->reset_domain = NULL;
4589
             /*
              * NOTE(review): unlike bios and fru_info above, pci_state is not
              * reset to NULL after being freed.
              */
4590         kfree(adev->pci_state);
4591
4592 }
4593
4594 /**
4595  * amdgpu_device_evict_resources - evict device resources
4596  * @adev: amdgpu device object
4597  *
4598  * Evicts all ttm device resources(vram BOs, gart table) from the lru list
4599  * of the vram memory type. Mainly used for evicting device resources
4600  * at suspend time.
4601  *
4602  */
4603 static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
4604 {
4605         int ret;
4606
4607         /* No need to evict vram on APUs for suspend to ram or s2idle */
4608         if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
4609                 return 0;
4610
4611         ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4612         if (ret)
4613                 DRM_WARN("evicting device resources failed\n");
4614         return ret;
4615 }
4616
4617 /*
4618  * Suspend & resume.
4619  */
4620 /**
4621  * amdgpu_device_prepare - prepare for device suspend
4622  *
4623  * @dev: drm dev pointer
4624  *
4625  * Prepare to put the hw in the suspend state (all asics).
4626  * Returns 0 for success or an error on failure.
4627  * Called at driver suspend.
4628  */
4629 int amdgpu_device_prepare(struct drm_device *dev)
4630 {
4631         struct amdgpu_device *adev = drm_to_adev(dev);
4632         int i, r;
4633
4634         amdgpu_choose_low_power_state(adev);
4635
4636         if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4637                 return 0;
4638
4639         /* Evict the majority of BOs before starting suspend sequence */
4640         r = amdgpu_device_evict_resources(adev);
4641         if (r)
4642                 goto unprepare;
4643
4644         flush_delayed_work(&adev->gfx.gfx_off_delay_work);
4645
4646         for (i = 0; i < adev->num_ip_blocks; i++) {
4647                 if (!adev->ip_blocks[i].status.valid)
4648                         continue;
4649                 if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
4650                         continue;
4651                 r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev);
4652                 if (r)
4653                         goto unprepare;
4654         }
4655
4656         return 0;
4657
4658 unprepare:
4659         adev->in_s0ix = adev->in_s3 = false;
4660
4661         return r;
4662 }
4663
4664 /**
4665  * amdgpu_device_suspend - initiate device suspend
4666  *
4667  * @dev: drm dev pointer
4668  * @fbcon : notify the fbdev of suspend
4669  *
4670  * Puts the hw in the suspend state (all asics).
      *
      * Does nothing when the device is in DRM_SWITCH_POWER_OFF. Otherwise sets
      * adev->in_suspend and runs the suspend sequence: phase 1 of the IP
      * suspend, KFD suspend (not for s0ix), resource eviction, fence driver
      * hw fini and phase 2 of the IP suspend.
      *
4671  * Returns 0 for success or an error on failure.
4672  * Called at driver suspend.
4673  */
4674 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
4675 {
4676         struct amdgpu_device *adev = drm_to_adev(dev);
4677         int r = 0;
4678
4679         if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4680                 return 0;
4681
4682         adev->in_suspend = true;
4683
             /*
              * NOTE(review): the error returns below leave in_suspend set, and
              * the eviction failure path returns without the
              * amdgpu_virt_release_full_gpu() call made further down -- confirm
              * the callers recover from both.
              */
4684         if (amdgpu_sriov_vf(adev)) {
4685                 amdgpu_virt_fini_data_exchange(adev);
4686                 r = amdgpu_virt_request_full_gpu(adev, false);
4687                 if (r)
4688                         return r;
4689         }
4690
             /* a failed smart shift update is only warned about */
4691         if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4692                 DRM_WARN("smart shift update failed\n");
4693
4694         if (fbcon)
4695                 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
4696
4697         cancel_delayed_work_sync(&adev->delayed_init_work);
4698
4699         amdgpu_ras_suspend(adev);
4700
4701         amdgpu_device_ip_suspend_phase1(adev);
4702
4703         if (!adev->in_s0ix)
4704                 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
4705
4706         r = amdgpu_device_evict_resources(adev);
4707         if (r)
4708                 return r;
4709
4710         amdgpu_ttm_set_buffer_funcs_status(adev, false);
4711
4712         amdgpu_fence_driver_hw_fini(adev);
4713
4714         amdgpu_device_ip_suspend_phase2(adev);
4715
4716         if (amdgpu_sriov_vf(adev))
4717                 amdgpu_virt_release_full_gpu(adev, false);
4718
4719         r = amdgpu_dpm_notify_rlc_state(adev, false);
4720         if (r)
4721                 return r;
4722
4723         return 0;
4724 }
4725
4726 /**
4727  * amdgpu_device_resume - initiate device resume
4728  *
4729  * @dev: drm dev pointer
4730  * @fbcon : notify the fbdev of resume
4731  *
4732  * Bring the hw back to operating state (all asics).
4733  * Returns 0 for success or an error on failure.
4734  * Called at driver resume.
4735  */
4736 int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
4737 {
4738         struct amdgpu_device *adev = drm_to_adev(dev);
4739         int r = 0;
4740
4741         if (amdgpu_sriov_vf(adev)) {
4742                 r = amdgpu_virt_request_full_gpu(adev, true);
4743                 if (r)
4744                         return r;
4745         }
4746
4747         if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4748                 return 0;
4749
4750         if (adev->in_s0ix)
4751                 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
4752
4753         /* post card */
4754         if (amdgpu_device_need_post(adev)) {
4755                 r = amdgpu_device_asic_init(adev);
4756                 if (r)
4757                         dev_err(adev->dev, "amdgpu asic init failed\n");
4758         }
4759
4760         r = amdgpu_device_ip_resume(adev);
4761
4762         if (r) {
4763                 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
4764                 goto exit;
4765         }
4766         amdgpu_fence_driver_hw_init(adev);
4767
4768         if (!adev->in_s0ix) {
4769                 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4770                 if (r)
4771                         goto exit;
4772         }
4773
4774         r = amdgpu_device_ip_late_init(adev);
4775         if (r)
4776                 goto exit;
4777
4778         queue_delayed_work(system_wq, &adev->delayed_init_work,
4779                            msecs_to_jiffies(AMDGPU_RESUME_MS));
4780 exit:
4781         if (amdgpu_sriov_vf(adev)) {
4782                 amdgpu_virt_init_data_exchange(adev);
4783                 amdgpu_virt_release_full_gpu(adev, true);
4784         }
4785
4786         if (r)
4787                 return r;
4788
4789         /* Make sure IB tests flushed */
4790         flush_delayed_work(&adev->delayed_init_work);
4791
4792         if (fbcon)
4793                 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
4794
4795         amdgpu_ras_resume(adev);
4796
4797         if (adev->mode_info.num_crtc) {
4798                 /*
4799                  * Most of the connector probing functions try to acquire runtime pm
4800                  * refs to ensure that the GPU is powered on when connector polling is
4801                  * performed. Since we're calling this from a runtime PM callback,
4802                  * trying to acquire rpm refs will cause us to deadlock.
4803                  *
4804                  * Since we're guaranteed to be holding the rpm lock, it's safe to
4805                  * temporarily disable the rpm helpers so this doesn't deadlock us.
4806                  */
4807 #ifdef CONFIG_PM
4808                 dev->dev->power.disable_depth++;
4809 #endif
4810                 if (!adev->dc_enabled)
4811                         drm_helper_hpd_irq_event(dev);
4812                 else
4813                         drm_kms_helper_hotplug_event(dev);
4814 #ifdef CONFIG_PM
4815                 dev->dev->power.disable_depth--;
4816 #endif
4817         }
4818         adev->in_suspend = false;
4819
4820         if (adev->enable_mes)
4821                 amdgpu_mes_self_test(adev);
4822
4823         if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4824                 DRM_WARN("smart shift update failed\n");
4825
4826         return 0;
4827 }
4828
4829 /**
4830  * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4831  *
4832  * @adev: amdgpu_device pointer
4833  *
4834  * The list of all the hardware IPs that make up the asic is walked and
4835  * the check_soft_reset callbacks are run.  check_soft_reset determines
4836  * if the asic is still hung or not.
4837  * Returns true if any of the IPs are still in a hung state, false if not.
4838  */
4839 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
4840 {
4841         int i;
4842         bool asic_hang = false;
4843
4844         if (amdgpu_sriov_vf(adev))
4845                 return true;
4846
4847         if (amdgpu_asic_need_full_reset(adev))
4848                 return true;
4849
4850         for (i = 0; i < adev->num_ip_blocks; i++) {
4851                 if (!adev->ip_blocks[i].status.valid)
4852                         continue;
4853                 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4854                         adev->ip_blocks[i].status.hang =
4855                                 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4856                 if (adev->ip_blocks[i].status.hang) {
4857                         dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
4858                         asic_hang = true;
4859                 }
4860         }
4861         return asic_hang;
4862 }
4863
4864 /**
4865  * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4866  *
4867  * @adev: amdgpu_device pointer
4868  *
4869  * The list of all the hardware IPs that make up the asic is walked and the
4870  * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
4871  * handles any IP specific hardware or software state changes that are
4872  * necessary for a soft reset to succeed.
4873  * Returns 0 on success, negative error code on failure.
4874  */
4875 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
4876 {
4877         int i, r = 0;
4878
4879         for (i = 0; i < adev->num_ip_blocks; i++) {
4880                 if (!adev->ip_blocks[i].status.valid)
4881                         continue;
4882                 if (adev->ip_blocks[i].status.hang &&
4883                     adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4884                         r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
4885                         if (r)
4886                                 return r;
4887                 }
4888         }
4889
4890         return 0;
4891 }
4892
4893 /**
4894  * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4895  *
4896  * @adev: amdgpu_device pointer
4897  *
4898  * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
4899  * reset is necessary to recover.
4900  * Returns true if a full asic reset is required, false if not.
4901  */
4902 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
4903 {
4904         int i;
4905
4906         if (amdgpu_asic_need_full_reset(adev))
4907                 return true;
4908
4909         for (i = 0; i < adev->num_ip_blocks; i++) {
4910                 if (!adev->ip_blocks[i].status.valid)
4911                         continue;
4912                 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4913                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4914                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
4915                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4916                      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
4917                         if (adev->ip_blocks[i].status.hang) {
4918                                 dev_info(adev->dev, "Some block need full reset!\n");
4919                                 return true;
4920                         }
4921                 }
4922         }
4923         return false;
4924 }
4925
4926 /**
4927  * amdgpu_device_ip_soft_reset - do a soft reset
4928  *
4929  * @adev: amdgpu_device pointer
4930  *
4931  * The list of all the hardware IPs that make up the asic is walked and the
4932  * soft_reset callbacks are run if the block is hung.  soft_reset handles any
4933  * IP specific hardware or software state changes that are necessary to soft
4934  * reset the IP.
4935  * Returns 0 on success, negative error code on failure.
4936  */
4937 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
4938 {
4939         int i, r = 0;
4940
4941         for (i = 0; i < adev->num_ip_blocks; i++) {
4942                 if (!adev->ip_blocks[i].status.valid)
4943                         continue;
4944                 if (adev->ip_blocks[i].status.hang &&
4945                     adev->ip_blocks[i].version->funcs->soft_reset) {
4946                         r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
4947                         if (r)
4948                                 return r;
4949                 }
4950         }
4951
4952         return 0;
4953 }
4954
4955 /**
4956  * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4957  *
4958  * @adev: amdgpu_device pointer
4959  *
4960  * The list of all the hardware IPs that make up the asic is walked and the
4961  * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
4962  * handles any IP specific hardware or software state changes that are
4963  * necessary after the IP has been soft reset.
4964  * Returns 0 on success, negative error code on failure.
4965  */
4966 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
4967 {
4968         int i, r = 0;
4969
4970         for (i = 0; i < adev->num_ip_blocks; i++) {
4971                 if (!adev->ip_blocks[i].status.valid)
4972                         continue;
4973                 if (adev->ip_blocks[i].status.hang &&
4974                     adev->ip_blocks[i].version->funcs->post_soft_reset)
4975                         r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
4976                 if (r)
4977                         return r;
4978         }
4979
4980         return 0;
4981 }
4982
4983 /**
4984  * amdgpu_device_recover_vram - Recover some VRAM contents
4985  *
4986  * @adev: amdgpu_device pointer
4987  *
4988  * Restores the contents of VRAM buffers from the shadows in GTT.  Used to
4989  * restore things like GPUVM page tables after a GPU reset where
4990  * the contents of VRAM might be lost.
4991  *
4992  * Returns:
4993  * 0 on success, negative error code on failure.
4994  */
4995 static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
4996 {
4997         struct dma_fence *fence = NULL, *next = NULL;
4998         struct amdgpu_bo *shadow;
4999         struct amdgpu_bo_vm *vmbo;
5000         long r = 1, tmo;
5001
5002         if (amdgpu_sriov_runtime(adev))
5003                 tmo = msecs_to_jiffies(8000);
5004         else
5005                 tmo = msecs_to_jiffies(100);
5006
5007         dev_info(adev->dev, "recover vram bo from shadow start\n");
5008         mutex_lock(&adev->shadow_list_lock);
5009         list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
5010                 /* If vm is compute context or adev is APU, shadow will be NULL */
5011                 if (!vmbo->shadow)
5012                         continue;
5013                 shadow = vmbo->shadow;
5014
5015                 /* No need to recover an evicted BO */
5016                 if (!shadow->tbo.resource ||
5017                     shadow->tbo.resource->mem_type != TTM_PL_TT ||
5018                     shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
5019                     shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
5020                         continue;
5021
5022                 r = amdgpu_bo_restore_shadow(shadow, &next);
5023                 if (r)
5024                         break;
5025
5026                 if (fence) {
5027                         tmo = dma_fence_wait_timeout(fence, false, tmo);
5028                         dma_fence_put(fence);
5029                         fence = next;
5030                         if (tmo == 0) {
5031                                 r = -ETIMEDOUT;
5032                                 break;
5033                         } else if (tmo < 0) {
5034                                 r = tmo;
5035                                 break;
5036                         }
5037                 } else {
5038                         fence = next;
5039                 }
5040         }
5041         mutex_unlock(&adev->shadow_list_lock);
5042
5043         if (fence)
5044                 tmo = dma_fence_wait_timeout(fence, false, tmo);
5045         dma_fence_put(fence);
5046
5047         if (r < 0 || tmo <= 0) {
5048                 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
5049                 return -EIO;
5050         }
5051
5052         dev_info(adev->dev, "recover vram bo from shadow done\n");
5053         return 0;
5054 }
5055
5056
5057 /**
5058  * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5059  *
5060  * @adev: amdgpu_device pointer
5061  * @reset_context: amdgpu reset context pointer
5062  *
5063  * do VF FLR and reinitialize Asic
5064  * return 0 means succeeded otherwise failed
5065  */
5066 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
5067                                      struct amdgpu_reset_context *reset_context)
5068 {
5069         int r;
5070         struct amdgpu_hive_info *hive = NULL;
5071
5072         if (test_bit(AMDGPU_HOST_FLR, &reset_context->flags)) {
5073                 if (!amdgpu_ras_get_fed_status(adev))
5074                         amdgpu_virt_ready_to_reset(adev);
5075                 amdgpu_virt_wait_reset(adev);
5076                 clear_bit(AMDGPU_HOST_FLR, &reset_context->flags);
5077                 r = amdgpu_virt_request_full_gpu(adev, true);
5078         } else {
5079                 r = amdgpu_virt_reset_gpu(adev);
5080         }
5081         if (r)
5082                 return r;
5083
5084         amdgpu_ras_set_fed(adev, false);
5085         amdgpu_irq_gpu_reset_resume_helper(adev);
5086
5087         /* some sw clean up VF needs to do before recover */
5088         amdgpu_virt_post_reset(adev);
5089
5090         /* Resume IP prior to SMC */
5091         r = amdgpu_device_ip_reinit_early_sriov(adev);
5092         if (r)
5093                 return r;
5094
5095         amdgpu_virt_init_data_exchange(adev);
5096
5097         r = amdgpu_device_fw_loading(adev);
5098         if (r)
5099                 return r;
5100
5101         /* now we are okay to resume SMC/CP/SDMA */
5102         r = amdgpu_device_ip_reinit_late_sriov(adev);
5103         if (r)
5104                 return r;
5105
5106         hive = amdgpu_get_xgmi_hive(adev);
5107         /* Update PSP FW topology after reset */
5108         if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
5109                 r = amdgpu_xgmi_update_topology(hive, adev);
5110         if (hive)
5111                 amdgpu_put_xgmi_hive(hive);
5112         if (r)
5113                 return r;
5114
5115         r = amdgpu_ib_ring_tests(adev);
5116         if (r)
5117                 return r;
5118
5119         if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
5120                 amdgpu_inc_vram_lost(adev);
5121                 r = amdgpu_device_recover_vram(adev);
5122         }
5123         if (r)
5124                 return r;
5125
5126         /* need to be called during full access so we can't do it later like
5127          * bare-metal does.
5128          */
5129         amdgpu_amdkfd_post_reset(adev);
5130         amdgpu_virt_release_full_gpu(adev, true);
5131
5132         /* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */
5133         if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
5134             amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
5135             amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
5136             amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
5137                 amdgpu_ras_resume(adev);
5138         return 0;
5139 }
5140
5141 /**
5142  * amdgpu_device_has_job_running - check if there is any job in mirror list
5143  *
5144  * @adev: amdgpu_device pointer
5145  *
5146  * check if there is any job in mirror list
5147  */
5148 bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
5149 {
5150         int i;
5151         struct drm_sched_job *job;
5152
5153         for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5154                 struct amdgpu_ring *ring = adev->rings[i];
5155
5156                 if (!amdgpu_ring_sched_ready(ring))
5157                         continue;
5158
5159                 spin_lock(&ring->sched.job_list_lock);
5160                 job = list_first_entry_or_null(&ring->sched.pending_list,
5161                                                struct drm_sched_job, list);
5162                 spin_unlock(&ring->sched.job_list_lock);
5163                 if (job)
5164                         return true;
5165         }
5166         return false;
5167 }
5168
5169 /**
5170  * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
5171  *
5172  * @adev: amdgpu_device pointer
5173  *
5174  * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
5175  * a hung GPU.
5176  */
5177 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
5178 {
5179
5180         if (amdgpu_gpu_recovery == 0)
5181                 goto disabled;
5182
5183         /* Skip soft reset check in fatal error mode */
5184         if (!amdgpu_ras_is_poison_mode_supported(adev))
5185                 return true;
5186
5187         if (amdgpu_sriov_vf(adev))
5188                 return true;
5189
5190         if (amdgpu_gpu_recovery == -1) {
5191                 switch (adev->asic_type) {
5192 #ifdef CONFIG_DRM_AMDGPU_SI
5193                 case CHIP_VERDE:
5194                 case CHIP_TAHITI:
5195                 case CHIP_PITCAIRN:
5196                 case CHIP_OLAND:
5197                 case CHIP_HAINAN:
5198 #endif
5199 #ifdef CONFIG_DRM_AMDGPU_CIK
5200                 case CHIP_KAVERI:
5201                 case CHIP_KABINI:
5202                 case CHIP_MULLINS:
5203 #endif
5204                 case CHIP_CARRIZO:
5205                 case CHIP_STONEY:
5206                 case CHIP_CYAN_SKILLFISH:
5207                         goto disabled;
5208                 default:
5209                         break;
5210                 }
5211         }
5212
5213         return true;
5214
5215 disabled:
5216                 dev_info(adev->dev, "GPU recovery disabled.\n");
5217                 return false;
5218 }
5219
5220 int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
5221 {
5222         u32 i;
5223         int ret = 0;
5224
5225         amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5226
5227         dev_info(adev->dev, "GPU mode1 reset\n");
5228
5229         /* Cache the state before bus master disable. The saved config space
5230          * values are used in other cases like restore after mode-2 reset.
5231          */
5232         amdgpu_device_cache_pci_state(adev->pdev);
5233
5234         /* disable BM */
5235         pci_clear_master(adev->pdev);
5236
5237         if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
5238                 dev_info(adev->dev, "GPU smu mode1 reset\n");
5239                 ret = amdgpu_dpm_mode1_reset(adev);
5240         } else {
5241                 dev_info(adev->dev, "GPU psp mode1 reset\n");
5242                 ret = psp_gpu_reset(adev);
5243         }
5244
5245         if (ret)
5246                 goto mode1_reset_failed;
5247
5248         amdgpu_device_load_pci_state(adev->pdev);
5249         ret = amdgpu_psp_wait_for_bootloader(adev);
5250         if (ret)
5251                 goto mode1_reset_failed;
5252
5253         /* wait for asic to come out of reset */
5254         for (i = 0; i < adev->usec_timeout; i++) {
5255                 u32 memsize = adev->nbio.funcs->get_memsize(adev);
5256
5257                 if (memsize != 0xffffffff)
5258                         break;
5259                 udelay(1);
5260         }
5261
5262         if (i >= adev->usec_timeout) {
5263                 ret = -ETIMEDOUT;
5264                 goto mode1_reset_failed;
5265         }
5266
5267         amdgpu_atombios_scratch_regs_engine_hung(adev, false);
5268
5269         return 0;
5270
5271 mode1_reset_failed:
5272         dev_err(adev->dev, "GPU mode1 reset failed\n");
5273         return ret;
5274 }
5275
5276 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
5277                                  struct amdgpu_reset_context *reset_context)
5278 {
5279         int i, r = 0;
5280         struct amdgpu_job *job = NULL;
5281         bool need_full_reset =
5282                 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5283
5284         if (reset_context->reset_req_dev == adev)
5285                 job = reset_context->job;
5286
5287         if (amdgpu_sriov_vf(adev)) {
5288                 /* stop the data exchange thread */
5289                 amdgpu_virt_fini_data_exchange(adev);
5290         }
5291
5292         amdgpu_fence_driver_isr_toggle(adev, true);
5293
5294         /* block all schedulers and reset given job's ring */
5295         for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5296                 struct amdgpu_ring *ring = adev->rings[i];
5297
5298                 if (!amdgpu_ring_sched_ready(ring))
5299                         continue;
5300
5301                 /* Clear job fence from fence drv to avoid force_completion
5302                  * leave NULL and vm flush fence in fence drv
5303                  */
5304                 amdgpu_fence_driver_clear_job_fences(ring);
5305
5306                 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
5307                 amdgpu_fence_driver_force_completion(ring);
5308         }
5309
5310         amdgpu_fence_driver_isr_toggle(adev, false);
5311
5312         if (job && job->vm)
5313                 drm_sched_increase_karma(&job->base);
5314
5315         r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
5316         /* If reset handler not implemented, continue; otherwise return */
5317         if (r == -EOPNOTSUPP)
5318                 r = 0;
5319         else
5320                 return r;
5321
5322         /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
5323         if (!amdgpu_sriov_vf(adev)) {
5324
5325                 if (!need_full_reset)
5326                         need_full_reset = amdgpu_device_ip_need_full_reset(adev);
5327
5328                 if (!need_full_reset && amdgpu_gpu_recovery &&
5329                     amdgpu_device_ip_check_soft_reset(adev)) {
5330                         amdgpu_device_ip_pre_soft_reset(adev);
5331                         r = amdgpu_device_ip_soft_reset(adev);
5332                         amdgpu_device_ip_post_soft_reset(adev);
5333                         if (r || amdgpu_device_ip_check_soft_reset(adev)) {
5334                                 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
5335                                 need_full_reset = true;
5336                         }
5337                 }
5338
5339                 if (need_full_reset)
5340                         r = amdgpu_device_ip_suspend(adev);
5341                 if (need_full_reset)
5342                         set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5343                 else
5344                         clear_bit(AMDGPU_NEED_FULL_RESET,
5345                                   &reset_context->flags);
5346         }
5347
5348         return r;
5349 }
5350
5351 static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
5352 {
5353         int i;
5354
5355         lockdep_assert_held(&adev->reset_domain->sem);
5356
5357         for (i = 0; i < adev->reset_info.num_regs; i++) {
5358                 adev->reset_info.reset_dump_reg_value[i] =
5359                         RREG32(adev->reset_info.reset_dump_reg_list[i]);
5360
5361                 trace_amdgpu_reset_reg_dumps(adev->reset_info.reset_dump_reg_list[i],
5362                                              adev->reset_info.reset_dump_reg_value[i]);
5363         }
5364
5365         return 0;
5366 }
5367
5368 int amdgpu_do_asic_reset(struct list_head *device_list_handle,
5369                          struct amdgpu_reset_context *reset_context)
5370 {
5371         struct amdgpu_device *tmp_adev = NULL;
5372         bool need_full_reset, skip_hw_reset, vram_lost = false;
5373         int r = 0;
5374         uint32_t i;
5375
5376         /* Try reset handler method first */
5377         tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5378                                     reset_list);
5379
5380         if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) {
5381                 amdgpu_reset_reg_dumps(tmp_adev);
5382
5383                 dev_info(tmp_adev->dev, "Dumping IP State\n");
5384                 /* Trigger ip dump before we reset the asic */
5385                 for (i = 0; i < tmp_adev->num_ip_blocks; i++)
5386                         if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state)
5387                                 tmp_adev->ip_blocks[i].version->funcs
5388                                 ->dump_ip_state((void *)tmp_adev);
5389                 dev_info(tmp_adev->dev, "Dumping IP State Completed\n");
5390         }
5391
5392         reset_context->reset_device_list = device_list_handle;
5393         r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
5394         /* If reset handler not implemented, continue; otherwise return */
5395         if (r == -EOPNOTSUPP)
5396                 r = 0;
5397         else
5398                 return r;
5399
5400         /* Reset handler not implemented, use the default method */
5401         need_full_reset =
5402                 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5403         skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
5404
5405         /*
5406          * ASIC reset has to be done on all XGMI hive nodes ASAP
5407          * to allow proper links negotiation in FW (within 1 sec)
5408          */
5409         if (!skip_hw_reset && need_full_reset) {
5410                 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5411                         /* For XGMI run all resets in parallel to speed up the process */
5412                         if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5413                                 tmp_adev->gmc.xgmi.pending_reset = false;
5414                                 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
5415                                         r = -EALREADY;
5416                         } else
5417                                 r = amdgpu_asic_reset(tmp_adev);
5418
5419                         if (r) {
5420                                 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
5421                                          r, adev_to_drm(tmp_adev)->unique);
5422                                 goto out;
5423                         }
5424                 }
5425
5426                 /* For XGMI wait for all resets to complete before proceed */
5427                 if (!r) {
5428                         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5429                                 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5430                                         flush_work(&tmp_adev->xgmi_reset_work);
5431                                         r = tmp_adev->asic_reset_res;
5432                                         if (r)
5433                                                 break;
5434                                 }
5435                         }
5436                 }
5437         }
5438
5439         if (!r && amdgpu_ras_intr_triggered()) {
5440                 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5441                         amdgpu_ras_reset_error_count(tmp_adev, AMDGPU_RAS_BLOCK__MMHUB);
5442                 }
5443
5444                 amdgpu_ras_intr_cleared();
5445         }
5446
5447         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5448                 if (need_full_reset) {
5449                         /* post card */
5450                         amdgpu_ras_set_fed(tmp_adev, false);
5451                         r = amdgpu_device_asic_init(tmp_adev);
5452                         if (r) {
5453                                 dev_warn(tmp_adev->dev, "asic atom init failed!");
5454                         } else {
5455                                 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
5456
5457                                 r = amdgpu_device_ip_resume_phase1(tmp_adev);
5458                                 if (r)
5459                                         goto out;
5460
5461                                 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
5462
5463                                 if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags))
5464                                         amdgpu_coredump(tmp_adev, vram_lost, reset_context);
5465
5466                                 if (vram_lost) {
5467                                         DRM_INFO("VRAM is lost due to GPU reset!\n");
5468                                         amdgpu_inc_vram_lost(tmp_adev);
5469                                 }
5470
5471                                 r = amdgpu_device_fw_loading(tmp_adev);
5472                                 if (r)
5473                                         return r;
5474
5475                                 r = amdgpu_xcp_restore_partition_mode(
5476                                         tmp_adev->xcp_mgr);
5477                                 if (r)
5478                                         goto out;
5479
5480                                 r = amdgpu_device_ip_resume_phase2(tmp_adev);
5481                                 if (r)
5482                                         goto out;
5483
5484                                 if (tmp_adev->mman.buffer_funcs_ring->sched.ready)
5485                                         amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true);
5486
5487                                 if (vram_lost)
5488                                         amdgpu_device_fill_reset_magic(tmp_adev);
5489
5490                                 /*
5491                                  * Add this ASIC as tracked as reset was already
5492                                  * complete successfully.
5493                                  */
5494                                 amdgpu_register_gpu_instance(tmp_adev);
5495
5496                                 if (!reset_context->hive &&
5497                                     tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5498                                         amdgpu_xgmi_add_device(tmp_adev);
5499
5500                                 r = amdgpu_device_ip_late_init(tmp_adev);
5501                                 if (r)
5502                                         goto out;
5503
5504                                 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
5505
5506                                 /*
5507                                  * The GPU enters bad state once faulty pages
5508                                  * by ECC has reached the threshold, and ras
5509                                  * recovery is scheduled next. So add one check
5510                                  * here to break recovery if it indeed exceeds
5511                                  * bad page threshold, and remind user to
5512                                  * retire this GPU or setting one bigger
5513                                  * bad_page_threshold value to fix this once
5514                                  * probing driver again.
5515                                  */
5516                                 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
5517                                         /* must succeed. */
5518                                         amdgpu_ras_resume(tmp_adev);
5519                                 } else {
5520                                         r = -EINVAL;
5521                                         goto out;
5522                                 }
5523
5524                                 /* Update PSP FW topology after reset */
5525                                 if (reset_context->hive &&
5526                                     tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5527                                         r = amdgpu_xgmi_update_topology(
5528                                                 reset_context->hive, tmp_adev);
5529                         }
5530                 }
5531
5532 out:
5533                 if (!r) {
5534                         amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5535                         r = amdgpu_ib_ring_tests(tmp_adev);
5536                         if (r) {
5537                                 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
5538                                 need_full_reset = true;
5539                                 r = -EAGAIN;
5540                                 goto end;
5541                         }
5542                 }
5543
5544                 if (!r)
5545                         r = amdgpu_device_recover_vram(tmp_adev);
5546                 else
5547                         tmp_adev->asic_reset_res = r;
5548         }
5549
5550 end:
5551         if (need_full_reset)
5552                 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5553         else
5554                 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5555         return r;
5556 }
5557
5558 static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
5559 {
5560
5561         switch (amdgpu_asic_reset_method(adev)) {
5562         case AMD_RESET_METHOD_MODE1:
5563                 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5564                 break;
5565         case AMD_RESET_METHOD_MODE2:
5566                 adev->mp1_state = PP_MP1_STATE_RESET;
5567                 break;
5568         default:
5569                 adev->mp1_state = PP_MP1_STATE_NONE;
5570                 break;
5571         }
5572 }
5573
5574 static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
5575 {
5576         amdgpu_vf_error_trans_all(adev);
5577         adev->mp1_state = PP_MP1_STATE_NONE;
5578 }
5579
5580 static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5581 {
5582         struct pci_dev *p = NULL;
5583
5584         p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5585                         adev->pdev->bus->number, 1);
5586         if (p) {
5587                 pm_runtime_enable(&(p->dev));
5588                 pm_runtime_resume(&(p->dev));
5589         }
5590
5591         pci_dev_put(p);
5592 }
5593
5594 static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5595 {
5596         enum amd_reset_method reset_method;
5597         struct pci_dev *p = NULL;
5598         u64 expires;
5599
5600         /*
5601          * For now, only BACO and mode1 reset are confirmed
5602          * to suffer the audio issue without proper suspended.
5603          */
5604         reset_method = amdgpu_asic_reset_method(adev);
5605         if ((reset_method != AMD_RESET_METHOD_BACO) &&
5606              (reset_method != AMD_RESET_METHOD_MODE1))
5607                 return -EINVAL;
5608
5609         p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5610                         adev->pdev->bus->number, 1);
5611         if (!p)
5612                 return -ENODEV;
5613
5614         expires = pm_runtime_autosuspend_expiration(&(p->dev));
5615         if (!expires)
5616                 /*
5617                  * If we cannot get the audio device autosuspend delay,
5618                  * a fixed 4S interval will be used. Considering 3S is
5619                  * the audio controller default autosuspend delay setting.
5620                  * 4S used here is guaranteed to cover that.
5621                  */
5622                 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
5623
5624         while (!pm_runtime_status_suspended(&(p->dev))) {
5625                 if (!pm_runtime_suspend(&(p->dev)))
5626                         break;
5627
5628                 if (expires < ktime_get_mono_fast_ns()) {
5629                         dev_warn(adev->dev, "failed to suspend display audio\n");
5630                         pci_dev_put(p);
5631                         /* TODO: abort the succeeding gpu reset? */
5632                         return -ETIMEDOUT;
5633                 }
5634         }
5635
5636         pm_runtime_disable(&(p->dev));
5637
5638         pci_dev_put(p);
5639         return 0;
5640 }
5641
5642 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
5643 {
5644         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5645
5646 #if defined(CONFIG_DEBUG_FS)
5647         if (!amdgpu_sriov_vf(adev))
5648                 cancel_work(&adev->reset_work);
5649 #endif
5650
5651         if (adev->kfd.dev)
5652                 cancel_work(&adev->kfd.reset_work);
5653
5654         if (amdgpu_sriov_vf(adev))
5655                 cancel_work(&adev->virt.flr_work);
5656
5657         if (con && adev->ras_enabled)
5658                 cancel_work(&con->recovery_work);
5659
5660 }
5661
5662 static int amdgpu_device_health_check(struct list_head *device_list_handle)
5663 {
5664         struct amdgpu_device *tmp_adev;
5665         int ret = 0;
5666         u32 status;
5667
5668         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5669                 pci_read_config_dword(tmp_adev->pdev, PCI_COMMAND, &status);
5670                 if (PCI_POSSIBLE_ERROR(status)) {
5671                         dev_err(tmp_adev->dev, "device lost from bus!");
5672                         ret = -ENODEV;
5673                 }
5674         }
5675
5676         return ret;
5677 }
5678
5679 /**
5680  * amdgpu_device_gpu_recover - reset the asic and recover scheduler
5681  *
5682  * @adev: amdgpu_device pointer
5683  * @job: which job trigger hang
5684  * @reset_context: amdgpu reset context pointer
5685  *
5686  * Attempt to reset the GPU if it has hung (all asics).
5687  * Attempt to do soft-reset or full-reset and reinitialize Asic
5688  * Returns 0 for success or an error on failure.
5689  */
5690
5691 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
5692                               struct amdgpu_job *job,
5693                               struct amdgpu_reset_context *reset_context)
5694 {
5695         struct list_head device_list, *device_list_handle =  NULL;
5696         bool job_signaled = false;
5697         struct amdgpu_hive_info *hive = NULL;
5698         struct amdgpu_device *tmp_adev = NULL;
5699         int i, r = 0;
5700         bool need_emergency_restart = false;
5701         bool audio_suspended = false;
5702         int retry_limit = AMDGPU_MAX_RETRY_LIMIT;
5703
5704         /*
5705          * Special case: RAS triggered and full reset isn't supported
5706          */
5707         need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5708
5709         /*
5710          * Flush RAM to disk so that after reboot
5711          * the user can read log and see why the system rebooted.
5712          */
5713         if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
5714                 amdgpu_ras_get_context(adev)->reboot) {
5715                 DRM_WARN("Emergency reboot.");
5716
5717                 ksys_sync_helper();
5718                 emergency_restart();
5719         }
5720
5721         dev_info(adev->dev, "GPU %s begin!\n",
5722                 need_emergency_restart ? "jobs stop":"reset");
5723
5724         if (!amdgpu_sriov_vf(adev))
5725                 hive = amdgpu_get_xgmi_hive(adev);
5726         if (hive)
5727                 mutex_lock(&hive->hive_lock);
5728
5729         reset_context->job = job;
5730         reset_context->hive = hive;
5731         /*
5732          * Build list of devices to reset.
5733          * In case we are in XGMI hive mode, resort the device list
5734          * to put adev in the 1st position.
5735          */
5736         INIT_LIST_HEAD(&device_list);
5737         if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1) && hive) {
5738                 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
5739                         list_add_tail(&tmp_adev->reset_list, &device_list);
5740                         if (adev->shutdown)
5741                                 tmp_adev->shutdown = true;
5742                 }
5743                 if (!list_is_first(&adev->reset_list, &device_list))
5744                         list_rotate_to_front(&adev->reset_list, &device_list);
5745                 device_list_handle = &device_list;
5746         } else {
5747                 list_add_tail(&adev->reset_list, &device_list);
5748                 device_list_handle = &device_list;
5749         }
5750
5751         if (!amdgpu_sriov_vf(adev)) {
5752                 r = amdgpu_device_health_check(device_list_handle);
5753                 if (r)
5754                         goto end_reset;
5755         }
5756
5757         /* We need to lock reset domain only once both for XGMI and single device */
5758         tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5759                                     reset_list);
5760         amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
5761
5762         /* block all schedulers and reset given job's ring */
5763         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5764
5765                 amdgpu_device_set_mp1_state(tmp_adev);
5766
5767                 /*
5768                  * Try to put the audio codec into suspend state
5769                  * before gpu reset started.
5770                  *
5771                  * Due to the power domain of the graphics device
5772                  * is shared with AZ power domain. Without this,
5773                  * we may change the audio hardware from behind
5774                  * the audio driver's back. That will trigger
5775                  * some audio codec errors.
5776                  */
5777                 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5778                         audio_suspended = true;
5779
5780                 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5781
5782                 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5783
5784                 amdgpu_amdkfd_pre_reset(tmp_adev, reset_context);
5785
5786                 /*
5787                  * Mark these ASICs to be reseted as untracked first
5788                  * And add them back after reset completed
5789                  */
5790                 amdgpu_unregister_gpu_instance(tmp_adev);
5791
5792                 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
5793
5794                 /* disable ras on ALL IPs */
5795                 if (!need_emergency_restart &&
5796                       amdgpu_device_ip_need_full_reset(tmp_adev))
5797                         amdgpu_ras_suspend(tmp_adev);
5798
5799                 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5800                         struct amdgpu_ring *ring = tmp_adev->rings[i];
5801
5802                         if (!amdgpu_ring_sched_ready(ring))
5803                                 continue;
5804
5805                         drm_sched_stop(&ring->sched, job ? &job->base : NULL);
5806
5807                         if (need_emergency_restart)
5808                                 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
5809                 }
5810                 atomic_inc(&tmp_adev->gpu_reset_counter);
5811         }
5812
5813         if (need_emergency_restart)
5814                 goto skip_sched_resume;
5815
5816         /*
5817          * Must check guilty signal here since after this point all old
5818          * HW fences are force signaled.
5819          *
5820          * job->base holds a reference to parent fence
5821          */
5822         if (job && dma_fence_is_signaled(&job->hw_fence)) {
5823                 job_signaled = true;
5824                 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5825                 goto skip_hw_reset;
5826         }
5827
5828 retry:  /* Rest of adevs pre asic reset from XGMI hive. */
5829         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5830                 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
5831                 /*TODO Should we stop ?*/
5832                 if (r) {
5833                         dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
5834                                   r, adev_to_drm(tmp_adev)->unique);
5835                         tmp_adev->asic_reset_res = r;
5836                 }
5837         }
5838
5839         /* Actual ASIC resets if needed.*/
5840         /* Host driver will handle XGMI hive reset for SRIOV */
5841         if (amdgpu_sriov_vf(adev)) {
5842                 if (amdgpu_ras_get_fed_status(adev) || amdgpu_virt_rcvd_ras_interrupt(adev)) {
5843                         dev_dbg(adev->dev, "Detected RAS error, wait for FLR completion\n");
5844                         amdgpu_ras_set_fed(adev, true);
5845                         set_bit(AMDGPU_HOST_FLR, &reset_context->flags);
5846                 }
5847
5848                 r = amdgpu_device_reset_sriov(adev, reset_context);
5849                 if (AMDGPU_RETRY_SRIOV_RESET(r) && (retry_limit--) > 0) {
5850                         amdgpu_virt_release_full_gpu(adev, true);
5851                         goto retry;
5852                 }
5853                 if (r)
5854                         adev->asic_reset_res = r;
5855         } else {
5856                 r = amdgpu_do_asic_reset(device_list_handle, reset_context);
5857                 if (r && r == -EAGAIN)
5858                         goto retry;
5859         }
5860
5861         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5862                 /*
5863                  * Drop any pending non scheduler resets queued before reset is done.
5864                  * Any reset scheduled after this point would be valid. Scheduler resets
5865                  * were already dropped during drm_sched_stop and no new ones can come
5866                  * in before drm_sched_start.
5867                  */
5868                 amdgpu_device_stop_pending_resets(tmp_adev);
5869         }
5870
5871 skip_hw_reset:
5872
5873         /* Post ASIC reset for all devs .*/
5874         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5875
5876                 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5877                         struct amdgpu_ring *ring = tmp_adev->rings[i];
5878
5879                         if (!amdgpu_ring_sched_ready(ring))
5880                                 continue;
5881
5882                         drm_sched_start(&ring->sched, true);
5883                 }
5884
5885                 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
5886                         drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
5887
5888                 if (tmp_adev->asic_reset_res)
5889                         r = tmp_adev->asic_reset_res;
5890
5891                 tmp_adev->asic_reset_res = 0;
5892
5893                 if (r) {
5894                         /* bad news, how to tell it to userspace ? */
5895                         dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
5896                         amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5897                 } else {
5898                         dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
5899                         if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5900                                 DRM_WARN("smart shift update failed\n");
5901                 }
5902         }
5903
5904 skip_sched_resume:
5905         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5906                 /* unlock kfd: SRIOV would do it separately */
5907                 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
5908                         amdgpu_amdkfd_post_reset(tmp_adev);
5909
5910                 /* kfd_post_reset will do nothing if kfd device is not initialized,
5911                  * need to bring up kfd here if it's not be initialized before
5912                  */
5913                 if (!adev->kfd.init_complete)
5914                         amdgpu_amdkfd_device_init(adev);
5915
5916                 if (audio_suspended)
5917                         amdgpu_device_resume_display_audio(tmp_adev);
5918
5919                 amdgpu_device_unset_mp1_state(tmp_adev);
5920
5921                 amdgpu_ras_set_error_query_ready(tmp_adev, true);
5922         }
5923
5924         tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5925                                             reset_list);
5926         amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5927
5928 end_reset:
5929         if (hive) {
5930                 mutex_unlock(&hive->hive_lock);
5931                 amdgpu_put_xgmi_hive(hive);
5932         }
5933
5934         if (r)
5935                 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
5936
5937         atomic_set(&adev->reset_domain->reset_res, r);
5938         return r;
5939 }
5940
5941 /**
5942  * amdgpu_device_partner_bandwidth - find the bandwidth of appropriate partner
5943  *
5944  * @adev: amdgpu_device pointer
5945  * @speed: pointer to the speed of the link
5946  * @width: pointer to the width of the link
5947  *
5948  * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
5949  * first physical partner to an AMD dGPU.
5950  * This will exclude any virtual switches and links.
5951  */
5952 static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
5953                                             enum pci_bus_speed *speed,
5954                                             enum pcie_link_width *width)
5955 {
5956         struct pci_dev *parent = adev->pdev;
5957
5958         if (!speed || !width)
5959                 return;
5960
5961         *speed = PCI_SPEED_UNKNOWN;
5962         *width = PCIE_LNK_WIDTH_UNKNOWN;
5963
5964         if (amdgpu_device_pcie_dynamic_switching_supported(adev)) {
5965                 while ((parent = pci_upstream_bridge(parent))) {
5966                         /* skip upstream/downstream switches internal to dGPU*/
5967                         if (parent->vendor == PCI_VENDOR_ID_ATI)
5968                                 continue;
5969                         *speed = pcie_get_speed_cap(parent);
5970                         *width = pcie_get_width_cap(parent);
5971                         break;
5972                 }
5973         } else {
5974                 /* use the current speeds rather than max if switching is not supported */
5975                 pcie_bandwidth_available(adev->pdev, NULL, speed, width);
5976         }
5977 }
5978
5979 /**
5980  * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
5981  *
5982  * @adev: amdgpu_device pointer
5983  *
5984  * Fetchs and stores in the driver the PCIE capabilities (gen speed
5985  * and lanes) of the slot the device is in. Handles APUs and
5986  * virtualized environments where PCIE config space may not be available.
5987  */
5988 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
5989 {
5990         struct pci_dev *pdev;
5991         enum pci_bus_speed speed_cap, platform_speed_cap;
5992         enum pcie_link_width platform_link_width;
5993
5994         if (amdgpu_pcie_gen_cap)
5995                 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
5996
5997         if (amdgpu_pcie_lane_cap)
5998                 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
5999
6000         /* covers APUs as well */
6001         if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
6002                 if (adev->pm.pcie_gen_mask == 0)
6003                         adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
6004                 if (adev->pm.pcie_mlw_mask == 0)
6005                         adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
6006                 return;
6007         }
6008
6009         if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
6010                 return;
6011
6012         amdgpu_device_partner_bandwidth(adev, &platform_speed_cap,
6013                                         &platform_link_width);
6014
6015         if (adev->pm.pcie_gen_mask == 0) {
6016                 /* asic caps */
6017                 pdev = adev->pdev;
6018                 speed_cap = pcie_get_speed_cap(pdev);
6019                 if (speed_cap == PCI_SPEED_UNKNOWN) {
6020                         adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6021                                                   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6022                                                   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
6023                 } else {
6024                         if (speed_cap == PCIE_SPEED_32_0GT)
6025                                 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6026                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6027                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6028                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
6029                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
6030                         else if (speed_cap == PCIE_SPEED_16_0GT)
6031                                 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6032                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6033                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6034                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
6035                         else if (speed_cap == PCIE_SPEED_8_0GT)
6036                                 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6037                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6038                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
6039                         else if (speed_cap == PCIE_SPEED_5_0GT)
6040                                 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6041                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
6042                         else
6043                                 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
6044                 }
6045                 /* platform caps */
6046                 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
6047                         adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6048                                                    CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6049                 } else {
6050                         if (platform_speed_cap == PCIE_SPEED_32_0GT)
6051                                 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6052                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6053                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6054                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
6055                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
6056                         else if (platform_speed_cap == PCIE_SPEED_16_0GT)
6057                                 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6058                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6059                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6060                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
6061                         else if (platform_speed_cap == PCIE_SPEED_8_0GT)
6062                                 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6063                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6064                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
6065                         else if (platform_speed_cap == PCIE_SPEED_5_0GT)
6066                                 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6067                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6068                         else
6069                                 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
6070
6071                 }
6072         }
6073         if (adev->pm.pcie_mlw_mask == 0) {
6074                 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
6075                         adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
6076                 } else {
6077                         switch (platform_link_width) {
6078                         case PCIE_LNK_X32:
6079                                 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
6080                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
6081                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6082                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6083                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6084                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6085                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6086                                 break;
6087                         case PCIE_LNK_X16:
6088                                 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
6089                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6090                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6091                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6092                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6093                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6094                                 break;
6095                         case PCIE_LNK_X12:
6096                                 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6097                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6098                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6099                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6100                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6101                                 break;
6102                         case PCIE_LNK_X8:
6103                                 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6104                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6105                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6106                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6107                                 break;
6108                         case PCIE_LNK_X4:
6109                                 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6110                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6111                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6112                                 break;
6113                         case PCIE_LNK_X2:
6114                                 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6115                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6116                                 break;
6117                         case PCIE_LNK_X1:
6118                                 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
6119                                 break;
6120                         default:
6121                                 break;
6122                         }
6123                 }
6124         }
6125 }
6126
6127 /**
6128  * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
6129  *
6130  * @adev: amdgpu_device pointer
6131  * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
6132  *
6133  * Return true if @peer_adev can access (DMA) @adev through the PCIe
6134  * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
6135  * @peer_adev.
6136  */
6137 bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
6138                                       struct amdgpu_device *peer_adev)
6139 {
6140 #ifdef CONFIG_HSA_AMD_P2P
6141         uint64_t address_mask = peer_adev->dev->dma_mask ?
6142                 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
6143         resource_size_t aper_limit =
6144                 adev->gmc.aper_base + adev->gmc.aper_size - 1;
6145         bool p2p_access =
6146                 !adev->gmc.xgmi.connected_to_cpu &&
6147                 !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
6148
6149         return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
6150                 adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
6151                 !(adev->gmc.aper_base & address_mask ||
6152                   aper_limit & address_mask));
6153 #else
6154         return false;
6155 #endif
6156 }
6157
6158 int amdgpu_device_baco_enter(struct drm_device *dev)
6159 {
6160         struct amdgpu_device *adev = drm_to_adev(dev);
6161         struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6162
6163         if (!amdgpu_device_supports_baco(dev))
6164                 return -ENOTSUPP;
6165
6166         if (ras && adev->ras_enabled &&
6167             adev->nbio.funcs->enable_doorbell_interrupt)
6168                 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
6169
6170         return amdgpu_dpm_baco_enter(adev);
6171 }
6172
6173 int amdgpu_device_baco_exit(struct drm_device *dev)
6174 {
6175         struct amdgpu_device *adev = drm_to_adev(dev);
6176         struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6177         int ret = 0;
6178
6179         if (!amdgpu_device_supports_baco(dev))
6180                 return -ENOTSUPP;
6181
6182         ret = amdgpu_dpm_baco_exit(adev);
6183         if (ret)
6184                 return ret;
6185
6186         if (ras && adev->ras_enabled &&
6187             adev->nbio.funcs->enable_doorbell_interrupt)
6188                 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
6189
6190         if (amdgpu_passthrough(adev) && adev->nbio.funcs &&
6191             adev->nbio.funcs->clear_doorbell_interrupt)
6192                 adev->nbio.funcs->clear_doorbell_interrupt(adev);
6193
6194         return 0;
6195 }
6196
6197 /**
6198  * amdgpu_pci_error_detected - Called when a PCI error is detected.
6199  * @pdev: PCI device struct
6200  * @state: PCI channel state
6201  *
6202  * Description: Called when a PCI error is detected.
6203  *
6204  * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
6205  */
6206 pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
6207 {
6208         struct drm_device *dev = pci_get_drvdata(pdev);
6209         struct amdgpu_device *adev = drm_to_adev(dev);
6210         int i;
6211
6212         DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
6213
6214         if (adev->gmc.xgmi.num_physical_nodes > 1) {
6215                 DRM_WARN("No support for XGMI hive yet...");
6216                 return PCI_ERS_RESULT_DISCONNECT;
6217         }
6218
6219         adev->pci_channel_state = state;
6220
6221         switch (state) {
6222         case pci_channel_io_normal:
6223                 return PCI_ERS_RESULT_CAN_RECOVER;
6224         /* Fatal error, prepare for slot reset */
6225         case pci_channel_io_frozen:
6226                 /*
6227                  * Locking adev->reset_domain->sem will prevent any external access
6228                  * to GPU during PCI error recovery
6229                  */
6230                 amdgpu_device_lock_reset_domain(adev->reset_domain);
6231                 amdgpu_device_set_mp1_state(adev);
6232
6233                 /*
6234                  * Block any work scheduling as we do for regular GPU reset
6235                  * for the duration of the recovery
6236                  */
6237                 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6238                         struct amdgpu_ring *ring = adev->rings[i];
6239
6240                         if (!amdgpu_ring_sched_ready(ring))
6241                                 continue;
6242
6243                         drm_sched_stop(&ring->sched, NULL);
6244                 }
6245                 atomic_inc(&adev->gpu_reset_counter);
6246                 return PCI_ERS_RESULT_NEED_RESET;
6247         case pci_channel_io_perm_failure:
6248                 /* Permanent error, prepare for device removal */
6249                 return PCI_ERS_RESULT_DISCONNECT;
6250         }
6251
6252         return PCI_ERS_RESULT_NEED_RESET;
6253 }
6254
6255 /**
6256  * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
6257  * @pdev: pointer to PCI device
6258  */
6259 pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
6260 {
6261
6262         DRM_INFO("PCI error: mmio enabled callback!!\n");
6263
6264         /* TODO - dump whatever for debugging purposes */
6265
6266         /* This called only if amdgpu_pci_error_detected returns
6267          * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
6268          * works, no need to reset slot.
6269          */
6270
6271         return PCI_ERS_RESULT_RECOVERED;
6272 }
6273
6274 /**
6275  * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
6276  * @pdev: PCI device struct
6277  *
6278  * Description: This routine is called by the pci error recovery
6279  * code after the PCI slot has been reset, just before we
6280  * should resume normal operations.
6281  */
6282 pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
6283 {
6284         struct drm_device *dev = pci_get_drvdata(pdev);
6285         struct amdgpu_device *adev = drm_to_adev(dev);
6286         int r, i;
6287         struct amdgpu_reset_context reset_context;
6288         u32 memsize;
6289         struct list_head device_list;
6290
6291         /* PCI error slot reset should be skipped During RAS recovery */
6292         if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
6293             amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) &&
6294             amdgpu_ras_in_recovery(adev))
6295                 return PCI_ERS_RESULT_RECOVERED;
6296
6297         DRM_INFO("PCI error: slot reset callback!!\n");
6298
6299         memset(&reset_context, 0, sizeof(reset_context));
6300
6301         INIT_LIST_HEAD(&device_list);
6302         list_add_tail(&adev->reset_list, &device_list);
6303
6304         /* wait for asic to come out of reset */
6305         msleep(500);
6306
6307         /* Restore PCI confspace */
6308         amdgpu_device_load_pci_state(pdev);
6309
6310         /* confirm  ASIC came out of reset */
6311         for (i = 0; i < adev->usec_timeout; i++) {
6312                 memsize = amdgpu_asic_get_config_memsize(adev);
6313
6314                 if (memsize != 0xffffffff)
6315                         break;
6316                 udelay(1);
6317         }
6318         if (memsize == 0xffffffff) {
6319                 r = -ETIME;
6320                 goto out;
6321         }
6322
6323         reset_context.method = AMD_RESET_METHOD_NONE;
6324         reset_context.reset_req_dev = adev;
6325         set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
6326         set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
6327
6328         adev->no_hw_access = true;
6329         r = amdgpu_device_pre_asic_reset(adev, &reset_context);
6330         adev->no_hw_access = false;
6331         if (r)
6332                 goto out;
6333
6334         r = amdgpu_do_asic_reset(&device_list, &reset_context);
6335
6336 out:
6337         if (!r) {
6338                 if (amdgpu_device_cache_pci_state(adev->pdev))
6339                         pci_restore_state(adev->pdev);
6340
6341                 DRM_INFO("PCIe error recovery succeeded\n");
6342         } else {
6343                 DRM_ERROR("PCIe error recovery failed, err:%d", r);
6344                 amdgpu_device_unset_mp1_state(adev);
6345                 amdgpu_device_unlock_reset_domain(adev->reset_domain);
6346         }
6347
6348         return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
6349 }
6350
6351 /**
6352  * amdgpu_pci_resume() - resume normal ops after PCI reset
6353  * @pdev: pointer to PCI device
6354  *
6355  * Called when the error recovery driver tells us that its
6356  * OK to resume normal operation.
6357  */
6358 void amdgpu_pci_resume(struct pci_dev *pdev)
6359 {
6360         struct drm_device *dev = pci_get_drvdata(pdev);
6361         struct amdgpu_device *adev = drm_to_adev(dev);
6362         int i;
6363
6364
6365         DRM_INFO("PCI error: resume callback!!\n");
6366
6367         /* Only continue execution for the case of pci_channel_io_frozen */
6368         if (adev->pci_channel_state != pci_channel_io_frozen)
6369                 return;
6370
6371         for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6372                 struct amdgpu_ring *ring = adev->rings[i];
6373
6374                 if (!amdgpu_ring_sched_ready(ring))
6375                         continue;
6376
6377                 drm_sched_start(&ring->sched, true);
6378         }
6379
6380         amdgpu_device_unset_mp1_state(adev);
6381         amdgpu_device_unlock_reset_domain(adev->reset_domain);
6382 }
6383
6384 bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
6385 {
6386         struct drm_device *dev = pci_get_drvdata(pdev);
6387         struct amdgpu_device *adev = drm_to_adev(dev);
6388         int r;
6389
6390         r = pci_save_state(pdev);
6391         if (!r) {
6392                 kfree(adev->pci_state);
6393
6394                 adev->pci_state = pci_store_saved_state(pdev);
6395
6396                 if (!adev->pci_state) {
6397                         DRM_ERROR("Failed to store PCI saved state");
6398                         return false;
6399                 }
6400         } else {
6401                 DRM_WARN("Failed to save PCI state, err:%d\n", r);
6402                 return false;
6403         }
6404
6405         return true;
6406 }
6407
6408 bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
6409 {
6410         struct drm_device *dev = pci_get_drvdata(pdev);
6411         struct amdgpu_device *adev = drm_to_adev(dev);
6412         int r;
6413
6414         if (!adev->pci_state)
6415                 return false;
6416
6417         r = pci_load_saved_state(pdev, adev->pci_state);
6418
6419         if (!r) {
6420                 pci_restore_state(pdev);
6421         } else {
6422                 DRM_WARN("Failed to load PCI state, err:%d\n", r);
6423                 return false;
6424         }
6425
6426         return true;
6427 }
6428
6429 void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
6430                 struct amdgpu_ring *ring)
6431 {
6432 #ifdef CONFIG_X86_64
6433         if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6434                 return;
6435 #endif
6436         if (adev->gmc.xgmi.connected_to_cpu)
6437                 return;
6438
6439         if (ring && ring->funcs->emit_hdp_flush)
6440                 amdgpu_ring_emit_hdp_flush(ring);
6441         else
6442                 amdgpu_asic_flush_hdp(adev, ring);
6443 }
6444
6445 void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
6446                 struct amdgpu_ring *ring)
6447 {
6448 #ifdef CONFIG_X86_64
6449         if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6450                 return;
6451 #endif
6452         if (adev->gmc.xgmi.connected_to_cpu)
6453                 return;
6454
6455         amdgpu_asic_invalidate_hdp(adev, ring);
6456 }
6457
6458 int amdgpu_in_reset(struct amdgpu_device *adev)
6459 {
6460         return atomic_read(&adev->reset_domain->in_gpu_reset);
6461 }
6462
6463 /**
6464  * amdgpu_device_halt() - bring hardware to some kind of halt state
6465  *
6466  * @adev: amdgpu_device pointer
6467  *
6468  * Bring hardware to some kind of halt state so that no one can touch it
6469  * any more. It will help to maintain error context when error occurred.
6470  * Compare to a simple hang, the system will keep stable at least for SSH
6471  * access. Then it should be trivial to inspect the hardware state and
6472  * see what's going on. Implemented as following:
6473  *
6474  * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
6475  *    clears all CPU mappings to device, disallows remappings through page faults
6476  * 2. amdgpu_irq_disable_all() disables all interrupts
6477  * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
6478  * 4. set adev->no_hw_access to avoid potential crashes after setp 5
6479  * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
6480  * 6. pci_disable_device() and pci_wait_for_pending_transaction()
6481  *    flush any in flight DMA operations
6482  */
6483 void amdgpu_device_halt(struct amdgpu_device *adev)
6484 {
6485         struct pci_dev *pdev = adev->pdev;
6486         struct drm_device *ddev = adev_to_drm(adev);
6487
6488         amdgpu_xcp_dev_unplug(adev);
6489         drm_dev_unplug(ddev);
6490
6491         amdgpu_irq_disable_all(adev);
6492
6493         amdgpu_fence_driver_hw_fini(adev);
6494
6495         adev->no_hw_access = true;
6496
6497         amdgpu_device_unmap_mmio(adev);
6498
6499         pci_disable_device(pdev);
6500         pci_wait_for_pending_transaction(pdev);
6501 }
6502
6503 u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
6504                                 u32 reg)
6505 {
6506         unsigned long flags, address, data;
6507         u32 r;
6508
6509         address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6510         data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6511
6512         spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6513         WREG32(address, reg * 4);
6514         (void)RREG32(address);
6515         r = RREG32(data);
6516         spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6517         return r;
6518 }
6519
6520 void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
6521                                 u32 reg, u32 v)
6522 {
6523         unsigned long flags, address, data;
6524
6525         address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6526         data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6527
6528         spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6529         WREG32(address, reg * 4);
6530         (void)RREG32(address);
6531         WREG32(data, v);
6532         (void)RREG32(data);
6533         spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6534 }
6535
6536 /**
6537  * amdgpu_device_get_gang - return a reference to the current gang
6538  * @adev: amdgpu_device pointer
6539  *
6540  * Returns: A new reference to the current gang leader.
6541  */
6542 struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev)
6543 {
6544         struct dma_fence *fence;
6545
6546         rcu_read_lock();
6547         fence = dma_fence_get_rcu_safe(&adev->gang_submit);
6548         rcu_read_unlock();
6549         return fence;
6550 }
6551
6552 /**
6553  * amdgpu_device_switch_gang - switch to a new gang
6554  * @adev: amdgpu_device pointer
6555  * @gang: the gang to switch to
6556  *
6557  * Try to switch to a new gang.
6558  * Returns: NULL if we switched to the new gang or a reference to the current
6559  * gang leader.
6560  */
6561 struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6562                                             struct dma_fence *gang)
6563 {
6564         struct dma_fence *old = NULL;
6565
6566         do {
6567                 dma_fence_put(old);
6568                 old = amdgpu_device_get_gang(adev);
6569                 if (old == gang)
6570                         break;
6571
6572                 if (!dma_fence_is_signaled(old))
6573                         return old;
6574
6575         } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6576                          old, gang) != old);
6577
6578         dma_fence_put(old);
6579         return NULL;
6580 }
6581
6582 bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6583 {
6584         switch (adev->asic_type) {
6585 #ifdef CONFIG_DRM_AMDGPU_SI
6586         case CHIP_HAINAN:
6587 #endif
6588         case CHIP_TOPAZ:
6589                 /* chips with no display hardware */
6590                 return false;
6591 #ifdef CONFIG_DRM_AMDGPU_SI
6592         case CHIP_TAHITI:
6593         case CHIP_PITCAIRN:
6594         case CHIP_VERDE:
6595         case CHIP_OLAND:
6596 #endif
6597 #ifdef CONFIG_DRM_AMDGPU_CIK
6598         case CHIP_BONAIRE:
6599         case CHIP_HAWAII:
6600         case CHIP_KAVERI:
6601         case CHIP_KABINI:
6602         case CHIP_MULLINS:
6603 #endif
6604         case CHIP_TONGA:
6605         case CHIP_FIJI:
6606         case CHIP_POLARIS10:
6607         case CHIP_POLARIS11:
6608         case CHIP_POLARIS12:
6609         case CHIP_VEGAM:
6610         case CHIP_CARRIZO:
6611         case CHIP_STONEY:
6612                 /* chips with display hardware */
6613                 return true;
6614         default:
6615                 /* IP discovery */
6616                 if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
6617                     (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6618                         return false;
6619                 return true;
6620         }
6621 }
6622
6623 uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
6624                 uint32_t inst, uint32_t reg_addr, char reg_name[],
6625                 uint32_t expected_value, uint32_t mask)
6626 {
6627         uint32_t ret = 0;
6628         uint32_t old_ = 0;
6629         uint32_t tmp_ = RREG32(reg_addr);
6630         uint32_t loop = adev->usec_timeout;
6631
6632         while ((tmp_ & (mask)) != (expected_value)) {
6633                 if (old_ != tmp_) {
6634                         loop = adev->usec_timeout;
6635                         old_ = tmp_;
6636                 } else
6637                         udelay(1);
6638                 tmp_ = RREG32(reg_addr);
6639                 loop--;
6640                 if (!loop) {
6641                         DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08xn",
6642                                   inst, reg_name, (uint32_t)expected_value,
6643                                   (uint32_t)(tmp_ & (mask)));
6644                         ret = -ETIMEDOUT;
6645                         break;
6646                 }
6647         }
6648         return ret;
6649 }