2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
24 * Authors: Dave Airlie
28 #include <linux/power_supply.h>
29 #include <linux/kthread.h>
30 #include <linux/module.h>
31 #include <linux/console.h>
32 #include <linux/slab.h>
33 #include <linux/iommu.h>
34 #include <linux/pci.h>
35 #include <linux/pci-p2pdma.h>
36 #include <linux/apple-gmux.h>
38 #include <drm/drm_aperture.h>
39 #include <drm/drm_atomic_helper.h>
40 #include <drm/drm_crtc_helper.h>
41 #include <drm/drm_fb_helper.h>
42 #include <drm/drm_probe_helper.h>
43 #include <drm/amdgpu_drm.h>
44 #include <linux/device.h>
45 #include <linux/vgaarb.h>
46 #include <linux/vga_switcheroo.h>
47 #include <linux/efi.h>
49 #include "amdgpu_trace.h"
50 #include "amdgpu_i2c.h"
52 #include "amdgpu_atombios.h"
53 #include "amdgpu_atomfirmware.h"
55 #ifdef CONFIG_DRM_AMDGPU_SI
58 #ifdef CONFIG_DRM_AMDGPU_CIK
64 #include "bif/bif_4_1_d.h"
65 #include <linux/firmware.h>
66 #include "amdgpu_vf_error.h"
68 #include "amdgpu_amdkfd.h"
69 #include "amdgpu_pm.h"
71 #include "amdgpu_xgmi.h"
72 #include "amdgpu_ras.h"
73 #include "amdgpu_pmu.h"
74 #include "amdgpu_fru_eeprom.h"
75 #include "amdgpu_reset.h"
76 #include "amdgpu_virt.h"
77 #include "amdgpu_dev_coredump.h"
79 #include <linux/suspend.h>
80 #include <drm/task_barrier.h>
81 #include <linux/pm_runtime.h>
83 #include <drm/drm_drv.h>
85 #if IS_ENABLED(CONFIG_X86)
86 #include <asm/intel-family.h>
89 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
90 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
91 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
92 MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
93 MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
94 MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
95 MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
97 #define AMDGPU_RESUME_MS 2000
98 #define AMDGPU_MAX_RETRY_LIMIT 2
99 #define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
100 #define AMDGPU_PCIE_INDEX_FALLBACK (0x38 >> 2)
101 #define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2)
102 #define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2)
104 static const struct drm_driver amdgpu_kms_driver;
106 const char *amdgpu_asic_name[] = {
147 #define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMDGPU_MAX_IP_NUM - 1, 0)
149 * Default init level where all blocks are expected to be initialized. This is
150 * the level of initialization expected by default and also after a full reset
153 struct amdgpu_init_level amdgpu_init_default = {
154 .level = AMDGPU_INIT_LEVEL_DEFAULT,
155 .hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
159 * Minimal blocks needed to be initialized before a XGMI hive can be reset. This
160 * is used for cases like reset on initialization where the entire hive needs to
161 * be reset before first use.
163 struct amdgpu_init_level amdgpu_init_minimal_xgmi = {
164 .level = AMDGPU_INIT_LEVEL_MINIMAL_XGMI,
165 .hwini_ip_block_mask =
166 BIT(AMD_IP_BLOCK_TYPE_GMC) | BIT(AMD_IP_BLOCK_TYPE_SMC) |
167 BIT(AMD_IP_BLOCK_TYPE_COMMON) | BIT(AMD_IP_BLOCK_TYPE_IH) |
168 BIT(AMD_IP_BLOCK_TYPE_PSP)
171 static inline bool amdgpu_ip_member_of_hwini(struct amdgpu_device *adev,
172 enum amd_ip_block_type block)
174 return (adev->init_lvl->hwini_ip_block_mask & (1U << block)) != 0;
177 void amdgpu_set_init_level(struct amdgpu_device *adev,
178 enum amdgpu_init_lvl_id lvl)
181 case AMDGPU_INIT_LEVEL_MINIMAL_XGMI:
182 adev->init_lvl = &amdgpu_init_minimal_xgmi;
184 case AMDGPU_INIT_LEVEL_DEFAULT:
187 adev->init_lvl = &amdgpu_init_default;
192 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev);
195 * DOC: pcie_replay_count
197 * The amdgpu driver provides a sysfs API for reporting the total number
198 * of PCIe replays (NAKs)
199 * The file pcie_replay_count is used for this and returns the total
200 * number of replays as a sum of the NAKs generated and NAKs received
203 static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
204 struct device_attribute *attr, char *buf)
206 struct drm_device *ddev = dev_get_drvdata(dev);
207 struct amdgpu_device *adev = drm_to_adev(ddev);
208 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
210 return sysfs_emit(buf, "%llu\n", cnt);
213 static DEVICE_ATTR(pcie_replay_count, 0444,
214 amdgpu_device_get_pcie_replay_count, NULL);
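/*
 * Illustrative usage (not part of the driver): the attribute above is read
 * through sysfs from user space. Assuming card0 is an amdgpu device, the
 * counter can be queried with something like:
 *
 *	$ cat /sys/class/drm/card0/device/pcie_replay_count
 */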
216 static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
217 struct bin_attribute *attr, char *buf,
218 loff_t ppos, size_t count)
220 struct device *dev = kobj_to_dev(kobj);
221 struct drm_device *ddev = dev_get_drvdata(dev);
222 struct amdgpu_device *adev = drm_to_adev(ddev);
226 case AMDGPU_SYS_REG_STATE_XGMI:
227 bytes_read = amdgpu_asic_get_reg_state(
228 adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count);
230 case AMDGPU_SYS_REG_STATE_WAFL:
231 bytes_read = amdgpu_asic_get_reg_state(
232 adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count);
234 case AMDGPU_SYS_REG_STATE_PCIE:
235 bytes_read = amdgpu_asic_get_reg_state(
236 adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count);
238 case AMDGPU_SYS_REG_STATE_USR:
239 bytes_read = amdgpu_asic_get_reg_state(
240 adev, AMDGPU_REG_STATE_TYPE_USR, buf, count);
242 case AMDGPU_SYS_REG_STATE_USR_1:
243 bytes_read = amdgpu_asic_get_reg_state(
244 adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count);
253 BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL,
254 AMDGPU_SYS_REG_STATE_END);
256 int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev)
260 if (!amdgpu_asic_get_reg_state_supported(adev))
263 ret = sysfs_create_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
268 void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
270 if (!amdgpu_asic_get_reg_state_supported(adev))
272 sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
275 int amdgpu_ip_block_suspend(struct amdgpu_ip_block *ip_block)
279 if (ip_block->version->funcs->suspend) {
280 r = ip_block->version->funcs->suspend(ip_block);
282 dev_err(ip_block->adev->dev,
283 "suspend of IP block <%s> failed %d\n",
284 ip_block->version->funcs->name, r);
289 ip_block->status.hw = false;
293 int amdgpu_ip_block_resume(struct amdgpu_ip_block *ip_block)
297 if (ip_block->version->funcs->resume) {
298 r = ip_block->version->funcs->resume(ip_block);
300 dev_err(ip_block->adev->dev,
301 "resume of IP block <%s> failed %d\n",
302 ip_block->version->funcs->name, r);
307 ip_block->status.hw = true;
314 * The amdgpu driver provides a sysfs API for giving board related information.
315 * It provides the form factor information in the format
319 * Possible form factor values
321 * - "cem" - PCIE CEM card
322 * - "oam" - Open Compute Accelerator Module
323 * - "unknown" - Not known
327 static ssize_t amdgpu_device_get_board_info(struct device *dev,
328 struct device_attribute *attr,
331 struct drm_device *ddev = dev_get_drvdata(dev);
332 struct amdgpu_device *adev = drm_to_adev(ddev);
333 enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
336 if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
337 pkg_type = adev->smuio.funcs->get_pkg_type(adev);
340 case AMDGPU_PKG_TYPE_CEM:
343 case AMDGPU_PKG_TYPE_OAM:
351 return sysfs_emit(buf, "%s : %s\n", "type", pkg);
354 static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);
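/*
 * Illustrative example (not part of the driver): given the "%s : %s\n" format
 * used by sysfs_emit() above, reading the attribute on a CEM add-in card
 * would look like:
 *
 *	$ cat /sys/class/drm/card0/device/board_info
 *	type : cem
 */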
356 static struct attribute *amdgpu_board_attrs[] = {
357 &dev_attr_board_info.attr,
361 static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
362 struct attribute *attr, int n)
364 struct device *dev = kobj_to_dev(kobj);
365 struct drm_device *ddev = dev_get_drvdata(dev);
366 struct amdgpu_device *adev = drm_to_adev(ddev);
368 if (adev->flags & AMD_IS_APU)
374 static const struct attribute_group amdgpu_board_attrs_group = {
375 .attrs = amdgpu_board_attrs,
376 .is_visible = amdgpu_board_attrs_is_visible
379 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
383 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
385 * @dev: drm_device pointer
387 * Returns true if the device is a dGPU with ATPX power control,
388 * otherwise returns false.
390 bool amdgpu_device_supports_px(struct drm_device *dev)
392 struct amdgpu_device *adev = drm_to_adev(dev);
394 if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
400 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
402 * @dev: drm_device pointer
404 * Returns true if the device is a dGPU with ACPI power control,
405 * otherwise returns false.
407 bool amdgpu_device_supports_boco(struct drm_device *dev)
409 struct amdgpu_device *adev = drm_to_adev(dev);
412 ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
418 * amdgpu_device_supports_baco - Does the device support BACO
420 * @dev: drm_device pointer
423 * 1 if the device supports BACO;
424 * 3 if the device supports MACO (which only works if BACO is supported);
425 * otherwise returns 0.
427 int amdgpu_device_supports_baco(struct drm_device *dev)
429 struct amdgpu_device *adev = drm_to_adev(dev);
431 return amdgpu_asic_supports_baco(adev);
434 void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev)
436 struct drm_device *dev;
439 dev = adev_to_drm(adev);
441 adev->pm.rpm_mode = AMDGPU_RUNPM_NONE;
442 bamaco_support = amdgpu_device_supports_baco(dev);
444 switch (amdgpu_runtime_pm) {
446 if (bamaco_support & MACO_SUPPORT) {
447 adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
448 dev_info(adev->dev, "Forcing BAMACO for runtime pm\n");
449 } else if (bamaco_support == BACO_SUPPORT) {
450 adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
451 dev_info(adev->dev, "Requested mode BAMACO not available,fallback to use BACO\n");
455 if (bamaco_support & BACO_SUPPORT) {
456 adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
457 dev_info(adev->dev, "Forcing BACO for runtime pm\n");
462 if (amdgpu_device_supports_px(dev)) { /* enable PX as runtime mode */
463 adev->pm.rpm_mode = AMDGPU_RUNPM_PX;
464 dev_info(adev->dev, "Using ATPX for runtime pm\n");
465 } else if (amdgpu_device_supports_boco(dev)) { /* enable boco as runtime mode */
466 adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO;
467 dev_info(adev->dev, "Using BOCO for runtime pm\n");
472 switch (adev->asic_type) {
475 /* BACO is not supported on vega20 and arcturus */
478 /* enable BACO as runpm mode if noretry=0 */
479 if (!adev->gmc.noretry)
480 adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
483 /* enable BACO as runpm mode on CI+ */
484 adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
488 if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) {
489 if (bamaco_support & MACO_SUPPORT) {
490 adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
491 dev_info(adev->dev, "Using BAMACO for runtime pm\n");
493 dev_info(adev->dev, "Using BACO for runtime pm\n");
499 dev_info(adev->dev, "runtime pm is manually disabled\n");
506 if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE)
507 dev_info(adev->dev, "Runtime PM not available\n");
510 * amdgpu_device_supports_smart_shift - Is the device a dGPU with
511 * smart shift support
513 * @dev: drm_device pointer
515 * Returns true if the device is a dGPU with Smart Shift support,
516 * otherwise returns false.
518 bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
520 return (amdgpu_device_supports_boco(dev) &&
521 amdgpu_acpi_is_power_shift_control_supported());
525 * VRAM access helper functions
529 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
531 * @adev: amdgpu_device pointer
532 * @pos: offset of the buffer in vram
533 * @buf: virtual address of the buffer in system memory
534 * @size: read/write size, the size of @buf must be >= @size
535 * @write: true - write to vram, otherwise - read from vram
537 void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
538 void *buf, size_t size, bool write)
541 uint32_t hi = ~0, tmp = 0;
542 uint32_t *data = buf;
546 if (!drm_dev_enter(adev_to_drm(adev), &idx))
549 BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
551 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
552 for (last = pos + size; pos < last; pos += 4) {
555 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
557 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
561 WREG32_NO_KIQ(mmMM_DATA, *data++);
563 *data++ = RREG32_NO_KIQ(mmMM_DATA);
566 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
571 * amdgpu_device_aper_access - access vram via the vram aperture
573 * @adev: amdgpu_device pointer
574 * @pos: offset of the buffer in vram
575 * @buf: virtual address of the buffer in system memory
576 * @size: read/write size, the size of @buf must be >= @size
577 * @write: true - write to vram, otherwise - read from vram
579 * The return value means how many bytes have been transferred.
581 size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
582 void *buf, size_t size, bool write)
589 if (!adev->mman.aper_base_kaddr)
592 last = min(pos + size, adev->gmc.visible_vram_size);
594 addr = adev->mman.aper_base_kaddr + pos;
598 memcpy_toio(addr, buf, count);
599 /* Make sure HDP write cache flush happens without any reordering
600 * after the system memory contents are sent over PCIe device
603 amdgpu_device_flush_hdp(adev, NULL);
605 amdgpu_device_invalidate_hdp(adev, NULL);
606 /* Make sure HDP read cache is invalidated before issuing a read
610 memcpy_fromio(buf, addr, count);
622 * amdgpu_device_vram_access - read/write a buffer in vram
624 * @adev: amdgpu_device pointer
625 * @pos: offset of the buffer in vram
626 * @buf: virtual address of the buffer in system memory
627 * @size: read/write size, the size of @buf must be >= @size
628 * @write: true - write to vram, otherwise - read from vram
630 void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
631 void *buf, size_t size, bool write)
635 /* try using the vram aperture to access vram first */
636 count = amdgpu_device_aper_access(adev, pos, buf, size, write);
639 /* use MM to access the rest of vram */
642 amdgpu_device_mm_access(adev, pos, buf, size, write);
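/*
 * Usage sketch (illustrative only): reading the first four dwords of VRAM
 * into a stack buffer. Offsets and sizes should be dword aligned, since the
 * MM_INDEX/MM_DATA fallback path BUG()s on unaligned accesses.
 *
 *	u32 tmp[4];
 *
 *	amdgpu_device_vram_access(adev, 0, tmp, sizeof(tmp), false);
 */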
647 * register access helper functions.
650 /* Check if hw access should be skipped because of hotplug or device error */
651 bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
653 if (adev->no_hw_access)
656 #ifdef CONFIG_LOCKDEP
658 * This is a bit complicated to understand, so worth a comment. What we assert
659 * here is that the GPU reset is not running on another thread in parallel.
661 * For this we trylock the read side of the reset semaphore; if that succeeds
662 * we know that the reset is not running in parallel.
664 * If the trylock fails we assert that we are either already holding the read
665 * side of the lock or are the reset thread itself and hold the write side of
669 if (down_read_trylock(&adev->reset_domain->sem))
670 up_read(&adev->reset_domain->sem);
672 lockdep_assert_held(&adev->reset_domain->sem);
679 * amdgpu_device_rreg - read a memory mapped IO or indirect register
681 * @adev: amdgpu_device pointer
682 * @reg: dword aligned register offset
683 * @acc_flags: access flags which require special behavior
685 * Returns the 32 bit value from the offset specified.
687 uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
688 uint32_t reg, uint32_t acc_flags)
692 if (amdgpu_device_skip_hw_access(adev))
695 if ((reg * 4) < adev->rmmio_size) {
696 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
697 amdgpu_sriov_runtime(adev) &&
698 down_read_trylock(&adev->reset_domain->sem)) {
699 ret = amdgpu_kiq_rreg(adev, reg, 0);
700 up_read(&adev->reset_domain->sem);
702 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
705 ret = adev->pcie_rreg(adev, reg * 4);
708 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
714 * MMIO register read with bytes helper functions
715 * @offset: byte offset from MMIO start
719 * amdgpu_mm_rreg8 - read a memory mapped IO register
721 * @adev: amdgpu_device pointer
722 * @offset: byte aligned register offset
724 * Returns the 8 bit value from the offset specified.
726 uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
728 if (amdgpu_device_skip_hw_access(adev))
731 if (offset < adev->rmmio_size)
732 return (readb(adev->rmmio + offset));
738 * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC
740 * @adev: amdgpu_device pointer
741 * @reg: dword aligned register offset
742 * @acc_flags: access flags which require special behavior
743 * @xcc_id: xcc accelerated compute core id
745 * Returns the 32 bit value from the offset specified.
747 uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
748 uint32_t reg, uint32_t acc_flags,
751 uint32_t ret, rlcg_flag;
753 if (amdgpu_device_skip_hw_access(adev))
756 if ((reg * 4) < adev->rmmio_size) {
757 if (amdgpu_sriov_vf(adev) &&
758 !amdgpu_sriov_runtime(adev) &&
759 adev->gfx.rlc.rlcg_reg_access_supported &&
760 amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
763 ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, GET_INST(GC, xcc_id));
764 } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
765 amdgpu_sriov_runtime(adev) &&
766 down_read_trylock(&adev->reset_domain->sem)) {
767 ret = amdgpu_kiq_rreg(adev, reg, xcc_id);
768 up_read(&adev->reset_domain->sem);
770 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
773 ret = adev->pcie_rreg(adev, reg * 4);
780 * MMIO register write with bytes helper functions
781 * @offset: byte offset from MMIO start
782 * @value: the value to be written to the register
786 * amdgpu_mm_wreg8 - write a memory mapped IO register
788 * @adev: amdgpu_device pointer
789 * @offset: byte aligned register offset
790 * @value: 8 bit value to write
792 * Writes the value specified to the offset specified.
794 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
796 if (amdgpu_device_skip_hw_access(adev))
799 if (offset < adev->rmmio_size)
800 writeb(value, adev->rmmio + offset);
806 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
808 * @adev: amdgpu_device pointer
809 * @reg: dword aligned register offset
810 * @v: 32 bit value to write to the register
811 * @acc_flags: access flags which require special behavior
813 * Writes the value specified to the offset specified.
815 void amdgpu_device_wreg(struct amdgpu_device *adev,
816 uint32_t reg, uint32_t v,
819 if (amdgpu_device_skip_hw_access(adev))
822 if ((reg * 4) < adev->rmmio_size) {
823 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
824 amdgpu_sriov_runtime(adev) &&
825 down_read_trylock(&adev->reset_domain->sem)) {
826 amdgpu_kiq_wreg(adev, reg, v, 0);
827 up_read(&adev->reset_domain->sem);
829 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
832 adev->pcie_wreg(adev, reg * 4, v);
835 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
839 * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
841 * @adev: amdgpu_device pointer
842 * @reg: mmio/rlc register
844 * @xcc_id: xcc accelerated compute core id
846 * This function is invoked only for debugfs register access.
848 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
849 uint32_t reg, uint32_t v,
852 if (amdgpu_device_skip_hw_access(adev))
855 if (amdgpu_sriov_fullaccess(adev) &&
856 adev->gfx.rlc.funcs &&
857 adev->gfx.rlc.funcs->is_rlcg_access_range) {
858 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
859 return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
860 } else if ((reg * 4) >= adev->rmmio_size) {
861 adev->pcie_wreg(adev, reg * 4, v);
863 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
868 * amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC
870 * @adev: amdgpu_device pointer
871 * @reg: dword aligned register offset
872 * @v: 32 bit value to write to the register
873 * @acc_flags: access flags which require special behavior
874 * @xcc_id: xcc accelerated compute core id
876 * Writes the value specified to the offset specified.
878 void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
879 uint32_t reg, uint32_t v,
880 uint32_t acc_flags, uint32_t xcc_id)
884 if (amdgpu_device_skip_hw_access(adev))
887 if ((reg * 4) < adev->rmmio_size) {
888 if (amdgpu_sriov_vf(adev) &&
889 !amdgpu_sriov_runtime(adev) &&
890 adev->gfx.rlc.rlcg_reg_access_supported &&
891 amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
894 amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, GET_INST(GC, xcc_id));
895 } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
896 amdgpu_sriov_runtime(adev) &&
897 down_read_trylock(&adev->reset_domain->sem)) {
898 amdgpu_kiq_wreg(adev, reg, v, xcc_id);
899 up_read(&adev->reset_domain->sem);
901 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
904 adev->pcie_wreg(adev, reg * 4, v);
909 * amdgpu_device_indirect_rreg - read an indirect register
911 * @adev: amdgpu_device pointer
912 * @reg_addr: indirect register address to read from
914 * Returns the value of indirect register @reg_addr
916 u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
919 unsigned long flags, pcie_index, pcie_data;
920 void __iomem *pcie_index_offset;
921 void __iomem *pcie_data_offset;
924 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
925 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
927 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
928 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
929 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
931 writel(reg_addr, pcie_index_offset);
932 readl(pcie_index_offset);
933 r = readl(pcie_data_offset);
934 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
939 u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
942 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
944 void __iomem *pcie_index_offset;
945 void __iomem *pcie_index_hi_offset;
946 void __iomem *pcie_data_offset;
948 if (unlikely(!adev->nbio.funcs)) {
949 pcie_index = AMDGPU_PCIE_INDEX_FALLBACK;
950 pcie_data = AMDGPU_PCIE_DATA_FALLBACK;
952 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
953 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
956 if (reg_addr >> 32) {
957 if (unlikely(!adev->nbio.funcs))
958 pcie_index_hi = AMDGPU_PCIE_INDEX_HI_FALLBACK;
960 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
965 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
966 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
967 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
968 if (pcie_index_hi != 0)
969 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
972 writel(reg_addr, pcie_index_offset);
973 readl(pcie_index_offset);
974 if (pcie_index_hi != 0) {
975 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
976 readl(pcie_index_hi_offset);
978 r = readl(pcie_data_offset);
980 /* clear the high bits */
981 if (pcie_index_hi != 0) {
982 writel(0, pcie_index_hi_offset);
983 readl(pcie_index_hi_offset);
986 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
992 * amdgpu_device_indirect_rreg64 - read a 64-bit indirect register
994 * @adev: amdgpu_device pointer
995 * @reg_addr: indirect register address to read from
997 * Returns the value of indirect register @reg_addr
999 u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
1002 unsigned long flags, pcie_index, pcie_data;
1003 void __iomem *pcie_index_offset;
1004 void __iomem *pcie_data_offset;
1007 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1008 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1010 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1011 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1012 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1014 /* read low 32 bits */
1015 writel(reg_addr, pcie_index_offset);
1016 readl(pcie_index_offset);
1017 r = readl(pcie_data_offset);
1018 /* read high 32 bits */
1019 writel(reg_addr + 4, pcie_index_offset);
1020 readl(pcie_index_offset);
1021 r |= ((u64)readl(pcie_data_offset) << 32);
1022 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1027 u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
1030 unsigned long flags, pcie_index, pcie_data;
1031 unsigned long pcie_index_hi = 0;
1032 void __iomem *pcie_index_offset;
1033 void __iomem *pcie_index_hi_offset;
1034 void __iomem *pcie_data_offset;
1037 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1038 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1039 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
1040 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
1042 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1043 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1044 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1045 if (pcie_index_hi != 0)
1046 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
1049 /* read low 32 bits */
1050 writel(reg_addr, pcie_index_offset);
1051 readl(pcie_index_offset);
1052 if (pcie_index_hi != 0) {
1053 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1054 readl(pcie_index_hi_offset);
1056 r = readl(pcie_data_offset);
1057 /* read high 32 bits */
1058 writel(reg_addr + 4, pcie_index_offset);
1059 readl(pcie_index_offset);
1060 if (pcie_index_hi != 0) {
1061 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1062 readl(pcie_index_hi_offset);
1064 r |= ((u64)readl(pcie_data_offset) << 32);
1066 /* clear the high bits */
1067 if (pcie_index_hi != 0) {
1068 writel(0, pcie_index_hi_offset);
1069 readl(pcie_index_hi_offset);
1072 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1078 * amdgpu_device_indirect_wreg - write to an indirect register
1080 * @adev: amdgpu_device pointer
1081 * @reg_addr: indirect register offset
1082 * @reg_data: indirect register data
1085 void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
1086 u32 reg_addr, u32 reg_data)
1088 unsigned long flags, pcie_index, pcie_data;
1089 void __iomem *pcie_index_offset;
1090 void __iomem *pcie_data_offset;
1092 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1093 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1095 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1096 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1097 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1099 writel(reg_addr, pcie_index_offset);
1100 readl(pcie_index_offset);
1101 writel(reg_data, pcie_data_offset);
1102 readl(pcie_data_offset);
1103 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1106 void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
1107 u64 reg_addr, u32 reg_data)
1109 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
1110 void __iomem *pcie_index_offset;
1111 void __iomem *pcie_index_hi_offset;
1112 void __iomem *pcie_data_offset;
1114 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1115 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1116 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
1117 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
1121 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1122 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1123 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1124 if (pcie_index_hi != 0)
1125 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
1128 writel(reg_addr, pcie_index_offset);
1129 readl(pcie_index_offset);
1130 if (pcie_index_hi != 0) {
1131 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1132 readl(pcie_index_hi_offset);
1134 writel(reg_data, pcie_data_offset);
1135 readl(pcie_data_offset);
1137 /* clear the high bits */
1138 if (pcie_index_hi != 0) {
1139 writel(0, pcie_index_hi_offset);
1140 readl(pcie_index_hi_offset);
1143 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1147 * amdgpu_device_indirect_wreg64 - write to a 64-bit indirect register
1149 * @adev: amdgpu_device pointer
1150 * @reg_addr: indirect register offset
1151 * @reg_data: indirect register data
1154 void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
1155 u32 reg_addr, u64 reg_data)
1157 unsigned long flags, pcie_index, pcie_data;
1158 void __iomem *pcie_index_offset;
1159 void __iomem *pcie_data_offset;
1161 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1162 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1164 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1165 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1166 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1168 /* write low 32 bits */
1169 writel(reg_addr, pcie_index_offset);
1170 readl(pcie_index_offset);
1171 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
1172 readl(pcie_data_offset);
1173 /* write high 32 bits */
1174 writel(reg_addr + 4, pcie_index_offset);
1175 readl(pcie_index_offset);
1176 writel((u32)(reg_data >> 32), pcie_data_offset);
1177 readl(pcie_data_offset);
1178 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1181 void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
1182 u64 reg_addr, u64 reg_data)
1184 unsigned long flags, pcie_index, pcie_data;
1185 unsigned long pcie_index_hi = 0;
1186 void __iomem *pcie_index_offset;
1187 void __iomem *pcie_index_hi_offset;
1188 void __iomem *pcie_data_offset;
1190 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1191 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1192 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
1193 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
1195 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1196 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1197 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1198 if (pcie_index_hi != 0)
1199 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
1202 /* write low 32 bits */
1203 writel(reg_addr, pcie_index_offset);
1204 readl(pcie_index_offset);
1205 if (pcie_index_hi != 0) {
1206 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1207 readl(pcie_index_hi_offset);
1209 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
1210 readl(pcie_data_offset);
1211 /* write high 32 bits */
1212 writel(reg_addr + 4, pcie_index_offset);
1213 readl(pcie_index_offset);
1214 if (pcie_index_hi != 0) {
1215 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1216 readl(pcie_index_hi_offset);
1218 writel((u32)(reg_data >> 32), pcie_data_offset);
1219 readl(pcie_data_offset);
1221 /* clear the high bits */
1222 if (pcie_index_hi != 0) {
1223 writel(0, pcie_index_hi_offset);
1224 readl(pcie_index_hi_offset);
1227 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1231 * amdgpu_device_get_rev_id - query device rev_id
1233 * @adev: amdgpu_device pointer
1235 * Return device rev_id
1237 u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
1239 return adev->nbio.funcs->get_rev_id(adev);
1243 * amdgpu_invalid_rreg - dummy reg read function
1245 * @adev: amdgpu_device pointer
1246 * @reg: offset of register
1248 * Dummy register read function. Used for register blocks
1249 * that certain asics don't have (all asics).
1250 * Returns the value in the register.
1252 static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
1254 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
1259 static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
1261 DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
1267 * amdgpu_invalid_wreg - dummy reg write function
1269 * @adev: amdgpu_device pointer
1270 * @reg: offset of register
1271 * @v: value to write to the register
1273 * Dummy register write function. Used for register blocks
1274 * that certain asics don't have (all asics).
1276 static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
1278 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
1283 static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
1285 DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
1291 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
1293 * @adev: amdgpu_device pointer
1294 * @reg: offset of register
1296 * Dummy register read function. Used for register blocks
1297 * that certain asics don't have (all asics).
1298 * Returns the value in the register.
1300 static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
1302 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
1307 static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
1309 DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
1315 * amdgpu_invalid_wreg64 - dummy reg write function
1317 * @adev: amdgpu_device pointer
1318 * @reg: offset of register
1319 * @v: value to write to the register
1321 * Dummy register write function. Used for register blocks
1322 * that certain asics don't have (all asics).
1324 static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
1326 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
1331 static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
1333 DRM_ERROR("Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
1339 * amdgpu_block_invalid_rreg - dummy reg read function
1341 * @adev: amdgpu_device pointer
1342 * @block: offset of instance
1343 * @reg: offset of register
1345 * Dummy register read function. Used for register blocks
1346 * that certain asics don't have (all asics).
1347 * Returns the value in the register.
1349 static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
1350 uint32_t block, uint32_t reg)
1352 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
1359 * amdgpu_block_invalid_wreg - dummy reg write function
1361 * @adev: amdgpu_device pointer
1362 * @block: offset of instance
1363 * @reg: offset of register
1364 * @v: value to write to the register
1366 * Dummy register write function. Used for register blocks
1367 * that certain asics don't have (all asics).
1369 static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
1371 uint32_t reg, uint32_t v)
1373 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
1379 * amdgpu_device_asic_init - Wrapper for atom asic_init
1381 * @adev: amdgpu_device pointer
1383 * Does any asic specific work and then calls atom asic init.
1385 static int amdgpu_device_asic_init(struct amdgpu_device *adev)
1389 amdgpu_asic_pre_asic_init(adev);
1391 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
1392 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
1393 amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
1394 amdgpu_psp_wait_for_bootloader(adev);
1395 ret = amdgpu_atomfirmware_asic_init(adev, true);
1398 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
1405 * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
1407 * @adev: amdgpu_device pointer
1409 * Allocates a scratch page of VRAM for use by various things in the
1412 static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
1414 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
1415 AMDGPU_GEM_DOMAIN_VRAM |
1416 AMDGPU_GEM_DOMAIN_GTT,
1417 &adev->mem_scratch.robj,
1418 &adev->mem_scratch.gpu_addr,
1419 (void **)&adev->mem_scratch.ptr);
1423 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
1425 * @adev: amdgpu_device pointer
1427 * Frees the VRAM scratch page.
1429 static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
1431 amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
1435 * amdgpu_device_program_register_sequence - program an array of registers.
1437 * @adev: amdgpu_device pointer
1438 * @registers: pointer to the register array
1439 * @array_size: size of the register array
1441 * Programs an array of registers with AND/OR masks.
1442 * This is a helper for setting golden registers.
1444 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
1445 const u32 *registers,
1446 const u32 array_size)
1448 u32 tmp, reg, and_mask, or_mask;
1454 for (i = 0; i < array_size; i += 3) {
1455 reg = registers[i + 0];
1456 and_mask = registers[i + 1];
1457 or_mask = registers[i + 2];
1459 if (and_mask == 0xffffffff) {
1464 if (adev->family >= AMDGPU_FAMILY_AI)
1465 tmp |= (or_mask & and_mask);
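/*
 * Illustrative example (hypothetical values): golden register arrays are laid
 * out as {offset, and_mask, or_mask} triplets. An and_mask of 0xffffffff means
 * the register is simply set to or_mask; otherwise the masked bits are cleared
 * and or_mask is ORed in.
 *
 *	static const u32 example_golden_regs[] = {
 *		0x263e, 0xffffffff, 0x02420244,
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev, example_golden_regs,
 *						ARRAY_SIZE(example_golden_regs));
 */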
1474 * amdgpu_device_pci_config_reset - reset the GPU
1476 * @adev: amdgpu_device pointer
1478 * Resets the GPU using the pci config reset sequence.
1479 * Only applicable to asics prior to vega10.
1481 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
1483 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1487 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1489 * @adev: amdgpu_device pointer
1491 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1493 int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1495 return pci_reset_function(adev->pdev);
1499 * amdgpu_device_wb_*()
1500 * Writeback is the method by which the GPU updates special pages in memory
1501 * with the status of certain GPU events (fences, ring pointers, etc.).
1505 * amdgpu_device_wb_fini - Disable Writeback and free memory
1507 * @adev: amdgpu_device pointer
1509 * Disables Writeback and frees the Writeback memory (all asics).
1510 * Used at driver shutdown.
1512 static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
1514 if (adev->wb.wb_obj) {
1515 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1517 (void **)&adev->wb.wb);
1518 adev->wb.wb_obj = NULL;
1523 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
1525 * @adev: amdgpu_device pointer
1527 * Initializes writeback and allocates writeback memory (all asics).
1528 * Used at driver startup.
1529 * Returns 0 on success or a negative error code on failure.
1531 static int amdgpu_device_wb_init(struct amdgpu_device *adev)
1535 if (adev->wb.wb_obj == NULL) {
1536 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1537 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
1538 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1539 &adev->wb.wb_obj, &adev->wb.gpu_addr,
1540 (void **)&adev->wb.wb);
1542 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1546 adev->wb.num_wb = AMDGPU_MAX_WB;
1547 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1549 /* clear wb memory */
1550 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
1557 * amdgpu_device_wb_get - Allocate a wb entry
1559 * @adev: amdgpu_device pointer
1562 * Allocate a wb slot for use by the driver (all asics).
1563 * Returns 0 on success or -EINVAL on failure.
1565 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
1567 unsigned long flags, offset;
1569 spin_lock_irqsave(&adev->wb.lock, flags);
1570 offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
1571 if (offset < adev->wb.num_wb) {
1572 __set_bit(offset, adev->wb.used);
1573 spin_unlock_irqrestore(&adev->wb.lock, flags);
1574 *wb = offset << 3; /* convert to dw offset */
1577 spin_unlock_irqrestore(&adev->wb.lock, flags);
1583 * amdgpu_device_wb_free - Free a wb entry
1585 * @adev: amdgpu_device pointer
1588 * Free a wb slot allocated for use by the driver (all asics)
1590 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
1592 unsigned long flags;
1595 spin_lock_irqsave(&adev->wb.lock, flags);
1596 if (wb < adev->wb.num_wb)
1597 __clear_bit(wb, adev->wb.used);
1598 spin_unlock_irqrestore(&adev->wb.lock, flags);
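/*
 * Usage sketch (illustrative, local variable names are hypothetical): a ring
 * allocates a writeback slot, derives its GPU and CPU addresses, and frees it
 * on teardown. The value returned in @wb is a dword offset into the
 * writeback page.
 *
 *	u32 wb;
 *
 *	r = amdgpu_device_wb_get(adev, &wb);
 *	if (r)
 *		return r;
 *	wb_gpu_addr = adev->wb.gpu_addr + (wb * 4);
 *	wb_cpu_addr = &adev->wb.wb[wb];
 *	...
 *	amdgpu_device_wb_free(adev, wb);
 */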
1602 * amdgpu_device_resize_fb_bar - try to resize FB BAR
1604 * @adev: amdgpu_device pointer
1606 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1607 * to fail, but if any of the BARs is not accessible after the resize we abort
1608 * driver loading by returning -ENODEV.
1610 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1612 int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
1613 struct pci_bus *root;
1614 struct resource *res;
1619 if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1623 if (amdgpu_sriov_vf(adev))
1626 /* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */
1627 if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR))
1628 DRM_WARN("System can't access extended configuration space, please check!!\n");
1630 /* skip if the bios has already enabled large BAR */
1631 if (adev->gmc.real_vram_size &&
1632 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1635 /* Check if the root BUS has 64bit memory resources */
1636 root = adev->pdev->bus;
1637 while (root->parent)
1638 root = root->parent;
1640 pci_bus_for_each_resource(root, res, i) {
1641 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
1642 res->start > 0x100000000ull)
1646 /* Trying to resize is pointless without a root hub window above 4GB */
1650 /* Limit the BAR size to what is available */
1651 rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1654 /* Disable memory decoding while we change the BAR addresses and size */
1655 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1656 pci_write_config_word(adev->pdev, PCI_COMMAND,
1657 cmd & ~PCI_COMMAND_MEMORY);
1659 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
1660 amdgpu_doorbell_fini(adev);
1661 if (adev->asic_type >= CHIP_BONAIRE)
1662 pci_release_resource(adev->pdev, 2);
1664 pci_release_resource(adev->pdev, 0);
1666 r = pci_resize_resource(adev->pdev, 0, rbar_size);
1668 DRM_INFO("Not enough PCI address space for a large BAR.");
1669 else if (r && r != -ENOTSUPP)
1670 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1672 pci_assign_unassigned_bus_resources(adev->pdev->bus);
1674 /* When the doorbell or fb BAR isn't available we have no chance of
1677 r = amdgpu_doorbell_init(adev);
1678 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1681 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1686 static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
1688 if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
1695 * GPU helpers function.
1698 * amdgpu_device_need_post - check if the hw need post or not
1700 * @adev: amdgpu_device pointer
1702 * Check if the asic has been initialized (all asics) at driver startup
1703 * or whether post is needed because a hw reset was performed.
1704 * Returns true if posting is needed, false if not.
1706 bool amdgpu_device_need_post(struct amdgpu_device *adev)
1710 if (amdgpu_sriov_vf(adev))
1713 if (!amdgpu_device_read_bios(adev))
1716 if (amdgpu_passthrough(adev)) {
1717 * for FIJI: in the whole-GPU pass-through virtualization case, after a VM
1718 * reboot some old SMC firmware still needs the driver to do a vPost or the
1719 * GPU hangs. SMC firmware versions above 22.15 don't have this flaw, so
1720 * force a vPost for SMC versions below 22.15.
1722 if (adev->asic_type == CHIP_FIJI) {
1726 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1727 /* force vPost if an error occurred */
1731 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1732 release_firmware(adev->pm.fw);
1733 if (fw_ver < 0x00160e00)
1738 /* Don't post if we need to reset whole hive on init */
1739 if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
1742 if (adev->has_hw_reset) {
1743 adev->has_hw_reset = false;
1747 /* bios scratch used on CIK+ */
1748 if (adev->asic_type >= CHIP_BONAIRE)
1749 return amdgpu_atombios_scratch_need_asic_init(adev);
1751 /* check MEM_SIZE for older asics */
1752 reg = amdgpu_asic_get_config_memsize(adev);
1754 if ((reg != 0) && (reg != 0xffffffff))
1761 * Check whether seamless boot is supported.
1763 * So far we only support seamless boot on DCE 3.0 or later.
1764 * If users report that it works on older ASICs as well, we may
1767 bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1769 switch (amdgpu_seamless) {
1777 DRM_ERROR("Invalid value for amdgpu.seamless: %d\n",
1782 if (!(adev->flags & AMD_IS_APU))
1785 if (adev->mman.keep_stolen_vga_memory)
1788 return amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0);
1792 * Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids
1793 * don't support dynamic speed switching. Until we have confirmation from Intel
1794 * that a specific host supports it, it's safer that we keep it disabled for all.
1796 * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1797 * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1799 static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev)
1801 #if IS_ENABLED(CONFIG_X86)
1802 struct cpuinfo_x86 *c = &cpu_data(0);
1804 /* eGPU change speeds based on USB4 fabric conditions */
1805 if (dev_is_removable(adev->dev))
1808 if (c->x86_vendor == X86_VENDOR_INTEL)
1815 * amdgpu_device_should_use_aspm - check if the device should program ASPM
1817 * @adev: amdgpu_device pointer
1819 * Confirm whether the module parameter and pcie bridge agree that ASPM should
1820 * be set for this device.
1822 * Returns true if it should be used or false if not.
1824 bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1826 switch (amdgpu_aspm) {
1836 if (adev->flags & AMD_IS_APU)
1838 if (!(adev->pm.pp_feature & PP_PCIE_DPM_MASK))
1840 return pcie_aspm_enabled(adev->pdev);
1843 /* if we get transitioned to only one device, take VGA back */
1845 * amdgpu_device_vga_set_decode - enable/disable vga decode
1847 * @pdev: PCI device pointer
1848 * @state: enable/disable vga decode
1850 * Enable/disable vga decode (all asics).
1851 * Returns VGA resource flags.
1853 static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1856 struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
1858 amdgpu_asic_set_vga_state(adev, state);
1860 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1861 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1863 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1867 * amdgpu_device_check_block_size - validate the vm block size
1869 * @adev: amdgpu_device pointer
1871 * Validates the vm block size specified via module parameter.
1872 * The vm block size defines the number of bits in the page table versus the page directory,
1873 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1874 * page table and the remaining bits are in the page directory.
1876 static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1878 /* defines number of bits in page table versus page directory,
1879 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1880 * page table and the remaining bits are in the page directory
1882 if (amdgpu_vm_block_size == -1)
1885 if (amdgpu_vm_block_size < 9) {
1886 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1887 amdgpu_vm_block_size);
1888 amdgpu_vm_block_size = -1;
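/*
 * Worked example (illustrative): with 4KB pages there are 12 offset bits.
 * For a hypothetical 256GB (2^38 byte) VM space and amdgpu_vm_block_size = 9,
 * each page table covers 2^9 pages (2MB of address space) and the remaining
 * 38 - 12 - 9 = 17 bits are resolved by the page directory.
 */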
1893 * amdgpu_device_check_vm_size - validate the vm size
1895 * @adev: amdgpu_device pointer
1897 * Validates the vm size in GB specified via module parameter.
1898 * The VM size is the size of the GPU virtual memory space in GB.
1900 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1902 /* no need to check the default value */
1903 if (amdgpu_vm_size == -1)
1906 if (amdgpu_vm_size < 1) {
1907 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1909 amdgpu_vm_size = -1;
1913 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1916 bool is_os_64 = (sizeof(void *) == 8);
1917 uint64_t total_memory;
1918 uint64_t dram_size_seven_GB = 0x1B8000000;
1919 uint64_t dram_size_three_GB = 0xB8000000;
1921 if (amdgpu_smu_memory_pool_size == 0)
1925 DRM_WARN("Not 64-bit OS, feature not supported\n");
1929 total_memory = (uint64_t)si.totalram * si.mem_unit;
1931 if ((amdgpu_smu_memory_pool_size == 1) ||
1932 (amdgpu_smu_memory_pool_size == 2)) {
1933 if (total_memory < dram_size_three_GB)
1935 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1936 (amdgpu_smu_memory_pool_size == 8)) {
1937 if (total_memory < dram_size_seven_GB)
1940 DRM_WARN("Smu memory pool size not supported\n");
1943 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1948 DRM_WARN("No enough system memory\n");
1950 adev->pm.smu_prv_buffer_size = 0;
1953 static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1955 if (!(adev->flags & AMD_IS_APU) ||
1956 adev->asic_type < CHIP_RAVEN)
1959 switch (adev->asic_type) {
1961 if (adev->pdev->device == 0x15dd)
1962 adev->apu_flags |= AMD_APU_IS_RAVEN;
1963 if (adev->pdev->device == 0x15d8)
1964 adev->apu_flags |= AMD_APU_IS_PICASSO;
1967 if ((adev->pdev->device == 0x1636) ||
1968 (adev->pdev->device == 0x164c))
1969 adev->apu_flags |= AMD_APU_IS_RENOIR;
1971 adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1974 adev->apu_flags |= AMD_APU_IS_VANGOGH;
1976 case CHIP_YELLOW_CARP:
1978 case CHIP_CYAN_SKILLFISH:
1979 if ((adev->pdev->device == 0x13FE) ||
1980 (adev->pdev->device == 0x143F))
1981 adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1991 * amdgpu_device_check_arguments - validate module params
1993 * @adev: amdgpu_device pointer
1995 * Validates certain module parameters and updates
1996 * the associated values used by the driver (all asics).
1998 static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
2002 if (amdgpu_sched_jobs < 4) {
2003 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
2005 amdgpu_sched_jobs = 4;
2006 } else if (!is_power_of_2(amdgpu_sched_jobs)) {
2007 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
2009 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
2012 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
2013 /* gart size must be greater than or equal to 32M */
2014 dev_warn(adev->dev, "gart size (%d) too small\n",
2016 amdgpu_gart_size = -1;
2019 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
2020 /* gtt size must be greater than or equal to 32M */
2021 dev_warn(adev->dev, "gtt size (%d) too small\n",
2023 amdgpu_gtt_size = -1;
2026 /* valid range is between 4 and 9 inclusive */
2027 if (amdgpu_vm_fragment_size != -1 &&
2028 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
2029 dev_warn(adev->dev, "valid range is between 4 and 9\n");
2030 amdgpu_vm_fragment_size = -1;
2033 if (amdgpu_sched_hw_submission < 2) {
2034 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
2035 amdgpu_sched_hw_submission);
2036 amdgpu_sched_hw_submission = 2;
2037 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
2038 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
2039 amdgpu_sched_hw_submission);
2040 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
2043 if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
2044 dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
2045 amdgpu_reset_method = -1;
2048 amdgpu_device_check_smu_prv_buffer_size(adev);
2050 amdgpu_device_check_vm_size(adev);
2052 amdgpu_device_check_block_size(adev);
2054 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
2056 for (i = 0; i < MAX_XCP; i++)
2057 adev->enforce_isolation[i] = !!enforce_isolation;
2063 * amdgpu_switcheroo_set_state - set switcheroo state
2065 * @pdev: pci dev pointer
2066 * @state: vga_switcheroo state
2068 * Callback for the switcheroo driver. Suspends or resumes
2069 * the asic before or after it is powered up using ACPI methods.
2071 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
2072 enum vga_switcheroo_state state)
2074 struct drm_device *dev = pci_get_drvdata(pdev);
2077 if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
2080 if (state == VGA_SWITCHEROO_ON) {
2081 pr_info("switched on\n");
2082 /* don't suspend or resume card normally */
2083 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
2085 pci_set_power_state(pdev, PCI_D0);
2086 amdgpu_device_load_pci_state(pdev);
2087 r = pci_enable_device(pdev);
2089 DRM_WARN("pci_enable_device failed (%d)\n", r);
2090 amdgpu_device_resume(dev, true);
2092 dev->switch_power_state = DRM_SWITCH_POWER_ON;
2094 pr_info("switched off\n");
2095 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
2096 amdgpu_device_prepare(dev);
2097 amdgpu_device_suspend(dev, true);
2098 amdgpu_device_cache_pci_state(pdev);
2099 /* Shut down the device */
2100 pci_disable_device(pdev);
2101 pci_set_power_state(pdev, PCI_D3cold);
2102 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
2107 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
2109 * @pdev: pci dev pointer
2111 * Callback for the switcheroo driver. Checks whether the switcheroo
2112 * state can be changed.
2113 * Returns true if the state can be changed, false if not.
2115 static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
2117 struct drm_device *dev = pci_get_drvdata(pdev);
2120 * FIXME: open_count is protected by drm_global_mutex but that would lead to
2121 * locking inversion with the driver load path. And the access here is
2122 * completely racy anyway. So don't bother with locking for now.
2124 return atomic_read(&dev->open_count) == 0;
2127 static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
2128 .set_gpu_state = amdgpu_switcheroo_set_state,
2130 .can_switch = amdgpu_switcheroo_can_switch,
2134 * amdgpu_device_ip_set_clockgating_state - set the CG state
2136 * @dev: amdgpu_device pointer
2137 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2138 * @state: clockgating state (gate or ungate)
2140 * Sets the requested clockgating state for all instances of
2141 * the hardware IP specified.
2142 * Returns the error code from the last instance.
2144 int amdgpu_device_ip_set_clockgating_state(void *dev,
2145 enum amd_ip_block_type block_type,
2146 enum amd_clockgating_state state)
2148 struct amdgpu_device *adev = dev;
2151 for (i = 0; i < adev->num_ip_blocks; i++) {
2152 if (!adev->ip_blocks[i].status.valid)
2154 if (adev->ip_blocks[i].version->type != block_type)
2156 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
2158 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
2159 (void *)adev, state);
2161 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
2162 adev->ip_blocks[i].version->funcs->name, r);
2168 * amdgpu_device_ip_set_powergating_state - set the PG state
2170 * @dev: amdgpu_device pointer
2171 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2172 * @state: powergating state (gate or ungate)
2174 * Sets the requested powergating state for all instances of
2175 * the hardware IP specified.
2176 * Returns the error code from the last instance.
2178 int amdgpu_device_ip_set_powergating_state(void *dev,
2179 enum amd_ip_block_type block_type,
2180 enum amd_powergating_state state)
2182 struct amdgpu_device *adev = dev;
2185 for (i = 0; i < adev->num_ip_blocks; i++) {
2186 if (!adev->ip_blocks[i].status.valid)
2188 if (adev->ip_blocks[i].version->type != block_type)
2190 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
2192 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
2193 (void *)adev, state);
2195 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
2196 adev->ip_blocks[i].version->funcs->name, r);
2202 * amdgpu_device_ip_get_clockgating_state - get the CG state
2204 * @adev: amdgpu_device pointer
2205 * @flags: clockgating feature flags
2207 * Walks the list of IPs on the device and updates the clockgating
2208 * flags for each IP.
2209 * Updates @flags with the feature flags for each hardware IP where
2210 * clockgating is enabled.
2212 void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
2217 for (i = 0; i < adev->num_ip_blocks; i++) {
2218 if (!adev->ip_blocks[i].status.valid)
2220 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
2221 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
2226 * amdgpu_device_ip_wait_for_idle - wait for idle
2228 * @adev: amdgpu_device pointer
2229 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2231 * Waits for the requested hardware IP to be idle.
2232 * Returns 0 for success or a negative error code on failure.
2234 int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
2235 enum amd_ip_block_type block_type)
2239 for (i = 0; i < adev->num_ip_blocks; i++) {
2240 if (!adev->ip_blocks[i].status.valid)
2242 if (adev->ip_blocks[i].version->type == block_type) {
2243 if (adev->ip_blocks[i].version->funcs->wait_for_idle) {
2244 r = adev->ip_blocks[i].version->funcs->wait_for_idle(
2245 &adev->ip_blocks[i]);
2257 * amdgpu_device_ip_is_valid - is the hardware IP enabled
2259 * @adev: amdgpu_device pointer
2260 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2262 * Check if the hardware IP is enabled or not.
2263 * Returns true if the IP is enabled, false if not.
2265 bool amdgpu_device_ip_is_valid(struct amdgpu_device *adev,
2266 enum amd_ip_block_type block_type)
2270 for (i = 0; i < adev->num_ip_blocks; i++) {
2271 if (adev->ip_blocks[i].version->type == block_type)
2272 return adev->ip_blocks[i].status.valid;
2279 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
2281 * @adev: amdgpu_device pointer
2282 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
2284 * Returns a pointer to the hardware IP block structure
2285 * if it exists for the asic, otherwise NULL.
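* 
* Illustrative usage sketch (hypothetical caller, not part of this file):
*   struct amdgpu_ip_block *gfx =
*           amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
*   if (gfx)
*           ... inspect gfx->version->major and gfx->version->minor ...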
2287 struct amdgpu_ip_block *
2288 amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
2289 enum amd_ip_block_type type)
2293 for (i = 0; i < adev->num_ip_blocks; i++)
2294 if (adev->ip_blocks[i].version->type == type)
2295 return &adev->ip_blocks[i];
2301 * amdgpu_device_ip_block_version_cmp
2303 * @adev: amdgpu_device pointer
2304 * @type: enum amd_ip_block_type
2305 * @major: major version
2306 * @minor: minor version
2308 * return 0 if equal or greater
2309 * return 1 if smaller or the ip_block doesn't exist
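* 
* Illustrative check (hypothetical caller): a return of 0 means the block is
* present at version @major.@minor or newer, e.g.
*   if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX, 8, 0))
*           ... GFX 8.0+ specific path ...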
2311 int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
2312 enum amd_ip_block_type type,
2313 u32 major, u32 minor)
2315 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
2317 if (ip_block && ((ip_block->version->major > major) ||
2318 ((ip_block->version->major == major) &&
2319 (ip_block->version->minor >= minor))))
2326 * amdgpu_device_ip_block_add
2328 * @adev: amdgpu_device pointer
2329 * @ip_block_version: pointer to the IP to add
2331 * Adds the IP block driver information to the collection of IPs
2334 int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
2335 const struct amdgpu_ip_block_version *ip_block_version)
2337 if (!ip_block_version)
2340 switch (ip_block_version->type) {
2341 case AMD_IP_BLOCK_TYPE_VCN:
2342 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
2345 case AMD_IP_BLOCK_TYPE_JPEG:
2346 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
2353 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
2354 ip_block_version->funcs->name);
2356 adev->ip_blocks[adev->num_ip_blocks].adev = adev;
2358 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
2364 * amdgpu_device_enable_virtual_display - enable virtual display feature
2366 * @adev: amdgpu_device pointer
2368 * Enables the virtual display feature if the user has enabled it via
2369 * the module parameter virtual_display. This feature provides a virtual
2370 * display hardware on headless boards or in virtualized environments.
2371 * This function parses and validates the configuration string specified by
2372 * the user and configures the virtual display configuration (number of
2373 * virtual connectors, crtcs, etc.) specified.
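* 
* Example (illustrative; format inferred from the parsing below): the option
* is a ';'-separated list of "<pci address>,<number of crtcs>" entries, where
* the address may also be the literal "all", e.g.
*   amdgpu.virtual_display=0000:03:00.0,2;0000:04:00.0,1
*   amdgpu.virtual_display=all,1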
2375 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
2377 adev->enable_virtual_display = false;
2379 if (amdgpu_virtual_display) {
2380 const char *pci_address_name = pci_name(adev->pdev);
2381 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
2383 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
2384 pciaddstr_tmp = pciaddstr;
2385 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
2386 pciaddname = strsep(&pciaddname_tmp, ",");
2387 if (!strcmp("all", pciaddname)
2388 || !strcmp(pci_address_name, pciaddname)) {
2392 adev->enable_virtual_display = true;
2395 res = kstrtol(pciaddname_tmp, 10,
2403 adev->mode_info.num_crtc = num_crtc;
2405 adev->mode_info.num_crtc = 1;
2411 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
2412 amdgpu_virtual_display, pci_address_name,
2413 adev->enable_virtual_display, adev->mode_info.num_crtc);
2419 void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
2421 if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
2422 adev->mode_info.num_crtc = 1;
2423 adev->enable_virtual_display = true;
2424 DRM_INFO("virtual_display:%d, num_crtc:%d\n",
2425 adev->enable_virtual_display, adev->mode_info.num_crtc);
2430 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
2432 * @adev: amdgpu_device pointer
2434 * Parses the asic configuration parameters specified in the gpu info
2435 * firmware and makes them available to the driver for use in configuring
2437 * Returns 0 on success, -EINVAL on failure.
2439 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
2441 const char *chip_name;
2443 const struct gpu_info_firmware_header_v1_0 *hdr;
2445 adev->firmware.gpu_info_fw = NULL;
2447 if (adev->mman.discovery_bin)
2450 switch (adev->asic_type) {
2454 chip_name = "vega10";
2457 chip_name = "vega12";
2460 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2461 chip_name = "raven2";
2462 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
2463 chip_name = "picasso";
2465 chip_name = "raven";
2468 chip_name = "arcturus";
2471 chip_name = "navi12";
2475 err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw,
2476 "amdgpu/%s_gpu_info.bin", chip_name);
2479 "Failed to get gpu_info firmware \"%s_gpu_info.bin\"\n",
2484 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
2485 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2487 switch (hdr->version_major) {
2490 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
2491 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
2492 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2495 * Should be dropped when DAL no longer needs it.
2497 if (adev->asic_type == CHIP_NAVI12)
2498 goto parse_soc_bounding_box;
2500 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2501 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2502 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2503 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
2504 adev->gfx.config.max_texture_channel_caches =
2505 le32_to_cpu(gpu_info_fw->gc_num_tccs);
2506 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2507 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2508 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2509 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
2510 adev->gfx.config.double_offchip_lds_buf =
2511 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2512 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
2513 adev->gfx.cu_info.max_waves_per_simd =
2514 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2515 adev->gfx.cu_info.max_scratch_slots_per_cu =
2516 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2517 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
2518 if (hdr->version_minor >= 1) {
2519 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2520 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2521 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2522 adev->gfx.config.num_sc_per_sh =
2523 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2524 adev->gfx.config.num_packer_per_sc =
2525 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2528 parse_soc_bounding_box:
2530 * soc bounding box info is not integrated in the discovery table,
2531 * so we always need to parse it from the gpu info firmware if needed.
2533 if (hdr->version_minor == 2) {
2534 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2535 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2536 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2537 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2543 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2552 * amdgpu_device_ip_early_init - run early init for hardware IPs
2554 * @adev: amdgpu_device pointer
2556 * Early initialization pass for hardware IPs. The hardware IPs that make
2557 * up each asic are discovered and each IP's early_init callback is run. This
2558 * is the first stage in initializing the asic.
2559 * Returns 0 on success, negative error code on failure.
2561 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
2563 struct amdgpu_ip_block *ip_block;
2564 struct pci_dev *parent;
2568 amdgpu_device_enable_virtual_display(adev);
2570 if (amdgpu_sriov_vf(adev)) {
2571 r = amdgpu_virt_request_full_gpu(adev, true);
2576 switch (adev->asic_type) {
2577 #ifdef CONFIG_DRM_AMDGPU_SI
2583 adev->family = AMDGPU_FAMILY_SI;
2584 r = si_set_ip_blocks(adev);
2589 #ifdef CONFIG_DRM_AMDGPU_CIK
2595 if (adev->flags & AMD_IS_APU)
2596 adev->family = AMDGPU_FAMILY_KV;
2598 adev->family = AMDGPU_FAMILY_CI;
2600 r = cik_set_ip_blocks(adev);
2608 case CHIP_POLARIS10:
2609 case CHIP_POLARIS11:
2610 case CHIP_POLARIS12:
2614 if (adev->flags & AMD_IS_APU)
2615 adev->family = AMDGPU_FAMILY_CZ;
2617 adev->family = AMDGPU_FAMILY_VI;
2619 r = vi_set_ip_blocks(adev);
2624 r = amdgpu_discovery_set_ip_blocks(adev);
2630 if (amdgpu_has_atpx() &&
2631 (amdgpu_is_atpx_hybrid() ||
2632 amdgpu_has_atpx_dgpu_power_cntl()) &&
2633 ((adev->flags & AMD_IS_APU) == 0) &&
2634 !dev_is_removable(&adev->pdev->dev))
2635 adev->flags |= AMD_IS_PX;
2637 if (!(adev->flags & AMD_IS_APU)) {
2638 parent = pcie_find_root_port(adev->pdev);
2639 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
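/* Trim the default PowerPlay feature mask for configurations that cannot
 * support certain features (see the GFXOFF, overdrive and PCIe DPM checks below).
 */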
2643 adev->pm.pp_feature = amdgpu_pp_feature_mask;
2644 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
2645 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
2646 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2647 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
2648 if (!amdgpu_device_pcie_dynamic_switching_supported(adev))
2649 adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
2652 for (i = 0; i < adev->num_ip_blocks; i++) {
2653 ip_block = &adev->ip_blocks[i];
2655 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
2656 DRM_WARN("disabled ip block: %d <%s>\n",
2657 i, adev->ip_blocks[i].version->funcs->name);
2658 adev->ip_blocks[i].status.valid = false;
2659 } else if (ip_block->version->funcs->early_init) {
2660 r = ip_block->version->funcs->early_init(ip_block);
2662 adev->ip_blocks[i].status.valid = false;
2664 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2665 adev->ip_blocks[i].version->funcs->name, r);
2668 adev->ip_blocks[i].status.valid = true;
2671 adev->ip_blocks[i].status.valid = true;
2673 /* get the vbios after the asic_funcs are set up */
2674 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2675 r = amdgpu_device_parse_gpu_info_fw(adev);
2680 if (amdgpu_device_read_bios(adev)) {
2681 if (!amdgpu_get_bios(adev))
2684 r = amdgpu_atombios_init(adev);
2686 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2687 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2693 /* get pf2vf msg info at its earliest time */
2693 if (amdgpu_sriov_vf(adev))
2694 amdgpu_virt_init_data_exchange(adev);
2701 ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
2702 if (ip_block->status.valid)
2703 amdgpu_amdkfd_device_probe(adev);
2705 adev->cg_flags &= amdgpu_cg_mask;
2706 adev->pg_flags &= amdgpu_pg_mask;
2711 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2715 for (i = 0; i < adev->num_ip_blocks; i++) {
2716 if (!adev->ip_blocks[i].status.sw)
2718 if (adev->ip_blocks[i].status.hw)
2720 if (!amdgpu_ip_member_of_hwini(
2721 adev, adev->ip_blocks[i].version->type))
2723 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2724 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
2725 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2726 r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2728 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2729 adev->ip_blocks[i].version->funcs->name, r);
2732 adev->ip_blocks[i].status.hw = true;
2739 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2743 for (i = 0; i < adev->num_ip_blocks; i++) {
2744 if (!adev->ip_blocks[i].status.sw)
2746 if (adev->ip_blocks[i].status.hw)
2748 if (!amdgpu_ip_member_of_hwini(
2749 adev, adev->ip_blocks[i].version->type))
2751 r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2753 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2754 adev->ip_blocks[i].version->funcs->name, r);
2757 adev->ip_blocks[i].status.hw = true;
2763 static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2767 uint32_t smu_version;
2769 if (adev->asic_type >= CHIP_VEGA10) {
2770 for (i = 0; i < adev->num_ip_blocks; i++) {
2771 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2774 if (!amdgpu_ip_member_of_hwini(adev,
2775 AMD_IP_BLOCK_TYPE_PSP))
2778 if (!adev->ip_blocks[i].status.sw)
2781 /* no need to do the fw loading again if already done*/
2782 if (adev->ip_blocks[i].status.hw == true)
2785 if (amdgpu_in_reset(adev) || adev->in_suspend) {
2786 r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
2790 r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2792 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2793 adev->ip_blocks[i].version->funcs->name, r);
2796 adev->ip_blocks[i].status.hw = true;
2802 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2803 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
2808 static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2813 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2814 struct amdgpu_ring *ring = adev->rings[i];
2816 /* No need to set up the GPU scheduler for rings that don't need it */
2817 if (!ring || ring->no_scheduler)
2820 switch (ring->funcs->type) {
2821 case AMDGPU_RING_TYPE_GFX:
2822 timeout = adev->gfx_timeout;
2824 case AMDGPU_RING_TYPE_COMPUTE:
2825 timeout = adev->compute_timeout;
2827 case AMDGPU_RING_TYPE_SDMA:
2828 timeout = adev->sdma_timeout;
2831 timeout = adev->video_timeout;
2835 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL,
2836 DRM_SCHED_PRIORITY_COUNT,
2837 ring->num_hw_submission, 0,
2838 timeout, adev->reset_domain->wq,
2839 ring->sched_score, ring->name,
2842 DRM_ERROR("Failed to create scheduler on ring %s.\n",
2846 r = amdgpu_uvd_entity_init(adev, ring);
2848 DRM_ERROR("Failed to create UVD scheduling entity on ring %s.\n",
2852 r = amdgpu_vce_entity_init(adev, ring);
2854 DRM_ERROR("Failed to create VCE scheduling entity on ring %s.\n",
2860 amdgpu_xcp_update_partition_sched_list(adev);
2867 * amdgpu_device_ip_init - run init for hardware IPs
2869 * @adev: amdgpu_device pointer
2871 * Main initialization pass for hardware IPs. The list of all the hardware
2872 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2873 * are run. sw_init initializes the software state associated with each IP
2874 * and hw_init initializes the hardware associated with each IP.
2875 * Returns 0 on success, negative error code on failure.
2877 static int amdgpu_device_ip_init(struct amdgpu_device *adev)
2882 r = amdgpu_ras_init(adev);
2886 for (i = 0; i < adev->num_ip_blocks; i++) {
2887 if (!adev->ip_blocks[i].status.valid)
2889 if (adev->ip_blocks[i].version->funcs->sw_init) {
2890 r = adev->ip_blocks[i].version->funcs->sw_init(&adev->ip_blocks[i]);
2892 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2893 adev->ip_blocks[i].version->funcs->name, r);
2897 adev->ip_blocks[i].status.sw = true;
2899 if (!amdgpu_ip_member_of_hwini(
2900 adev, adev->ip_blocks[i].version->type))
2903 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2904 /* need to do common hw init early so everything is set up for gmc */
2905 r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2907 DRM_ERROR("hw_init %d failed %d\n", i, r);
2910 adev->ip_blocks[i].status.hw = true;
2911 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2912 /* need to do gmc hw init early so we can allocate gpu mem */
2913 /* Try to reserve bad pages early */
2914 if (amdgpu_sriov_vf(adev))
2915 amdgpu_virt_exchange_data(adev);
2917 r = amdgpu_device_mem_scratch_init(adev);
2919 DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
2922 r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2924 DRM_ERROR("hw_init %d failed %d\n", i, r);
2927 r = amdgpu_device_wb_init(adev);
2929 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
2932 adev->ip_blocks[i].status.hw = true;
2934 /* right after GMC hw init, we create CSA */
2935 if (adev->gfx.mcbp) {
2936 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2937 AMDGPU_GEM_DOMAIN_VRAM |
2938 AMDGPU_GEM_DOMAIN_GTT,
2941 DRM_ERROR("allocate CSA failed %d\n", r);
2946 r = amdgpu_seq64_init(adev);
2948 DRM_ERROR("allocate seq64 failed %d\n", r);
2954 if (amdgpu_sriov_vf(adev))
2955 amdgpu_virt_init_data_exchange(adev);
2957 r = amdgpu_ib_pool_init(adev);
2959 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2960 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2964 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2968 r = amdgpu_device_ip_hw_init_phase1(adev);
2972 r = amdgpu_device_fw_loading(adev);
2976 r = amdgpu_device_ip_hw_init_phase2(adev);
2981 * retired pages will be loaded from eeprom and reserved here,
2982 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2983 * for some ASICs the RAS EEPROM code relies on the SMU being fully
2984 * functional for I2C communication, which is only true at this point.
2986 * amdgpu_ras_recovery_init may fail, but the upper layers only care about
2987 * failures caused by a bad gpu situation and stop the amdgpu init process
2988 * accordingly. For other failure cases, it will still release all the
2989 * resources and print an error message, rather than returning a
2990 * negative value to the upper level.
2992 * Note: theoretically, this should be called before all vram allocations
2993 * to protect retired pages from being abused.
2995 init_badpage = (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
2996 r = amdgpu_ras_recovery_init(adev, init_badpage);
3001 * In case of XGMI grab extra reference for reset domain for this device
3003 if (adev->gmc.xgmi.num_physical_nodes > 1) {
3004 if (amdgpu_xgmi_add_device(adev) == 0) {
3005 if (!amdgpu_sriov_vf(adev)) {
3006 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
3008 if (WARN_ON(!hive)) {
3013 if (!hive->reset_domain ||
3014 !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
3016 amdgpu_put_xgmi_hive(hive);
3020 /* Drop the early temporary reset domain we created for device */
3021 amdgpu_reset_put_reset_domain(adev->reset_domain);
3022 adev->reset_domain = hive->reset_domain;
3023 amdgpu_put_xgmi_hive(hive);
3028 r = amdgpu_device_init_schedulers(adev);
3032 if (adev->mman.buffer_funcs_ring->sched.ready)
3033 amdgpu_ttm_set_buffer_funcs_status(adev, true);
3035 /* Don't init kfd if the whole hive needs to be reset during init */
3036 if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
3037 kgd2kfd_init_zone_device(adev);
3038 amdgpu_amdkfd_device_init(adev);
3041 amdgpu_fru_get_product_info(adev);
3049 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
3051 * @adev: amdgpu_device pointer
3053 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
3054 * this function before a GPU reset. If the value is retained after a
3055 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
3057 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
3059 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
3063 * amdgpu_device_check_vram_lost - check if vram is valid
3065 * @adev: amdgpu_device pointer
3067 * Checks the reset magic value written to the gart pointer in VRAM.
3068 * The driver calls this after a GPU reset to see if the contents of
3069 * VRAM have been lost or not.
3070 * Returns true if vram is lost, false if not.
3072 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
3074 if (memcmp(adev->gart.ptr, adev->reset_magic,
3075 AMDGPU_RESET_MAGIC_NUM))
3078 if (!amdgpu_in_reset(adev))
3082 * For all ASICs with baco/mode1 reset, the VRAM is
3083 * always assumed to be lost.
3085 switch (amdgpu_asic_reset_method(adev)) {
3086 case AMD_RESET_METHOD_BACO:
3087 case AMD_RESET_METHOD_MODE1:
3095 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
3097 * @adev: amdgpu_device pointer
3098 * @state: clockgating state (gate or ungate)
3100 * The list of all the hardware IPs that make up the asic is walked and the
3101 * set_clockgating_state callbacks are run.
3102 * The late initialization pass enables clockgating for hardware IPs;
3103 * the fini or suspend pass disables clockgating for hardware IPs.
3104 * Returns 0 on success, negative error code on failure.
3107 int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
3108 enum amd_clockgating_state state)
3112 if (amdgpu_emu_mode == 1)
3115 for (j = 0; j < adev->num_ip_blocks; j++) {
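/* Gate in normal (init) order, ungate in reverse (fini) order. */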
3116 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
3117 if (!adev->ip_blocks[i].status.late_initialized)
3119 /* skip CG for GFX, SDMA on S0ix */
3120 if (adev->in_s0ix &&
3121 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3122 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3124 /* skip CG for VCE/UVD, it's handled specially */
3125 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
3126 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
3127 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
3128 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
3129 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
3130 /* enable clockgating to save power */
3131 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
3134 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
3135 adev->ip_blocks[i].version->funcs->name, r);
3144 int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
3145 enum amd_powergating_state state)
3149 if (amdgpu_emu_mode == 1)
3152 for (j = 0; j < adev->num_ip_blocks; j++) {
3153 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
3154 if (!adev->ip_blocks[i].status.late_initialized)
3156 /* skip PG for GFX, SDMA on S0ix */
3157 if (adev->in_s0ix &&
3158 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3159 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3161 /* skip PG for VCE/UVD, it's handled specially */
3162 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
3163 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
3164 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
3165 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
3166 adev->ip_blocks[i].version->funcs->set_powergating_state) {
3167 /* enable powergating to save power */
3168 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
3171 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
3172 adev->ip_blocks[i].version->funcs->name, r);
3180 static int amdgpu_device_enable_mgpu_fan_boost(void)
3182 struct amdgpu_gpu_instance *gpu_ins;
3183 struct amdgpu_device *adev;
3186 mutex_lock(&mgpu_info.mutex);
3189 * MGPU fan boost feature should be enabled
3190 * only when there are two or more dGPUs in
3193 if (mgpu_info.num_dgpu < 2)
3196 for (i = 0; i < mgpu_info.num_dgpu; i++) {
3197 gpu_ins = &(mgpu_info.gpu_ins[i]);
3198 adev = gpu_ins->adev;
3199 if (!(adev->flags & AMD_IS_APU) &&
3200 !gpu_ins->mgpu_fan_enabled) {
3201 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
3205 gpu_ins->mgpu_fan_enabled = 1;
3210 mutex_unlock(&mgpu_info.mutex);
3216 * amdgpu_device_ip_late_init - run late init for hardware IPs
3218 * @adev: amdgpu_device pointer
3220 * Late initialization pass for hardware IPs. The list of all the hardware
3221 * IPs that make up the asic is walked and the late_init callbacks are run.
3222 * late_init covers any special initialization that an IP requires
3223 * after all of the IPs have been initialized or something that needs to happen
3224 * late in the init process.
3225 * Returns 0 on success, negative error code on failure.
3227 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
3229 struct amdgpu_gpu_instance *gpu_instance;
3232 for (i = 0; i < adev->num_ip_blocks; i++) {
3233 if (!adev->ip_blocks[i].status.hw)
3235 if (adev->ip_blocks[i].version->funcs->late_init) {
3236 r = adev->ip_blocks[i].version->funcs->late_init(&adev->ip_blocks[i]);
3238 DRM_ERROR("late_init of IP block <%s> failed %d\n",
3239 adev->ip_blocks[i].version->funcs->name, r);
3243 adev->ip_blocks[i].status.late_initialized = true;
3246 r = amdgpu_ras_late_init(adev);
3248 DRM_ERROR("amdgpu_ras_late_init failed %d", r);
3252 if (!amdgpu_in_reset(adev))
3253 amdgpu_ras_set_error_query_ready(adev, true);
3255 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
3256 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
3258 amdgpu_device_fill_reset_magic(adev);
3260 r = amdgpu_device_enable_mgpu_fan_boost();
3262 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
3264 /* For passthrough configuration on arcturus and aldebaran, enable special SBR handling */
3265 if (amdgpu_passthrough(adev) &&
3266 ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
3267 adev->asic_type == CHIP_ALDEBARAN))
3268 amdgpu_dpm_handle_passthrough_sbr(adev, true);
3270 if (adev->gmc.xgmi.num_physical_nodes > 1) {
3271 mutex_lock(&mgpu_info.mutex);
3274 * Reset the device p-state to low, as it was booted with high.
3276 * This should be performed only after all devices from the same
3277 * hive have been initialized.
3279 * However, the number of devices in the hive is not known in advance,
3280 * as it is counted one by one during device initialization.
3282 * So we wait until all XGMI interlinked devices are initialized.
3283 * This may add some delay as those devices may come from
3284 * different hives. But that should be OK.
3286 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
3287 for (i = 0; i < mgpu_info.num_gpu; i++) {
3288 gpu_instance = &(mgpu_info.gpu_ins[i]);
3289 if (gpu_instance->adev->flags & AMD_IS_APU)
3292 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
3293 AMDGPU_XGMI_PSTATE_MIN);
3295 DRM_ERROR("pstate setting failed (%d).\n", r);
3301 mutex_unlock(&mgpu_info.mutex);
3307 static void amdgpu_ip_block_hw_fini(struct amdgpu_ip_block *ip_block)
3311 if (!ip_block->version->funcs->hw_fini) {
3312 DRM_ERROR("hw_fini of IP block <%s> not defined\n",
3313 ip_block->version->funcs->name);
3315 r = ip_block->version->funcs->hw_fini(ip_block);
3316 /* XXX handle errors */
3318 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
3319 ip_block->version->funcs->name, r);
3323 ip_block->status.hw = false;
3327 * amdgpu_device_smu_fini_early - smu hw_fini wrapper
3329 * @adev: amdgpu_device pointer
3331 * For ASICs that need to disable the SMC first
3333 static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
3337 if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
3340 for (i = 0; i < adev->num_ip_blocks; i++) {
3341 if (!adev->ip_blocks[i].status.hw)
3343 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3344 amdgpu_ip_block_hw_fini(&adev->ip_blocks[i]);
3350 static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
3354 for (i = 0; i < adev->num_ip_blocks; i++) {
3355 if (!adev->ip_blocks[i].version->funcs->early_fini)
3358 r = adev->ip_blocks[i].version->funcs->early_fini(&adev->ip_blocks[i]);
3360 DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
3361 adev->ip_blocks[i].version->funcs->name, r);
3365 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3366 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3368 amdgpu_amdkfd_suspend(adev, false);
3370 /* Workaround for ASICs that need to disable the SMC first */
3371 amdgpu_device_smu_fini_early(adev);
3373 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3374 if (!adev->ip_blocks[i].status.hw)
3377 amdgpu_ip_block_hw_fini(&adev->ip_blocks[i]);
3380 if (amdgpu_sriov_vf(adev)) {
3381 if (amdgpu_virt_release_full_gpu(adev, false))
3382 DRM_ERROR("failed to release exclusive mode on fini\n");
3389 * amdgpu_device_ip_fini - run fini for hardware IPs
3391 * @adev: amdgpu_device pointer
3393 * Main teardown pass for hardware IPs. The list of all the hardware
3394 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
3395 * are run. hw_fini tears down the hardware associated with each IP
3396 * and sw_fini tears down any software state associated with each IP.
3397 * Returns 0 on success, negative error code on failure.
3399 static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
3403 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
3404 amdgpu_virt_release_ras_err_handler_data(adev);
3406 if (adev->gmc.xgmi.num_physical_nodes > 1)
3407 amdgpu_xgmi_remove_device(adev);
3409 amdgpu_amdkfd_device_fini_sw(adev);
3411 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3412 if (!adev->ip_blocks[i].status.sw)
3415 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
3416 amdgpu_ucode_free_bo(adev);
3417 amdgpu_free_static_csa(&adev->virt.csa_obj);
3418 amdgpu_device_wb_fini(adev);
3419 amdgpu_device_mem_scratch_fini(adev);
3420 amdgpu_ib_pool_fini(adev);
3421 amdgpu_seq64_fini(adev);
3423 if (adev->ip_blocks[i].version->funcs->sw_fini) {
3424 r = adev->ip_blocks[i].version->funcs->sw_fini(&adev->ip_blocks[i]);
3425 /* XXX handle errors */
3427 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
3428 adev->ip_blocks[i].version->funcs->name, r);
3431 adev->ip_blocks[i].status.sw = false;
3432 adev->ip_blocks[i].status.valid = false;
3435 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3436 if (!adev->ip_blocks[i].status.late_initialized)
3438 if (adev->ip_blocks[i].version->funcs->late_fini)
3439 adev->ip_blocks[i].version->funcs->late_fini(&adev->ip_blocks[i]);
3440 adev->ip_blocks[i].status.late_initialized = false;
3443 amdgpu_ras_fini(adev);
3449 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
3451 * @work: work_struct.
3453 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
3455 struct amdgpu_device *adev =
3456 container_of(work, struct amdgpu_device, delayed_init_work.work);
3459 r = amdgpu_ib_ring_tests(adev);
3461 DRM_ERROR("ib ring test failed (%d).\n", r);
3464 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
3466 struct amdgpu_device *adev =
3467 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
3469 WARN_ON_ONCE(adev->gfx.gfx_off_state);
3470 WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
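/* By the time this delayed work runs, all GFXOFF disable requests have been
 * dropped (see the WARNs above), so ask the SMU to actually enter GFXOFF.
 */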
3472 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
3473 adev->gfx.gfx_off_state = true;
3477 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
3479 * @adev: amdgpu_device pointer
3481 * Main suspend function for hardware IPs. The list of all the hardware
3482 * IPs that make up the asic is walked, clockgating is disabled and the
3483 * suspend callbacks are run. suspend puts the hardware and software state
3484 * in each IP into a state suitable for suspend.
3485 * Returns 0 on success, negative error code on failure.
3487 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
3491 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3492 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3495 * Per PMFW team's suggestion, the driver needs to handle gfxoff
3496 * and df cstate feature disablement for the gpu reset (e.g. Mode1Reset)
3497 * scenario. Add the missing df cstate disablement here.
3499 if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
3500 dev_warn(adev->dev, "Failed to disallow df cstate");
3502 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3503 if (!adev->ip_blocks[i].status.valid)
3506 /* displays are handled separately */
3507 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
3510 /* XXX handle errors */
3511 r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
3520 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3522 * @adev: amdgpu_device pointer
3524 * Main suspend function for hardware IPs. The list of all the hardware
3525 * IPs that make up the asic is walked, clockgating is disabled and the
3526 * suspend callbacks are run. suspend puts the hardware and software state
3527 * in each IP into a state suitable for suspend.
3528 * Returns 0 on success, negative error code on failure.
3530 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
3535 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
3537 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3538 if (!adev->ip_blocks[i].status.valid)
3540 /* displays are handled in phase1 */
3541 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3543 /* PSP lost connection when err_event_athub occurs */
3544 if (amdgpu_ras_intr_triggered() &&
3545 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3546 adev->ip_blocks[i].status.hw = false;
3550 /* skip unnecessary suspend if we have not initialized them yet */
3551 if (!amdgpu_ip_member_of_hwini(
3552 adev, adev->ip_blocks[i].version->type))
3555 /* skip suspend of gfx/mes and psp for S0ix
3556 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3557 * like at runtime. PSP is also part of the always on hardware
3558 * so no need to suspend it.
3560 if (adev->in_s0ix &&
3561 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
3562 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3563 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
3566 /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3567 if (adev->in_s0ix &&
3568 (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
3569 IP_VERSION(5, 0, 0)) &&
3570 (adev->ip_blocks[i].version->type ==
3571 AMD_IP_BLOCK_TYPE_SDMA))
3574 /* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot.
3575 * These are in TMR, hence are expected to be reused by PSP-TOS to reload
3576 * from this location and RLC Autoload automatically also gets loaded
3577 * from here based on PMFW -> PSP message during re-init sequence.
3578 * Therefore, the psp suspend & resume should be skipped to avoid destroying
3579 * the TMR and reloading FWs again for IMU enabled APU ASICs.
3581 if (amdgpu_in_reset(adev) &&
3582 (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3583 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3586 /* XXX handle errors */
3587 r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
3588 adev->ip_blocks[i].status.hw = false;
3590 /* handle putting the SMC in the appropriate state */
3591 if (!amdgpu_sriov_vf(adev)) {
3592 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3593 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3595 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3596 adev->mp1_state, r);
3607 * amdgpu_device_ip_suspend - run suspend for hardware IPs
3609 * @adev: amdgpu_device pointer
3611 * Main suspend function for hardware IPs. The list of all the hardware
3612 * IPs that make up the asic is walked, clockgating is disabled and the
3613 * suspend callbacks are run. suspend puts the hardware and software state
3614 * in each IP into a state suitable for suspend.
3615 * Returns 0 on success, negative error code on failure.
3617 int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3621 if (amdgpu_sriov_vf(adev)) {
3622 amdgpu_virt_fini_data_exchange(adev);
3623 amdgpu_virt_request_full_gpu(adev, false);
3626 amdgpu_ttm_set_buffer_funcs_status(adev, false);
3628 r = amdgpu_device_ip_suspend_phase1(adev);
3631 r = amdgpu_device_ip_suspend_phase2(adev);
3633 if (amdgpu_sriov_vf(adev))
3634 amdgpu_virt_release_full_gpu(adev, false);
3639 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
3643 static enum amd_ip_block_type ip_order[] = {
3644 AMD_IP_BLOCK_TYPE_COMMON,
3645 AMD_IP_BLOCK_TYPE_GMC,
3646 AMD_IP_BLOCK_TYPE_PSP,
3647 AMD_IP_BLOCK_TYPE_IH,
3650 for (i = 0; i < adev->num_ip_blocks; i++) {
3652 struct amdgpu_ip_block *block;
3654 block = &adev->ip_blocks[i];
3655 block->status.hw = false;
3657 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
3659 if (block->version->type != ip_order[j] ||
3660 !block->status.valid)
3663 r = block->version->funcs->hw_init(&adev->ip_blocks[i]);
3664 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
3667 block->status.hw = true;
3674 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
3678 static enum amd_ip_block_type ip_order[] = {
3679 AMD_IP_BLOCK_TYPE_SMC,
3680 AMD_IP_BLOCK_TYPE_DCE,
3681 AMD_IP_BLOCK_TYPE_GFX,
3682 AMD_IP_BLOCK_TYPE_SDMA,
3683 AMD_IP_BLOCK_TYPE_MES,
3684 AMD_IP_BLOCK_TYPE_UVD,
3685 AMD_IP_BLOCK_TYPE_VCE,
3686 AMD_IP_BLOCK_TYPE_VCN,
3687 AMD_IP_BLOCK_TYPE_JPEG
3690 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3692 struct amdgpu_ip_block *block;
3694 for (j = 0; j < adev->num_ip_blocks; j++) {
3695 block = &adev->ip_blocks[j];
3697 if (block->version->type != ip_order[i] ||
3698 !block->status.valid ||
3702 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC) {
3703 r = amdgpu_ip_block_resume(block);
3707 r = block->version->funcs->hw_init(block);
3709 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
3710 block->version->funcs->name, r);
3713 block->status.hw = true;
3722 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3724 * @adev: amdgpu_device pointer
3726 * First resume function for hardware IPs. The list of all the hardware
3727 * IPs that make up the asic is walked and the resume callbacks are run for
3728 * COMMON, GMC, and IH. resume puts the hardware into a functional state
3729 * after a suspend and updates the software state as necessary. This
3730 * function is also used for restoring the GPU after a GPU reset.
3731 * Returns 0 on success, negative error code on failure.
3733 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
3737 for (i = 0; i < adev->num_ip_blocks; i++) {
3738 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3740 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3741 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3742 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3743 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
3745 r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
3755 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3757 * @adev: amdgpu_device pointer
3759 * Second resume function for hardware IPs. The list of all the hardware
3760 * IPs that make up the asic is walked and the resume callbacks are run for
3761 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
3762 * functional state after a suspend and updates the software state as
3763 * necessary. This function is also used for restoring the GPU after a GPU
3765 * Returns 0 on success, negative error code on failure.
3767 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
3771 for (i = 0; i < adev->num_ip_blocks; i++) {
3772 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3774 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3775 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3776 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3777 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3779 r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
3788 * amdgpu_device_ip_resume - run resume for hardware IPs
3790 * @adev: amdgpu_device pointer
3792 * Main resume function for hardware IPs. The hardware IPs
3793 * are split into two resume functions because they are
3794 * also used in recovering from a GPU reset and some additional
3795 * steps need to be taken between them. In this case (S3/S4) they are
3797 * Returns 0 on success, negative error code on failure.
3799 static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
3803 r = amdgpu_device_ip_resume_phase1(adev);
3807 r = amdgpu_device_fw_loading(adev);
3811 r = amdgpu_device_ip_resume_phase2(adev);
3813 if (adev->mman.buffer_funcs_ring->sched.ready)
3814 amdgpu_ttm_set_buffer_funcs_status(adev, true);
3820 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3822 * @adev: amdgpu_device pointer
3824 * Query the VBIOS data tables to determine if the board supports SR-IOV.
3826 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
3828 if (amdgpu_sriov_vf(adev)) {
3829 if (adev->is_atom_fw) {
3830 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
3831 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3833 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3834 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3837 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3838 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
3843 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3845 * @asic_type: AMD asic type
3847 * Check if there is DC (new modesetting infrastructure) support for an asic.
3848 * returns true if DC has support, false if not.
3850 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3852 switch (asic_type) {
3853 #ifdef CONFIG_DRM_AMDGPU_SI
3857 /* chips with no display hardware */
3859 #if defined(CONFIG_DRM_AMD_DC)
3865 * We have systems in the wild with these ASICs that require
3866 * LVDS and VGA support which is not supported with DC.
3868 * Fall back to the non-DC driver here by default so as not to
3869 * cause regressions.
3871 #if defined(CONFIG_DRM_AMD_DC_SI)
3872 return amdgpu_dc > 0;
3881 * We have systems in the wild with these ASICs that require
3882 * VGA support which is not supported with DC.
3884 * Fall back to the non-DC driver here by default so as not to
3885 * cause regressions.
3887 return amdgpu_dc > 0;
3889 return amdgpu_dc != 0;
3893 DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
3900 * amdgpu_device_has_dc_support - check if dc is supported
3902 * @adev: amdgpu_device pointer
3904 * Returns true for supported, false for not supported
3906 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3908 if (adev->enable_virtual_display ||
3909 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
3912 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3915 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3917 struct amdgpu_device *adev =
3918 container_of(__work, struct amdgpu_device, xgmi_reset_work);
3919 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
3921 /* It's a bug to not have a hive within this function */
3926 * Use task barrier to synchronize all xgmi reset works across the
3927 * hive. task_barrier_enter and task_barrier_exit will block
3928 * until all the threads running the xgmi reset works reach
3929 * those points. task_barrier_full will do both blocks.
3931 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3933 task_barrier_enter(&hive->tb);
3934 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
3936 if (adev->asic_reset_res)
3939 task_barrier_exit(&hive->tb);
3940 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
3942 if (adev->asic_reset_res)
3945 amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
3948 task_barrier_full(&hive->tb);
3949 adev->asic_reset_res = amdgpu_asic_reset(adev);
3953 if (adev->asic_reset_res)
3954 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
3955 adev->asic_reset_res, adev_to_drm(adev)->unique);
3956 amdgpu_put_xgmi_hive(hive);
3959 static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3961 char *input = amdgpu_lockup_timeout;
3962 char *timeout_setting = NULL;
3968 * By default the timeout for non-compute jobs is 10000
3969 * and 60000 for compute jobs.
3970 * In SR-IOV or passthrough mode, the timeout for compute
3971 * jobs is 60000 by default.
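* 
* Example (illustrative; format inferred from the parsing below): either a
* single value applied to all non-compute queues, e.g.
*   amdgpu.lockup_timeout=10000
* or a comma-separated list in the order handled below (GFX, Compute, SDMA,
* Video); a negative entry disables that timeout, e.g.
*   amdgpu.lockup_timeout=10000,60000,10000,10000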
3973 adev->gfx_timeout = msecs_to_jiffies(10000);
3974 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3975 if (amdgpu_sriov_vf(adev))
3976 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3977 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
3979 adev->compute_timeout = msecs_to_jiffies(60000);
3981 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3982 while ((timeout_setting = strsep(&input, ",")) &&
3983 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3984 ret = kstrtol(timeout_setting, 0, &timeout);
3991 } else if (timeout < 0) {
3992 timeout = MAX_SCHEDULE_TIMEOUT;
3993 dev_warn(adev->dev, "lockup timeout disabled");
3994 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
3996 timeout = msecs_to_jiffies(timeout);
4001 adev->gfx_timeout = timeout;
4004 adev->compute_timeout = timeout;
4007 adev->sdma_timeout = timeout;
4010 adev->video_timeout = timeout;
4017 * There is only one value specified and
4018 * it should apply to all non-compute jobs.
4021 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
4022 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
4023 adev->compute_timeout = adev->gfx_timeout;
4031 * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
4033 * @adev: amdgpu_device pointer
4035 * RAM is directly mapped to the GPU if the IOMMU is not enabled or is in passthrough mode
4037 static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
4039 struct iommu_domain *domain;
4041 domain = iommu_get_domain_for_dev(adev->dev);
4042 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
4043 adev->ram_is_direct_mapped = true;
4046 #if defined(CONFIG_HSA_AMD_P2P)
4048 * amdgpu_device_check_iommu_remap - Check if DMA remapping is enabled.
4050 * @adev: amdgpu_device pointer
4052 * Returns true if the IOMMU remaps device addresses (DMA remapping enabled), false otherwise.
4054 static bool amdgpu_device_check_iommu_remap(struct amdgpu_device *adev)
4056 struct iommu_domain *domain;
4058 domain = iommu_get_domain_for_dev(adev->dev);
4059 if (domain && (domain->type == IOMMU_DOMAIN_DMA ||
4060 domain->type == IOMMU_DOMAIN_DMA_FQ))
4067 static const struct attribute *amdgpu_dev_attributes[] = {
4068 &dev_attr_pcie_replay_count.attr,
4072 static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
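/* MCBP (Mid Command Buffer Preemption) policy: the amdgpu_mcbp module
 * parameter forces it on (1) or off (0); under SR-IOV it is always enabled.
 */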
4074 if (amdgpu_mcbp == 1)
4075 adev->gfx.mcbp = true;
4076 else if (amdgpu_mcbp == 0)
4077 adev->gfx.mcbp = false;
4079 if (amdgpu_sriov_vf(adev))
4080 adev->gfx.mcbp = true;
4083 DRM_INFO("MCBP is enabled\n");
4087 * amdgpu_device_init - initialize the driver
4089 * @adev: amdgpu_device pointer
4090 * @flags: driver flags
4092 * Initializes the driver info and hw (all asics).
4093 * Returns 0 for success or an error on failure.
4094 * Called at driver startup.
4096 int amdgpu_device_init(struct amdgpu_device *adev,
4099 struct drm_device *ddev = adev_to_drm(adev);
4100 struct pci_dev *pdev = adev->pdev;
4106 adev->shutdown = false;
4107 adev->flags = flags;
4109 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
4110 adev->asic_type = amdgpu_force_asic_type;
4112 adev->asic_type = flags & AMD_ASIC_MASK;
4114 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
4115 if (amdgpu_emu_mode == 1)
4116 adev->usec_timeout *= 10;
4117 adev->gmc.gart_size = 512 * 1024 * 1024;
4118 adev->accel_working = false;
4119 adev->num_rings = 0;
4120 RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
4121 adev->mman.buffer_funcs = NULL;
4122 adev->mman.buffer_funcs_ring = NULL;
4123 adev->vm_manager.vm_pte_funcs = NULL;
4124 adev->vm_manager.vm_pte_num_scheds = 0;
4125 adev->gmc.gmc_funcs = NULL;
4126 adev->harvest_ip_mask = 0x0;
4127 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
4128 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
4130 adev->smc_rreg = &amdgpu_invalid_rreg;
4131 adev->smc_wreg = &amdgpu_invalid_wreg;
4132 adev->pcie_rreg = &amdgpu_invalid_rreg;
4133 adev->pcie_wreg = &amdgpu_invalid_wreg;
4134 adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
4135 adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
4136 adev->pciep_rreg = &amdgpu_invalid_rreg;
4137 adev->pciep_wreg = &amdgpu_invalid_wreg;
4138 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
4139 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
4140 adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
4141 adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
4142 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
4143 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
4144 adev->didt_rreg = &amdgpu_invalid_rreg;
4145 adev->didt_wreg = &amdgpu_invalid_wreg;
4146 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
4147 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
4148 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
4149 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
4151 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
4152 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
4153 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
4155 /* mutex initialization is all done here so we
4156 * can recall functions without having locking issues
4158 mutex_init(&adev->firmware.mutex);
4159 mutex_init(&adev->pm.mutex);
4160 mutex_init(&adev->gfx.gpu_clock_mutex);
4161 mutex_init(&adev->srbm_mutex);
4162 mutex_init(&adev->gfx.pipe_reserve_mutex);
4163 mutex_init(&adev->gfx.gfx_off_mutex);
4164 mutex_init(&adev->gfx.partition_mutex);
4165 mutex_init(&adev->grbm_idx_mutex);
4166 mutex_init(&adev->mn_lock);
4167 mutex_init(&adev->virt.vf_errors.lock);
4168 mutex_init(&adev->virt.rlcg_reg_lock);
4169 hash_init(adev->mn_hash);
4170 mutex_init(&adev->psp.mutex);
4171 mutex_init(&adev->notifier_lock);
4172 mutex_init(&adev->pm.stable_pstate_ctx_lock);
4173 mutex_init(&adev->benchmark_mutex);
4174 mutex_init(&adev->gfx.reset_sem_mutex);
4175 /* Initialize the mutex for cleaner shader isolation between GFX and compute processes */
4176 mutex_init(&adev->enforce_isolation_mutex);
4177 mutex_init(&adev->gfx.kfd_sch_mutex);
4179 amdgpu_device_init_apu_flags(adev);
4181 r = amdgpu_device_check_arguments(adev);
4185 spin_lock_init(&adev->mmio_idx_lock);
4186 spin_lock_init(&adev->smc_idx_lock);
4187 spin_lock_init(&adev->pcie_idx_lock);
4188 spin_lock_init(&adev->uvd_ctx_idx_lock);
4189 spin_lock_init(&adev->didt_idx_lock);
4190 spin_lock_init(&adev->gc_cac_idx_lock);
4191 spin_lock_init(&adev->se_cac_idx_lock);
4192 spin_lock_init(&adev->audio_endpt_idx_lock);
4193 spin_lock_init(&adev->mm_stats.lock);
4194 spin_lock_init(&adev->wb.lock);
4196 INIT_LIST_HEAD(&adev->reset_list);
4198 INIT_LIST_HEAD(&adev->ras_list);
4200 INIT_LIST_HEAD(&adev->pm.od_kobj_list);
4202 INIT_DELAYED_WORK(&adev->delayed_init_work,
4203 amdgpu_device_delayed_init_work_handler);
4204 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
4205 amdgpu_device_delay_enable_gfx_off);
4207 * Initialize the enforce_isolation work structures for each XCP
4208 * partition. This work handler is responsible for enforcing shader
4209 * isolation on AMD GPUs. It counts the number of emitted fences for
4210 * each GFX and compute ring. If there are any fences, it schedules
4211 * the `enforce_isolation_work` to be run after a delay. If there are
4212 * no fences, it signals the Kernel Fusion Driver (KFD) to resume the
4215 for (i = 0; i < MAX_XCP; i++) {
4216 INIT_DELAYED_WORK(&adev->gfx.enforce_isolation[i].work,
4217 amdgpu_gfx_enforce_isolation_handler);
4218 adev->gfx.enforce_isolation[i].adev = adev;
4219 adev->gfx.enforce_isolation[i].xcp_id = i;
4222 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
4224 adev->gfx.gfx_off_req_count = 1;
4225 adev->gfx.gfx_off_residency = 0;
4226 adev->gfx.gfx_off_entrycount = 0;
4227 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
4229 atomic_set(&adev->throttling_logging_enabled, 1);
4231 * If throttling continues, logging will be performed every minute
4232 * to avoid log flooding. "-1" is subtracted since the thermal
4233 * throttling interrupt comes every second. Thus, the total logging
4234 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
4235 * for throttling interrupt) = 60 seconds.
4237 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
4238 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
4240 /* Registers mapping */
4241 /* TODO: block userspace mapping of io register */
4242 if (adev->asic_type >= CHIP_BONAIRE) {
4243 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
4244 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
4246 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
4247 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
4250 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
4251 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
4253 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
4257 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
4258 DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);
4261 * The reset domain needs to be present early, before the XGMI hive is
4262 * discovered (if any) and initialized, so the reset sem and in_gpu_reset
4263 * flag can be used early on during init and before calling RREG32.
4265 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
4266 if (!adev->reset_domain)
4269 /* detect hw virtualization here */
4270 amdgpu_detect_virtualization(adev);
4272 amdgpu_device_get_pcie_info(adev);
4274 r = amdgpu_device_get_job_timeout_settings(adev);
4276 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4280 amdgpu_device_set_mcbp(adev);
4283 * By default, use default mode where all blocks are expected to be
4284 * initialized. At present a 'swinit' of blocks is required to be
4285 * completed before the need for a different level is detected.
4287 amdgpu_set_init_level(adev, AMDGPU_INIT_LEVEL_DEFAULT);
4288 /* early init functions */
4289 r = amdgpu_device_ip_early_init(adev);
4293 /* Get rid of things like offb */
4294 r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
4298 /* Enable TMZ based on IP_VERSION */
4299 amdgpu_gmc_tmz_set(adev);
4301 if (amdgpu_sriov_vf(adev) &&
4302 amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0))
4303 /* VF MMIO access (except mailbox range) from CPU
4304 * will be blocked during sriov runtime
4306 adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT;
4308 amdgpu_gmc_noretry_set(adev);
4309 /* Need to get xgmi info early to decide the reset behavior*/
4310 if (adev->gmc.xgmi.supported) {
4311 r = adev->gfxhub.funcs->get_xgmi_info(adev);
4316 /* enable PCIE atomic ops */
4317 if (amdgpu_sriov_vf(adev)) {
4318 if (adev->virt.fw_reserve.p_pf2vf)
4319 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
4320 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
4321 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
4322 /* APUs w/ gfx9 onwards don't rely on PCIe atomics; their
4323 * internal path natively supports atomics, so set have_atomics_support to true.
4325 } else if ((adev->flags & AMD_IS_APU) &&
4326 (amdgpu_ip_version(adev, GC_HWIP, 0) >
4327 IP_VERSION(9, 0, 0))) {
4328 adev->have_atomics_support = true;
4330 adev->have_atomics_support =
4331 !pci_enable_atomic_ops_to_root(adev->pdev,
4332 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
4333 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
4336 if (!adev->have_atomics_support)
4337 dev_info(adev->dev, "PCIE atomic ops is not supported\n");
4339 /* doorbell bar mapping and doorbell index init*/
4340 amdgpu_doorbell_init(adev);
4342 if (amdgpu_emu_mode == 1) {
4343 /* post the asic on emulation mode */
4344 emu_soc_asic_init(adev);
4345 goto fence_driver_init;
4348 amdgpu_reset_init(adev);
4350 /* detect if we have an SR-IOV vbios */
4352 amdgpu_device_detect_sriov_bios(adev);
4354 /* check if we need to reset the asic
4355 * E.g., driver was not cleanly unloaded previously, etc.
4357 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
4358 if (adev->gmc.xgmi.num_physical_nodes) {
4359 dev_info(adev->dev, "Pending hive reset.\n");
4360 amdgpu_set_init_level(adev,
4361 AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
4362 } else if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) &&
4363 !amdgpu_device_has_display_hardware(adev)) {
4364 r = psp_gpu_reset(adev);
4366 tmp = amdgpu_reset_method;
4367 /* It should do a default reset when loading or reloading the driver,
4368 * regardless of the module parameter reset_method.
4370 amdgpu_reset_method = AMD_RESET_METHOD_NONE;
4371 r = amdgpu_asic_reset(adev);
4372 amdgpu_reset_method = tmp;
4376 dev_err(adev->dev, "asic reset on init failed\n");
4381 /* Post card if necessary */
4382 if (amdgpu_device_need_post(adev)) {
4384 dev_err(adev->dev, "no vBIOS found\n");
4388 DRM_INFO("GPU posting now...\n");
4389 r = amdgpu_device_asic_init(adev);
4391 dev_err(adev->dev, "gpu post error!\n");
4397 if (adev->is_atom_fw) {
4398 /* Initialize clocks */
4399 r = amdgpu_atomfirmware_get_clock_info(adev);
4401 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
4402 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4406 /* Initialize clocks */
4407 r = amdgpu_atombios_get_clock_info(adev);
4409 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
4410 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4413 /* init i2c buses */
4414 if (!amdgpu_device_has_dc_support(adev))
4415 amdgpu_atombios_i2c_init(adev);
4421 r = amdgpu_fence_driver_sw_init(adev);
4423 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
4424 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
4428 /* init the mode config */
4429 drm_mode_config_init(adev_to_drm(adev));
4431 r = amdgpu_device_ip_init(adev);
4433 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
4434 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
4435 goto release_ras_con;
4438 amdgpu_fence_driver_hw_init(adev);
4441 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
4442 adev->gfx.config.max_shader_engines,
4443 adev->gfx.config.max_sh_per_se,
4444 adev->gfx.config.max_cu_per_sh,
4445 adev->gfx.cu_info.number);
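/* Mark the device as able to accept command submissions */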
4447 adev->accel_working = true;
4449 amdgpu_vm_check_compute_bug(adev);
4451 /* Initialize the buffer migration limit. */
4452 if (amdgpu_moverate >= 0)
4453 max_MBps = amdgpu_moverate;
4455 max_MBps = 8; /* Allow 8 MB/s. */
4456 /* Get a log2 for easy divisions. */
4457 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
4460 * Register the gpu instance before amdgpu_device_enable_mgpu_fan_boost.
4461 * Otherwise the mgpu fan boost feature will be skipped because this
4462 * gpu instance would not be counted yet.
4464 amdgpu_register_gpu_instance(adev);
4466 /* enable clockgating, etc. after ib tests, etc. since some blocks require
4467 * explicit gating rather than handling it automatically.
4469 if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
4470 r = amdgpu_device_ip_late_init(adev);
4472 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4473 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
4474 goto release_ras_con;
4477 amdgpu_ras_resume(adev);
4478 queue_delayed_work(system_wq, &adev->delayed_init_work,
4479 msecs_to_jiffies(AMDGPU_RESUME_MS));
4482 if (amdgpu_sriov_vf(adev)) {
4483 amdgpu_virt_release_full_gpu(adev, true);
4484 flush_delayed_work(&adev->delayed_init_work);
4488 * Place these sysfs registrations after `late_init`, since some of the
4489 * operations performed in `late_init` might affect how the sysfs
4490 * interfaces are created.
4492 r = amdgpu_atombios_sysfs_init(adev);
4494 drm_err(&adev->ddev,
4495 "registering atombios sysfs failed (%d).\n", r);
4497 r = amdgpu_pm_sysfs_init(adev);
4499 DRM_ERROR("registering pm sysfs failed (%d).\n", r);
4501 r = amdgpu_ucode_sysfs_init(adev);
4503 adev->ucode_sysfs_en = false;
4504 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
4506 adev->ucode_sysfs_en = true;
4508 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
4510 dev_err(adev->dev, "Could not create amdgpu device attr\n");
4512 r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
4515 "Could not create amdgpu board attributes\n");
4517 amdgpu_fru_sysfs_init(adev);
4518 amdgpu_reg_state_sysfs_init(adev);
4519 amdgpu_xcp_cfg_sysfs_init(adev);
4521 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4522 r = amdgpu_pmu_init(adev);
4524 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
4526 /* Keep the stored PCI config space at hand for restore after a sudden PCI error */
4527 if (amdgpu_device_cache_pci_state(adev->pdev))
4528 pci_restore_state(pdev);
4530 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
4531 /* this will fail for cards that aren't VGA class devices, just
4534 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4535 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
4537 px = amdgpu_device_supports_px(ddev);
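/* Register with vga_switcheroo on PX platforms and on non-removable devices behind an Apple GMUX */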
4539 if (px || (!dev_is_removable(&adev->pdev->dev) &&
4540 apple_gmux_detect(NULL, NULL)))
4541 vga_switcheroo_register_client(adev->pdev,
4542 &amdgpu_switcheroo_ops, px);
4545 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
4547 if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
4548 amdgpu_xgmi_reset_on_init(adev);
4550 amdgpu_device_check_iommu_direct_map(adev);
4555 if (amdgpu_sriov_vf(adev))
4556 amdgpu_virt_release_full_gpu(adev, true);
4558 /* failed in exclusive mode due to timeout */
4559 if (amdgpu_sriov_vf(adev) &&
4560 !amdgpu_sriov_runtime(adev) &&
4561 amdgpu_virt_mmio_blocked(adev) &&
4562 !amdgpu_virt_wait_reset(adev)) {
4563 dev_err(adev->dev, "VF exclusive mode timeout\n");
4564 /* Don't send request since VF is inactive. */
4565 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4566 adev->virt.ops = NULL;
4569 amdgpu_release_ras_context(adev);
4572 amdgpu_vf_error_trans_all(adev);
4577 static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4580 /* Clear all CPU mappings pointing to this device */
4581 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4583 /* Unmap all mapped bars - Doorbell, registers and VRAM */
4584 amdgpu_doorbell_fini(adev);
4586 iounmap(adev->rmmio);
4588 if (adev->mman.aper_base_kaddr)
4589 iounmap(adev->mman.aper_base_kaddr);
4590 adev->mman.aper_base_kaddr = NULL;
4592 /* Memory manager related */
4593 if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
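/* Release the write-combine MTRR/memtype set up for the visible VRAM aperture */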
4594 arch_phys_wc_del(adev->gmc.vram_mtrr);
4595 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4600 * amdgpu_device_fini_hw - tear down the driver
4602 * @adev: amdgpu_device pointer
4604 * Tear down the driver info (all asics).
4605 * Called at driver shutdown.
4607 void amdgpu_device_fini_hw(struct amdgpu_device *adev)
4609 dev_info(adev->dev, "amdgpu: finishing device.\n");
4610 flush_delayed_work(&adev->delayed_init_work);
4612 if (adev->mman.initialized)
4613 drain_workqueue(adev->mman.bdev.wq);
4614 adev->shutdown = true;
4616 /* make sure the IB test has finished before entering exclusive mode
4617 * to avoid preemption during the IB test
4619 if (amdgpu_sriov_vf(adev)) {
4620 amdgpu_virt_request_full_gpu(adev, false);
4621 amdgpu_virt_fini_data_exchange(adev);
4624 /* disable all interrupts */
4625 amdgpu_irq_disable_all(adev);
4626 if (adev->mode_info.mode_config_initialized) {
4627 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4628 drm_helper_force_disable_all(adev_to_drm(adev));
4630 drm_atomic_helper_shutdown(adev_to_drm(adev));
4632 amdgpu_fence_driver_hw_fini(adev);
4634 if (adev->pm.sysfs_initialized)
4635 amdgpu_pm_sysfs_fini(adev);
4636 if (adev->ucode_sysfs_en)
4637 amdgpu_ucode_sysfs_fini(adev);
4638 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
4639 amdgpu_fru_sysfs_fini(adev);
4641 amdgpu_reg_state_sysfs_fini(adev);
4642 amdgpu_xcp_cfg_sysfs_fini(adev);
4644 /* RAS features must be disabled before hw fini */
4645 amdgpu_ras_pre_fini(adev);
4647 amdgpu_ttm_set_buffer_funcs_status(adev, false);
4649 amdgpu_device_ip_fini_early(adev);
4651 amdgpu_irq_fini_hw(adev);
4653 if (adev->mman.initialized)
4654 ttm_device_clear_dma_mappings(&adev->mman.bdev);
4656 amdgpu_gart_dummy_page_fini(adev);
4658 if (drm_dev_is_unplugged(adev_to_drm(adev)))
4659 amdgpu_device_unmap_mmio(adev);
4663 void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4668 amdgpu_fence_driver_sw_fini(adev);
4669 amdgpu_device_ip_fini(adev);
4670 amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
4671 adev->accel_working = false;
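/* Release the reference held on the gang submit fence */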
4672 dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
4674 amdgpu_reset_fini(adev);
4676 /* free i2c buses */
4677 if (!amdgpu_device_has_dc_support(adev))
4678 amdgpu_i2c_fini(adev);
4680 if (amdgpu_emu_mode != 1)
4681 amdgpu_atombios_fini(adev);
4686 kfree(adev->fru_info);
4687 adev->fru_info = NULL;
4689 px = amdgpu_device_supports_px(adev_to_drm(adev));
4691 if (px || (!dev_is_removable(&adev->pdev->dev) &&
4692 apple_gmux_detect(NULL, NULL)))
4693 vga_switcheroo_unregister_client(adev->pdev);
4696 vga_switcheroo_fini_domain_pm_ops(adev->dev);
4698 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4699 vga_client_unregister(adev->pdev);
4701 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4703 iounmap(adev->rmmio);
4705 amdgpu_doorbell_fini(adev);
4709 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4710 amdgpu_pmu_fini(adev);
4711 if (adev->mman.discovery_bin)
4712 amdgpu_discovery_fini(adev);
4714 amdgpu_reset_put_reset_domain(adev->reset_domain);
4715 adev->reset_domain = NULL;
4717 kfree(adev->pci_state);
4722 * amdgpu_device_evict_resources - evict device resources
4723 * @adev: amdgpu device object
4725 * Evicts all TTM device resources (VRAM BOs, GART table) from the LRU list
4726 * of the VRAM memory type. Mainly used for evicting device resources
4730 static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
4734 /* No need to evict vram on APUs for suspend to ram or s2idle */
4735 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
4738 ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4740 DRM_WARN("evicting device resources failed\n");
4748 * amdgpu_device_prepare - prepare for device suspend
4750 * @dev: drm dev pointer
4752 * Prepare to put the hw in the suspend state (all asics).
4753 * Returns 0 for success or an error on failure.
4754 * Called at driver suspend.
4756 int amdgpu_device_prepare(struct drm_device *dev)
4758 struct amdgpu_device *adev = drm_to_adev(dev);
4761 amdgpu_choose_low_power_state(adev);
4763 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4766 /* Evict the majority of BOs before starting suspend sequence */
4767 r = amdgpu_device_evict_resources(adev);
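/* Make sure any pending GFXOFF delayed work has run before suspending */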
4771 flush_delayed_work(&adev->gfx.gfx_off_delay_work);
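/* Call prepare_suspend on every valid IP block that implements it */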
4773 for (i = 0; i < adev->num_ip_blocks; i++) {
4774 if (!adev->ip_blocks[i].status.valid)
4776 if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
4778 r = adev->ip_blocks[i].version->funcs->prepare_suspend(&adev->ip_blocks[i]);
4786 adev->in_s0ix = adev->in_s3 = false;
4792 * amdgpu_device_suspend - initiate device suspend
4794 * @dev: drm dev pointer
4795 * @fbcon : notify the fbdev of suspend
4797 * Puts the hw in the suspend state (all asics).
4798 * Returns 0 for success or an error on failure.
4799 * Called at driver suspend.
4801 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
4803 struct amdgpu_device *adev = drm_to_adev(dev);
4806 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4809 adev->in_suspend = true;
4811 if (amdgpu_sriov_vf(adev)) {
4812 amdgpu_virt_fini_data_exchange(adev);
4813 r = amdgpu_virt_request_full_gpu(adev, false);
4818 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4819 DRM_WARN("smart shift update failed\n");
4822 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
4824 cancel_delayed_work_sync(&adev->delayed_init_work);
4826 amdgpu_ras_suspend(adev);
4828 amdgpu_device_ip_suspend_phase1(adev);
4831 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
4833 r = amdgpu_device_evict_resources(adev);
4837 amdgpu_ttm_set_buffer_funcs_status(adev, false);
4839 amdgpu_fence_driver_hw_fini(adev);
4841 amdgpu_device_ip_suspend_phase2(adev);
4843 if (amdgpu_sriov_vf(adev))
4844 amdgpu_virt_release_full_gpu(adev, false);
4846 r = amdgpu_dpm_notify_rlc_state(adev, false);
4854 * amdgpu_device_resume - initiate device resume
4856 * @dev: drm dev pointer
4857 * @fbcon : notify the fbdev of resume
4859 * Bring the hw back to operating state (all asics).
4860 * Returns 0 for success or an error on failure.
4861 * Called at driver resume.
4863 int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
4865 struct amdgpu_device *adev = drm_to_adev(dev);
4868 if (amdgpu_sriov_vf(adev)) {
4869 r = amdgpu_virt_request_full_gpu(adev, true);
4874 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4878 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
4881 if (amdgpu_device_need_post(adev)) {
4882 r = amdgpu_device_asic_init(adev);
4884 dev_err(adev->dev, "amdgpu asic init failed\n");
4887 r = amdgpu_device_ip_resume(adev);
4890 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
4893 amdgpu_fence_driver_hw_init(adev);
4895 if (!adev->in_s0ix) {
4896 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4901 r = amdgpu_device_ip_late_init(adev);
4905 queue_delayed_work(system_wq, &adev->delayed_init_work,
4906 msecs_to_jiffies(AMDGPU_RESUME_MS));
4908 if (amdgpu_sriov_vf(adev)) {
4909 amdgpu_virt_init_data_exchange(adev);
4910 amdgpu_virt_release_full_gpu(adev, true);
4916 /* Make sure IB tests flushed */
4917 flush_delayed_work(&adev->delayed_init_work);
4920 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
4922 amdgpu_ras_resume(adev);
4924 if (adev->mode_info.num_crtc) {
4926 * Most of the connector probing functions try to acquire runtime pm
4927 * refs to ensure that the GPU is powered on when connector polling is
4928 * performed. Since we're calling this from a runtime PM callback,
4929 * trying to acquire rpm refs will cause us to deadlock.
4931 * Since we're guaranteed to be holding the rpm lock, it's safe to
4932 * temporarily disable the rpm helpers so this doesn't deadlock us.
4935 dev->dev->power.disable_depth++;
4937 if (!adev->dc_enabled)
4938 drm_helper_hpd_irq_event(dev);
4940 drm_kms_helper_hotplug_event(dev);
4942 dev->dev->power.disable_depth--;
4945 adev->in_suspend = false;
4947 if (adev->enable_mes)
4948 amdgpu_mes_self_test(adev);
4950 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4951 DRM_WARN("smart shift update failed\n");
4957 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4959 * @adev: amdgpu_device pointer
4961 * The list of all the hardware IPs that make up the asic is walked and
4962 * the check_soft_reset callbacks are run. check_soft_reset determines
4963 * if the asic is still hung or not.
4964 * Returns true if any of the IPs are still in a hung state, false if not.
4966 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
4969 bool asic_hang = false;
4971 if (amdgpu_sriov_vf(adev))
4974 if (amdgpu_asic_need_full_reset(adev))
4977 for (i = 0; i < adev->num_ip_blocks; i++) {
4978 if (!adev->ip_blocks[i].status.valid)
4980 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4981 adev->ip_blocks[i].status.hang =
4982 adev->ip_blocks[i].version->funcs->check_soft_reset(
4983 &adev->ip_blocks[i]);
4984 if (adev->ip_blocks[i].status.hang) {
4985 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
4993 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4995 * @adev: amdgpu_device pointer
4997 * The list of all the hardware IPs that make up the asic is walked and the
4998 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
4999 * handles any IP specific hardware or software state changes that are
5000 * necessary for a soft reset to succeed.
5001 * Returns 0 on success, negative error code on failure.
5003 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
5007 for (i = 0; i < adev->num_ip_blocks; i++) {
5008 if (!adev->ip_blocks[i].status.valid)
5010 if (adev->ip_blocks[i].status.hang &&
5011 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
5012 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(&adev->ip_blocks[i]);
5022 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
5024 * @adev: amdgpu_device pointer
5026 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
5027 * reset is necessary to recover.
5028 * Returns true if a full asic reset is required, false if not.
5030 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
5034 if (amdgpu_asic_need_full_reset(adev))
5037 for (i = 0; i < adev->num_ip_blocks; i++) {
5038 if (!adev->ip_blocks[i].status.valid)
5040 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
5041 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
5042 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
5043 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
5044 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
5045 if (adev->ip_blocks[i].status.hang) {
5046 dev_info(adev->dev, "Some block need full reset!\n");
5055 * amdgpu_device_ip_soft_reset - do a soft reset
5057 * @adev: amdgpu_device pointer
5059 * The list of all the hardware IPs that make up the asic is walked and the
5060 * soft_reset callbacks are run if the block is hung. soft_reset handles any
5061 * IP specific hardware or software state changes that are necessary to soft
5063 * Returns 0 on success, negative error code on failure.
5065 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
5069 for (i = 0; i < adev->num_ip_blocks; i++) {
5070 if (!adev->ip_blocks[i].status.valid)
5072 if (adev->ip_blocks[i].status.hang &&
5073 adev->ip_blocks[i].version->funcs->soft_reset) {
5074 r = adev->ip_blocks[i].version->funcs->soft_reset(&adev->ip_blocks[i]);
5084 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
5086 * @adev: amdgpu_device pointer
5088 * The list of all the hardware IPs that make up the asic is walked and the
5089 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
5090 * handles any IP specific hardware or software state changes that are
5091 * necessary after the IP has been soft reset.
5092 * Returns 0 on success, negative error code on failure.
5094 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
5098 for (i = 0; i < adev->num_ip_blocks; i++) {
5099 if (!adev->ip_blocks[i].status.valid)
5101 if (adev->ip_blocks[i].status.hang &&
5102 adev->ip_blocks[i].version->funcs->post_soft_reset)
5103 r = adev->ip_blocks[i].version->funcs->post_soft_reset(&adev->ip_blocks[i]);
5112 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5114 * @adev: amdgpu_device pointer
5115 * @reset_context: amdgpu reset context pointer
5117 * Do a VF FLR and reinitialize the ASIC.
5118 * Returns 0 on success, a negative error code otherwise.
5120 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
5121 struct amdgpu_reset_context *reset_context)
5124 struct amdgpu_hive_info *hive = NULL;
5126 if (test_bit(AMDGPU_HOST_FLR, &reset_context->flags)) {
5127 if (!amdgpu_ras_get_fed_status(adev))
5128 amdgpu_virt_ready_to_reset(adev);
5129 amdgpu_virt_wait_reset(adev);
5130 clear_bit(AMDGPU_HOST_FLR, &reset_context->flags);
5131 r = amdgpu_virt_request_full_gpu(adev, true);
5133 r = amdgpu_virt_reset_gpu(adev);
5138 amdgpu_ras_set_fed(adev, false);
5139 amdgpu_irq_gpu_reset_resume_helper(adev);
5141 /* some SW cleanup the VF needs to do before recovery */
5142 amdgpu_virt_post_reset(adev);
5144 /* Resume IP prior to SMC */
5145 r = amdgpu_device_ip_reinit_early_sriov(adev);
5149 amdgpu_virt_init_data_exchange(adev);
5151 r = amdgpu_device_fw_loading(adev);
5155 /* now we are okay to resume SMC/CP/SDMA */
5156 r = amdgpu_device_ip_reinit_late_sriov(adev);
5160 hive = amdgpu_get_xgmi_hive(adev);
5161 /* Update PSP FW topology after reset */
5162 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
5163 r = amdgpu_xgmi_update_topology(hive, adev);
5165 amdgpu_put_xgmi_hive(hive);
5169 r = amdgpu_ib_ring_tests(adev);
5173 if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST)
5174 amdgpu_inc_vram_lost(adev);
5176 /* need to be called during full access so we can't do it later like
5179 amdgpu_amdkfd_post_reset(adev);
5180 amdgpu_virt_release_full_gpu(adev, true);
5182 /* Aldebaran and gfx_11_0_3 support RAS in SRIOV, so RAS needs to be resumed during reset */
5183 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
5184 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
5185 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
5186 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
5187 amdgpu_ras_resume(adev);
5192 * amdgpu_device_has_job_running - check if there is any job in mirror list
5194 * @adev: amdgpu_device pointer
5196 * check if there is any job in mirror list
5198 bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
5201 struct drm_sched_job *job;
5203 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5204 struct amdgpu_ring *ring = adev->rings[i];
5206 if (!amdgpu_ring_sched_ready(ring))
5209 spin_lock(&ring->sched.job_list_lock);
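/* Peek at the head of the ring's pending list while holding the scheduler lock */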
5210 job = list_first_entry_or_null(&ring->sched.pending_list,
5211 struct drm_sched_job, list);
5212 spin_unlock(&ring->sched.job_list_lock);
5220 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
5222 * @adev: amdgpu_device pointer
5224 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
5227 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
5230 if (amdgpu_gpu_recovery == 0)
5233 /* Skip soft reset check in fatal error mode */
5234 if (!amdgpu_ras_is_poison_mode_supported(adev))
5237 if (amdgpu_sriov_vf(adev))
5240 if (amdgpu_gpu_recovery == -1) {
5241 switch (adev->asic_type) {
5242 #ifdef CONFIG_DRM_AMDGPU_SI
5249 #ifdef CONFIG_DRM_AMDGPU_CIK
5256 case CHIP_CYAN_SKILLFISH:
5266 dev_info(adev->dev, "GPU recovery disabled.\n");
5270 int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
5275 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5277 dev_info(adev->dev, "GPU mode1 reset\n");
5279 /* Cache the state before bus master disable. The saved config space
5280 * values are used in other cases like restore after mode-2 reset.
5282 amdgpu_device_cache_pci_state(adev->pdev);
5285 pci_clear_master(adev->pdev);
5287 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
5288 dev_info(adev->dev, "GPU smu mode1 reset\n");
5289 ret = amdgpu_dpm_mode1_reset(adev);
5291 dev_info(adev->dev, "GPU psp mode1 reset\n");
5292 ret = psp_gpu_reset(adev);
5296 goto mode1_reset_failed;
5298 amdgpu_device_load_pci_state(adev->pdev);
5299 ret = amdgpu_psp_wait_for_bootloader(adev);
5301 goto mode1_reset_failed;
5303 /* wait for asic to come out of reset */
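/* An all-ones readback means the register space is not accessible yet */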
5304 for (i = 0; i < adev->usec_timeout; i++) {
5305 u32 memsize = adev->nbio.funcs->get_memsize(adev);
5307 if (memsize != 0xffffffff)
5312 if (i >= adev->usec_timeout) {
5314 goto mode1_reset_failed;
5317 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
5322 dev_err(adev->dev, "GPU mode1 reset failed\n");
5326 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
5327 struct amdgpu_reset_context *reset_context)
5330 struct amdgpu_job *job = NULL;
5331 struct amdgpu_device *tmp_adev = reset_context->reset_req_dev;
5332 bool need_full_reset =
5333 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5335 if (reset_context->reset_req_dev == adev)
5336 job = reset_context->job;
5338 if (amdgpu_sriov_vf(adev))
5339 amdgpu_virt_pre_reset(adev);
5341 amdgpu_fence_driver_isr_toggle(adev, true);
5343 /* block all schedulers and reset given job's ring */
5344 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5345 struct amdgpu_ring *ring = adev->rings[i];
5347 if (!amdgpu_ring_sched_ready(ring))
5350 /* Clear the job fences from the fence driver to avoid force_completion
5351 * leaving NULL and vm flush fences in the fence driver
5353 amdgpu_fence_driver_clear_job_fences(ring);
5355 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
5356 amdgpu_fence_driver_force_completion(ring);
5359 amdgpu_fence_driver_isr_toggle(adev, false);
5362 drm_sched_increase_karma(&job->base);
5364 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
5365 /* If reset handler not implemented, continue; otherwise return */
5366 if (r == -EOPNOTSUPP)
5371 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
5372 if (!amdgpu_sriov_vf(adev)) {
5374 if (!need_full_reset)
5375 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
5377 if (!need_full_reset && amdgpu_gpu_recovery &&
5378 amdgpu_device_ip_check_soft_reset(adev)) {
5379 amdgpu_device_ip_pre_soft_reset(adev);
5380 r = amdgpu_device_ip_soft_reset(adev);
5381 amdgpu_device_ip_post_soft_reset(adev);
5382 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
5383 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
5384 need_full_reset = true;
5388 if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) {
5389 dev_info(tmp_adev->dev, "Dumping IP State\n");
5390 /* Trigger ip dump before we reset the asic */
5391 for (i = 0; i < tmp_adev->num_ip_blocks; i++)
5392 if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state)
5393 tmp_adev->ip_blocks[i].version->funcs
5394 ->dump_ip_state((void *)&tmp_adev->ip_blocks[i]);
5395 dev_info(tmp_adev->dev, "Dumping IP State Completed\n");
5398 if (need_full_reset)
5399 r = amdgpu_device_ip_suspend(adev);
5400 if (need_full_reset)
5401 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5403 clear_bit(AMDGPU_NEED_FULL_RESET,
5404 &reset_context->flags);
5410 int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)
5412 struct list_head *device_list_handle;
5413 bool full_reset, vram_lost = false;
5414 struct amdgpu_device *tmp_adev;
5417 device_list_handle = reset_context->reset_device_list;
5419 if (!device_list_handle)
5422 full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5425 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5426 /* After reset, the device returns to the default init level */
5427 amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_DEFAULT);
5430 amdgpu_ras_set_fed(tmp_adev, false);
5431 r = amdgpu_device_asic_init(tmp_adev);
5433 dev_warn(tmp_adev->dev, "asic atom init failed!");
5435 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
5437 r = amdgpu_device_ip_resume_phase1(tmp_adev);
5441 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
5443 if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags))
5444 amdgpu_coredump(tmp_adev, false, vram_lost, reset_context->job);
5447 DRM_INFO("VRAM is lost due to GPU reset!\n");
5448 amdgpu_inc_vram_lost(tmp_adev);
5451 r = amdgpu_device_fw_loading(tmp_adev);
5455 r = amdgpu_xcp_restore_partition_mode(
5460 r = amdgpu_device_ip_resume_phase2(tmp_adev);
5464 if (tmp_adev->mman.buffer_funcs_ring->sched.ready)
5465 amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true);
5468 amdgpu_device_fill_reset_magic(tmp_adev);
5471 * Add this ASIC back as tracked since the reset
5472 * completed successfully.
5474 amdgpu_register_gpu_instance(tmp_adev);
5476 if (!reset_context->hive &&
5477 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5478 amdgpu_xgmi_add_device(tmp_adev);
5480 r = amdgpu_device_ip_late_init(tmp_adev);
5484 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
5487 * The GPU enters a bad state once the number of faulty pages
5488 * detected by ECC reaches the threshold, and RAS
5489 * recovery is scheduled next. So add one check
5490 * here to break recovery if it indeed exceeds the
5491 * bad page threshold, and remind the user to
5492 * retire this GPU or set a bigger
5493 * bad_page_threshold value to fix this when
5494 * probing the driver again.
5496 if (!amdgpu_ras_is_rma(tmp_adev)) {
5498 amdgpu_ras_resume(tmp_adev);
5504 /* Update PSP FW topology after reset */
5505 if (reset_context->hive &&
5506 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5507 r = amdgpu_xgmi_update_topology(
5508 reset_context->hive, tmp_adev);
5514 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5515 r = amdgpu_ib_ring_tests(tmp_adev);
5517 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
5524 tmp_adev->asic_reset_res = r;
5531 int amdgpu_do_asic_reset(struct list_head *device_list_handle,
5532 struct amdgpu_reset_context *reset_context)
5534 struct amdgpu_device *tmp_adev = NULL;
5535 bool need_full_reset, skip_hw_reset;
5538 /* Try reset handler method first */
5539 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5542 reset_context->reset_device_list = device_list_handle;
5543 r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
5544 /* If reset handler not implemented, continue; otherwise return */
5545 if (r == -EOPNOTSUPP)
5550 /* Reset handler not implemented, use the default method */
5552 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5553 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
5556 * ASIC reset has to be done on all XGMI hive nodes ASAP
5557 * to allow proper link negotiation in FW (within 1 sec)
5559 if (!skip_hw_reset && need_full_reset) {
5560 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5561 /* For XGMI run all resets in parallel to speed up the process */
5562 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5563 if (!queue_work(system_unbound_wq,
5564 &tmp_adev->xgmi_reset_work))
5567 r = amdgpu_asic_reset(tmp_adev);
5570 dev_err(tmp_adev->dev,
5571 "ASIC reset failed with error, %d for drm dev, %s",
5572 r, adev_to_drm(tmp_adev)->unique);
5577 /* For XGMI wait for all resets to complete before proceed */
5579 list_for_each_entry(tmp_adev, device_list_handle,
5581 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5582 flush_work(&tmp_adev->xgmi_reset_work);
5583 r = tmp_adev->asic_reset_res;
5591 if (!r && amdgpu_ras_intr_triggered()) {
5592 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5593 amdgpu_ras_reset_error_count(tmp_adev,
5594 AMDGPU_RAS_BLOCK__MMHUB);
5597 amdgpu_ras_intr_cleared();
5600 r = amdgpu_device_reinit_after_reset(reset_context);
5602 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5604 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5610 static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
5613 switch (amdgpu_asic_reset_method(adev)) {
5614 case AMD_RESET_METHOD_MODE1:
5615 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5617 case AMD_RESET_METHOD_MODE2:
5618 adev->mp1_state = PP_MP1_STATE_RESET;
5621 adev->mp1_state = PP_MP1_STATE_NONE;
5626 static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
5628 amdgpu_vf_error_trans_all(adev);
5629 adev->mp1_state = PP_MP1_STATE_NONE;
5632 static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5634 struct pci_dev *p = NULL;
5636 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5637 adev->pdev->bus->number, 1);
5639 pm_runtime_enable(&(p->dev));
5640 pm_runtime_resume(&(p->dev));
5646 static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5648 enum amd_reset_method reset_method;
5649 struct pci_dev *p = NULL;
5653 * For now, only BACO and mode1 reset are confirmed
5654 * to suffer the audio issue if the audio device is not properly suspended.
5656 reset_method = amdgpu_asic_reset_method(adev);
5657 if ((reset_method != AMD_RESET_METHOD_BACO) &&
5658 (reset_method != AMD_RESET_METHOD_MODE1))
5661 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5662 adev->pdev->bus->number, 1);
5666 expires = pm_runtime_autosuspend_expiration(&(p->dev));
5669 * If we cannot get the audio device autosuspend delay,
5670 * a fixed 4s interval will be used. Since 3s is
5671 * the audio controller's default autosuspend delay setting,
5672 * the 4s used here is guaranteed to cover that.
5674 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
5676 while (!pm_runtime_status_suspended(&(p->dev))) {
5677 if (!pm_runtime_suspend(&(p->dev)))
5680 if (expires < ktime_get_mono_fast_ns()) {
5681 dev_warn(adev->dev, "failed to suspend display audio\n");
5683 /* TODO: abort the subsequent gpu reset? */
5688 pm_runtime_disable(&(p->dev));
5694 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
5696 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5698 #if defined(CONFIG_DEBUG_FS)
5699 if (!amdgpu_sriov_vf(adev))
5700 cancel_work(&adev->reset_work);
5704 cancel_work(&adev->kfd.reset_work);
5706 if (amdgpu_sriov_vf(adev))
5707 cancel_work(&adev->virt.flr_work);
5709 if (con && adev->ras_enabled)
5710 cancel_work(&con->recovery_work);
5714 static int amdgpu_device_health_check(struct list_head *device_list_handle)
5716 struct amdgpu_device *tmp_adev;
5720 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5721 pci_read_config_dword(tmp_adev->pdev, PCI_COMMAND, &status);
5722 if (PCI_POSSIBLE_ERROR(status)) {
5723 dev_err(tmp_adev->dev, "device lost from bus!");
5732 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
5734 * @adev: amdgpu_device pointer
5735 * @job: which job trigger hang
5736 * @reset_context: amdgpu reset context pointer
5738 * Attempt to reset the GPU if it has hung (all asics).
5739 * Attempt to do a soft-reset or full-reset and reinitialize the ASIC.
5740 * Returns 0 for success or an error on failure.
5743 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
5744 struct amdgpu_job *job,
5745 struct amdgpu_reset_context *reset_context)
5747 struct list_head device_list, *device_list_handle = NULL;
5748 bool job_signaled = false;
5749 struct amdgpu_hive_info *hive = NULL;
5750 struct amdgpu_device *tmp_adev = NULL;
5752 bool need_emergency_restart = false;
5753 bool audio_suspended = false;
5754 int retry_limit = AMDGPU_MAX_RETRY_LIMIT;
5757 * Special case: RAS triggered and full reset isn't supported
5759 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5762 * Flush RAM to disk so that after reboot
5763 * the user can read the log and see why the system rebooted.
5765 if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
5766 amdgpu_ras_get_context(adev)->reboot) {
5767 DRM_WARN("Emergency reboot.");
5770 emergency_restart();
5773 dev_info(adev->dev, "GPU %s begin!\n",
5774 need_emergency_restart ? "jobs stop":"reset");
5776 if (!amdgpu_sriov_vf(adev))
5777 hive = amdgpu_get_xgmi_hive(adev);
5779 mutex_lock(&hive->hive_lock);
5781 reset_context->job = job;
5782 reset_context->hive = hive;
5784 * Build list of devices to reset.
5785 * In case we are in XGMI hive mode, resort the device list
5786 * to put adev in the 1st position.
5788 INIT_LIST_HEAD(&device_list);
5789 if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1) && hive) {
5790 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
5791 list_add_tail(&tmp_adev->reset_list, &device_list);
5793 tmp_adev->shutdown = true;
5795 if (!list_is_first(&adev->reset_list, &device_list))
5796 list_rotate_to_front(&adev->reset_list, &device_list);
5797 device_list_handle = &device_list;
5799 list_add_tail(&adev->reset_list, &device_list);
5800 device_list_handle = &device_list;
5803 if (!amdgpu_sriov_vf(adev)) {
5804 r = amdgpu_device_health_check(device_list_handle);
5809 /* We need to lock reset domain only once both for XGMI and single device */
5810 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5812 amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
5814 /* block all schedulers and reset given job's ring */
5815 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5817 amdgpu_device_set_mp1_state(tmp_adev);
5820 * Try to put the audio codec into suspend state
5821 * before the gpu reset starts.
5823 * The power domain of the graphics device
5824 * is shared with the AZ power domain; without this,
5825 * we may change the audio hardware from behind
5826 * the audio driver's back, which will trigger
5827 * some audio codec errors.
5829 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5830 audio_suspended = true;
5832 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5834 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5836 amdgpu_amdkfd_pre_reset(tmp_adev, reset_context);
5839 * Mark these ASICs to be reset as untracked first,
5840 * and add them back after the reset completes.
5842 amdgpu_unregister_gpu_instance(tmp_adev);
5844 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
5846 /* disable ras on ALL IPs */
5847 if (!need_emergency_restart &&
5848 amdgpu_device_ip_need_full_reset(tmp_adev))
5849 amdgpu_ras_suspend(tmp_adev);
5851 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5852 struct amdgpu_ring *ring = tmp_adev->rings[i];
5854 if (!amdgpu_ring_sched_ready(ring))
5857 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
5859 if (need_emergency_restart)
5860 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
5862 atomic_inc(&tmp_adev->gpu_reset_counter);
5865 if (need_emergency_restart)
5866 goto skip_sched_resume;
5869 * Must check guilty signal here since after this point all old
5870 * HW fences are force signaled.
5872 * job->base holds a reference to parent fence
5874 if (job && dma_fence_is_signaled(&job->hw_fence)) {
5875 job_signaled = true;
5876 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5880 retry: /* Rest of adevs pre asic reset from XGMI hive. */
5881 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5882 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
5883 /*TODO Should we stop ?*/
5885 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
5886 r, adev_to_drm(tmp_adev)->unique);
5887 tmp_adev->asic_reset_res = r;
5891 /* Actual ASIC resets if needed.*/
5892 /* Host driver will handle XGMI hive reset for SRIOV */
5893 if (amdgpu_sriov_vf(adev)) {
5894 if (amdgpu_ras_get_fed_status(adev) || amdgpu_virt_rcvd_ras_interrupt(adev)) {
5895 dev_dbg(adev->dev, "Detected RAS error, wait for FLR completion\n");
5896 amdgpu_ras_set_fed(adev, true);
5897 set_bit(AMDGPU_HOST_FLR, &reset_context->flags);
5900 r = amdgpu_device_reset_sriov(adev, reset_context);
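/* On a retryable SRIOV reset failure, drop full GPU access and try again */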
5901 if (AMDGPU_RETRY_SRIOV_RESET(r) && (retry_limit--) > 0) {
5902 amdgpu_virt_release_full_gpu(adev, true);
5906 adev->asic_reset_res = r;
5908 r = amdgpu_do_asic_reset(device_list_handle, reset_context);
5909 if (r && r == -EAGAIN)
5913 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5915 * Drop any pending non-scheduler resets queued before the reset is done.
5916 * Any reset scheduled after this point would be valid. Scheduler resets
5917 * were already dropped during drm_sched_stop and no new ones can come
5918 * in before drm_sched_start.
5920 amdgpu_device_stop_pending_resets(tmp_adev);
5925 /* Post ASIC reset for all devs .*/
5926 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5928 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5929 struct amdgpu_ring *ring = tmp_adev->rings[i];
5931 if (!amdgpu_ring_sched_ready(ring))
5934 drm_sched_start(&ring->sched);
5937 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
5938 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
5940 if (tmp_adev->asic_reset_res)
5941 r = tmp_adev->asic_reset_res;
5943 tmp_adev->asic_reset_res = 0;
5946 /* bad news, how do we tell it to userspace?
5947 * for a ras error, we should report GPU bad status instead of
5950 if (reset_context->src != AMDGPU_RESET_SRC_RAS ||
5951 !amdgpu_ras_eeprom_check_err_threshold(tmp_adev))
5952 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n",
5953 atomic_read(&tmp_adev->gpu_reset_counter));
5954 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5956 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
5957 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5958 DRM_WARN("smart shift update failed\n");
5963 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5964 /* unlock kfd: SRIOV would do it separately */
5965 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
5966 amdgpu_amdkfd_post_reset(tmp_adev);
5968 /* kfd_post_reset will do nothing if the kfd device is not initialized;
5969 * we need to bring up kfd here if it wasn't initialized before
5971 if (!adev->kfd.init_complete)
5972 amdgpu_amdkfd_device_init(adev);
5974 if (audio_suspended)
5975 amdgpu_device_resume_display_audio(tmp_adev);
5977 amdgpu_device_unset_mp1_state(tmp_adev);
5979 amdgpu_ras_set_error_query_ready(tmp_adev, true);
5982 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5984 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5988 mutex_unlock(&hive->hive_lock);
5989 amdgpu_put_xgmi_hive(hive);
5993 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
5995 atomic_set(&adev->reset_domain->reset_res, r);
6000 * amdgpu_device_partner_bandwidth - find the bandwidth of appropriate partner
6002 * @adev: amdgpu_device pointer
6003 * @speed: pointer to the speed of the link
6004 * @width: pointer to the width of the link
6006 * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
6007 * first physical partner to an AMD dGPU.
6008 * This will exclude any virtual switches and links.
6010 static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
6011 enum pci_bus_speed *speed,
6012 enum pcie_link_width *width)
6014 struct pci_dev *parent = adev->pdev;
6016 if (!speed || !width)
6019 *speed = PCI_SPEED_UNKNOWN;
6020 *width = PCIE_LNK_WIDTH_UNKNOWN;
6022 if (amdgpu_device_pcie_dynamic_switching_supported(adev)) {
6023 while ((parent = pci_upstream_bridge(parent))) {
6024 /* skip upstream/downstream switches internal to dGPU */
6025 if (parent->vendor == PCI_VENDOR_ID_ATI)
6027 *speed = pcie_get_speed_cap(parent);
6028 *width = pcie_get_width_cap(parent);
6032 /* use the current speeds rather than max if switching is not supported */
6033 pcie_bandwidth_available(adev->pdev, NULL, speed, width);
6038 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
6040 * @adev: amdgpu_device pointer
6042 * Fetches and stores in the driver the PCIE capabilities (gen speed
6043 * and lanes) of the slot the device is in. Handles APUs and
6044 * virtualized environments where PCIE config space may not be available.
6046 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
6048 struct pci_dev *pdev;
6049 enum pci_bus_speed speed_cap, platform_speed_cap;
6050 enum pcie_link_width platform_link_width;
6052 if (amdgpu_pcie_gen_cap)
6053 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
6055 if (amdgpu_pcie_lane_cap)
6056 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
6058 /* covers APUs as well */
6059 if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
6060 if (adev->pm.pcie_gen_mask == 0)
6061 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
6062 if (adev->pm.pcie_mlw_mask == 0)
6063 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
6067 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
6070 amdgpu_device_partner_bandwidth(adev, &platform_speed_cap,
6071 &platform_link_width);
6073 if (adev->pm.pcie_gen_mask == 0) {
6076 speed_cap = pcie_get_speed_cap(pdev);
6077 if (speed_cap == PCI_SPEED_UNKNOWN) {
6078 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6079 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6080 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
6082 if (speed_cap == PCIE_SPEED_32_0GT)
6083 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6084 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6085 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6086 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
6087 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
6088 else if (speed_cap == PCIE_SPEED_16_0GT)
6089 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6090 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6091 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6092 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
6093 else if (speed_cap == PCIE_SPEED_8_0GT)
6094 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6095 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6096 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
6097 else if (speed_cap == PCIE_SPEED_5_0GT)
6098 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6099 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
6101 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
6104 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
6105 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6106 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6108 if (platform_speed_cap == PCIE_SPEED_32_0GT)
6109 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6110 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6111 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6112 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
6113 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
6114 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
6115 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6116 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6117 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6118 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
6119 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
6120 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6121 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6122 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
6123 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
6124 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6125 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6127 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
6131 if (adev->pm.pcie_mlw_mask == 0) {
6132 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
6133 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
6135 switch (platform_link_width) {
6137 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
6138 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
6139 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6140 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6141 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6142 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6143 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6146 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
6147 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6148 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6149 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6150 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6151 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6154 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6155 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6156 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6157 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6158 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6161 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6162 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6163 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6164 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6167 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6168 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6169 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6172 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6173 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6176 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
6186 * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
6188 * @adev: amdgpu_device pointer
6189 * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
6191 * Return true if @peer_adev can access (DMA) @adev through the PCIe
6192 * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
6195 bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
6196 struct amdgpu_device *peer_adev)
6198 #ifdef CONFIG_HSA_AMD_P2P
6200 !adev->gmc.xgmi.connected_to_cpu &&
6201 !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
6203 bool is_large_bar = adev->gmc.visible_vram_size &&
6204 adev->gmc.real_vram_size == adev->gmc.visible_vram_size;
6205 bool p2p_addressable = amdgpu_device_check_iommu_remap(peer_adev);
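/* Without IOMMU remapping, the whole VRAM aperture must fall within the peer's DMA mask */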
6207 if (!p2p_addressable) {
6208 uint64_t address_mask = peer_adev->dev->dma_mask ?
6209 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
6210 resource_size_t aper_limit =
6211 adev->gmc.aper_base + adev->gmc.aper_size - 1;
6213 p2p_addressable = !(adev->gmc.aper_base & address_mask ||
6214 aper_limit & address_mask);
6216 return pcie_p2p && is_large_bar && p2p_access && p2p_addressable;
6222 int amdgpu_device_baco_enter(struct drm_device *dev)
6224 struct amdgpu_device *adev = drm_to_adev(dev);
6225 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6227 if (!amdgpu_device_supports_baco(dev))
6230 if (ras && adev->ras_enabled &&
6231 adev->nbio.funcs->enable_doorbell_interrupt)
6232 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
6234 return amdgpu_dpm_baco_enter(adev);
6237 int amdgpu_device_baco_exit(struct drm_device *dev)
6239 struct amdgpu_device *adev = drm_to_adev(dev);
6240 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6243 if (!amdgpu_device_supports_baco(dev))
6246 ret = amdgpu_dpm_baco_exit(adev);
6250 if (ras && adev->ras_enabled &&
6251 adev->nbio.funcs->enable_doorbell_interrupt)
6252 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
6254 if (amdgpu_passthrough(adev) && adev->nbio.funcs &&
6255 adev->nbio.funcs->clear_doorbell_interrupt)
6256 adev->nbio.funcs->clear_doorbell_interrupt(adev);
6262 * amdgpu_pci_error_detected - Called when a PCI error is detected.
6263 * @pdev: PCI device struct
6264 * @state: PCI channel state
6266 * Description: Called when a PCI error is detected.
6268 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
6270 pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
6272 struct drm_device *dev = pci_get_drvdata(pdev);
6273 struct amdgpu_device *adev = drm_to_adev(dev);
6276 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
6278 if (adev->gmc.xgmi.num_physical_nodes > 1) {
6279 DRM_WARN("No support for XGMI hive yet...");
6280 return PCI_ERS_RESULT_DISCONNECT;
6283 adev->pci_channel_state = state;
6286 case pci_channel_io_normal:
6287 return PCI_ERS_RESULT_CAN_RECOVER;
6288 /* Fatal error, prepare for slot reset */
6289 case pci_channel_io_frozen:
6291 * Locking adev->reset_domain->sem will prevent any external access
6292 * to GPU during PCI error recovery
6294 amdgpu_device_lock_reset_domain(adev->reset_domain);
6295 amdgpu_device_set_mp1_state(adev);
6298 * Block any work scheduling as we do for regular GPU reset
6299 * for the duration of the recovery
6301 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6302 struct amdgpu_ring *ring = adev->rings[i];
6304 if (!amdgpu_ring_sched_ready(ring))
6307 drm_sched_stop(&ring->sched, NULL);
6309 atomic_inc(&adev->gpu_reset_counter);
6310 return PCI_ERS_RESULT_NEED_RESET;
6311 case pci_channel_io_perm_failure:
6312 /* Permanent error, prepare for device removal */
6313 return PCI_ERS_RESULT_DISCONNECT;
6316 return PCI_ERS_RESULT_NEED_RESET;
6320 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
6321 * @pdev: pointer to PCI device
6323 pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
6326 DRM_INFO("PCI error: mmio enabled callback!!\n");
6328 /* TODO - dump whatever for debugging purposes */
6330 /* This called only if amdgpu_pci_error_detected returns
6331 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
6332 * works, no need to reset slot.
6335 return PCI_ERS_RESULT_RECOVERED;
6339 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
6340 * @pdev: PCI device struct
6342 * Description: This routine is called by the pci error recovery
6343 * code after the PCI slot has been reset, just before we
6344 * should resume normal operations.
6346 pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
6348 struct drm_device *dev = pci_get_drvdata(pdev);
6349 struct amdgpu_device *adev = drm_to_adev(dev);
6351 struct amdgpu_reset_context reset_context;
6353 struct list_head device_list;
6355 /* PCI error slot reset should be skipped during RAS recovery */
6356 if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
6357 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) &&
6358 amdgpu_ras_in_recovery(adev))
6359 return PCI_ERS_RESULT_RECOVERED;
6361 DRM_INFO("PCI error: slot reset callback!!\n");
6363 memset(&reset_context, 0, sizeof(reset_context));
6365 INIT_LIST_HEAD(&device_list);
6366 list_add_tail(&adev->reset_list, &device_list);
6368 /* wait for asic to come out of reset */
6371 /* Restore PCI confspace */
6372 amdgpu_device_load_pci_state(pdev);
6374 /* confirm ASIC came out of reset */
6375 for (i = 0; i < adev->usec_timeout; i++) {
6376 memsize = amdgpu_asic_get_config_memsize(adev);
6378 if (memsize != 0xffffffff)
6382 if (memsize == 0xffffffff) {
6387 reset_context.method = AMD_RESET_METHOD_NONE;
6388 reset_context.reset_req_dev = adev;
6389 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
6390 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
6392 adev->no_hw_access = true;
6393 r = amdgpu_device_pre_asic_reset(adev, &reset_context);
6394 adev->no_hw_access = false;
6398 r = amdgpu_do_asic_reset(&device_list, &reset_context);
6402 if (amdgpu_device_cache_pci_state(adev->pdev))
6403 pci_restore_state(adev->pdev);
6405 DRM_INFO("PCIe error recovery succeeded\n");
6407 DRM_ERROR("PCIe error recovery failed, err:%d", r);
6408 amdgpu_device_unset_mp1_state(adev);
6409 amdgpu_device_unlock_reset_domain(adev->reset_domain);
6412 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
6416 * amdgpu_pci_resume() - resume normal ops after PCI reset
6417 * @pdev: pointer to PCI device
6419 * Called when the error recovery driver tells us that its
6420 * OK to resume normal operation.
6422 void amdgpu_pci_resume(struct pci_dev *pdev)
6424 struct drm_device *dev = pci_get_drvdata(pdev);
6425 struct amdgpu_device *adev = drm_to_adev(dev);
6429 DRM_INFO("PCI error: resume callback!!\n");
6431 /* Only continue execution for the case of pci_channel_io_frozen */
6432 if (adev->pci_channel_state != pci_channel_io_frozen)
6435 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6436 struct amdgpu_ring *ring = adev->rings[i];
6438 if (!amdgpu_ring_sched_ready(ring))
6441 drm_sched_start(&ring->sched);
6444 amdgpu_device_unset_mp1_state(adev);
6445 amdgpu_device_unlock_reset_domain(adev->reset_domain);
6448 bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
6450 struct drm_device *dev = pci_get_drvdata(pdev);
6451 struct amdgpu_device *adev = drm_to_adev(dev);
6454 if (amdgpu_sriov_vf(adev))
6457 r = pci_save_state(pdev);
6459 kfree(adev->pci_state);
6461 adev->pci_state = pci_store_saved_state(pdev);
6463 if (!adev->pci_state) {
6464 DRM_ERROR("Failed to store PCI saved state");
6468 DRM_WARN("Failed to save PCI state, err:%d\n", r);
6475 bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
6477 struct drm_device *dev = pci_get_drvdata(pdev);
6478 struct amdgpu_device *adev = drm_to_adev(dev);
6481 if (!adev->pci_state)
6484 r = pci_load_saved_state(pdev, adev->pci_state);
6487 pci_restore_state(pdev);
6489 DRM_WARN("Failed to load PCI state, err:%d\n", r);
6496 void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
6497 struct amdgpu_ring *ring)
6499 #ifdef CONFIG_X86_64
6500 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6503 if (adev->gmc.xgmi.connected_to_cpu)
6506 if (ring && ring->funcs->emit_hdp_flush)
6507 amdgpu_ring_emit_hdp_flush(ring);
6509 amdgpu_asic_flush_hdp(adev, ring);
6512 void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
6513 struct amdgpu_ring *ring)
6515 #ifdef CONFIG_X86_64
6516 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6519 if (adev->gmc.xgmi.connected_to_cpu)
6522 amdgpu_asic_invalidate_hdp(adev, ring);
6525 int amdgpu_in_reset(struct amdgpu_device *adev)
6527 return atomic_read(&adev->reset_domain->in_gpu_reset);
6531 * amdgpu_device_halt() - bring hardware to some kind of halt state
6533 * @adev: amdgpu_device pointer
6535 * Bring hardware to some kind of halt state so that no one can touch it
6536 * any more. It helps to maintain the error context when an error occurs.
6537 * Compared to a simple hang, the system will stay stable at least for SSH
6538 * access. It should then be trivial to inspect the hardware state and
6539 * see what's going on. Implemented as follows:
6541 * 1. drm_dev_unplug() makes the device inaccessible to user space (IOCTLs, etc),
6542 * clears all CPU mappings to device, disallows remappings through page faults
6543 * 2. amdgpu_irq_disable_all() disables all interrupts
6544 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
6545 * 4. set adev->no_hw_access to avoid potential crashes after step 5
6546 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
6547 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
6548 * flush any in flight DMA operations
6550 void amdgpu_device_halt(struct amdgpu_device *adev)
6552 struct pci_dev *pdev = adev->pdev;
6553 struct drm_device *ddev = adev_to_drm(adev);
6555 amdgpu_xcp_dev_unplug(adev);
6556 drm_dev_unplug(ddev);
6558 amdgpu_irq_disable_all(adev);
6560 amdgpu_fence_driver_hw_fini(adev);
6562 adev->no_hw_access = true;
6564 amdgpu_device_unmap_mmio(adev);
6566 pci_disable_device(pdev);
6567 pci_wait_for_pending_transaction(pdev);
6570 u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
6573 unsigned long flags, address, data;
6576 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6577 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6579 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6580 WREG32(address, reg * 4);
6581 (void)RREG32(address);
6583 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6587 void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
6590 unsigned long flags, address, data;
6592 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6593 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6595 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6596 WREG32(address, reg * 4);
6597 (void)RREG32(address);
6600 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6604 * amdgpu_device_get_gang - return a reference to the current gang
6605 * @adev: amdgpu_device pointer
6607 * Returns: A new reference to the current gang leader.
6609 struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev)
6611 struct dma_fence *fence;
6614 fence = dma_fence_get_rcu_safe(&adev->gang_submit);
6620 * amdgpu_device_switch_gang - switch to a new gang
6621 * @adev: amdgpu_device pointer
6622 * @gang: the gang to switch to
6624 * Try to switch to a new gang.
6625 * Returns: NULL if we switched to the new gang or a reference to the current
6628 struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6629 struct dma_fence *gang)
6631 struct dma_fence *old = NULL;
6635 old = amdgpu_device_get_gang(adev);
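/* Only switch gangs once the previous leader has signaled; retry the cmpxchg if the gang pointer changed in the meantime */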
6639 if (!dma_fence_is_signaled(old))
6642 } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6649 bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6651 switch (adev->asic_type) {
6652 #ifdef CONFIG_DRM_AMDGPU_SI
6656 /* chips with no display hardware */
6658 #ifdef CONFIG_DRM_AMDGPU_SI
6664 #ifdef CONFIG_DRM_AMDGPU_CIK
6673 case CHIP_POLARIS10:
6674 case CHIP_POLARIS11:
6675 case CHIP_POLARIS12:
6679 /* chips with display hardware */
6683 if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
6684 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6690 uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
6691 uint32_t inst, uint32_t reg_addr, char reg_name[],
6692 uint32_t expected_value, uint32_t mask)
6696 uint32_t tmp_ = RREG32(reg_addr);
6697 uint32_t loop = adev->usec_timeout;
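/* Poll the register until the masked value matches the expected value, bounded by the usec timeout */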
6699 while ((tmp_ & (mask)) != (expected_value)) {
6701 loop = adev->usec_timeout;
6705 tmp_ = RREG32(reg_addr);
6708 DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08xn",
6709 inst, reg_name, (uint32_t)expected_value,
6710 (uint32_t)(tmp_ & (mask)));