2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
24 * Authors: Dave Airlie
28 #include <linux/power_supply.h>
29 #include <linux/kthread.h>
30 #include <linux/module.h>
31 #include <linux/console.h>
32 #include <linux/slab.h>
33 #include <linux/iommu.h>
34 #include <linux/pci.h>
35 #include <linux/pci-p2pdma.h>
36 #include <linux/apple-gmux.h>
38 #include <drm/drm_aperture.h>
39 #include <drm/drm_atomic_helper.h>
40 #include <drm/drm_crtc_helper.h>
41 #include <drm/drm_fb_helper.h>
42 #include <drm/drm_probe_helper.h>
43 #include <drm/amdgpu_drm.h>
44 #include <linux/device.h>
45 #include <linux/vgaarb.h>
46 #include <linux/vga_switcheroo.h>
47 #include <linux/efi.h>
49 #include "amdgpu_trace.h"
50 #include "amdgpu_i2c.h"
52 #include "amdgpu_atombios.h"
53 #include "amdgpu_atomfirmware.h"
55 #ifdef CONFIG_DRM_AMDGPU_SI
58 #ifdef CONFIG_DRM_AMDGPU_CIK
64 #include "bif/bif_4_1_d.h"
65 #include <linux/firmware.h>
66 #include "amdgpu_vf_error.h"
68 #include "amdgpu_amdkfd.h"
69 #include "amdgpu_pm.h"
71 #include "amdgpu_xgmi.h"
72 #include "amdgpu_ras.h"
73 #include "amdgpu_pmu.h"
74 #include "amdgpu_fru_eeprom.h"
75 #include "amdgpu_reset.h"
76 #include "amdgpu_virt.h"
77 #include "amdgpu_dev_coredump.h"
79 #include <linux/suspend.h>
80 #include <drm/task_barrier.h>
81 #include <linux/pm_runtime.h>
83 #include <drm/drm_drv.h>
85 #if IS_ENABLED(CONFIG_X86)
86 #include <asm/intel-family.h>
89 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
90 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
91 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
92 MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
93 MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
94 MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
95 MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
97 #define AMDGPU_RESUME_MS 2000
98 #define AMDGPU_MAX_RETRY_LIMIT 2
99 #define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
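/* Fallback PCIE index/data register pair used for indirect register access
 * before adev->nbio.funcs is available; see amdgpu_device_indirect_rreg_ext().
 */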
100 #define AMDGPU_PCIE_INDEX_FALLBACK (0x38 >> 2)
101 #define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2)
102 #define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2)
104 static const struct drm_driver amdgpu_kms_driver;
106 const char *amdgpu_asic_name[] = {
147 #define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMDGPU_MAX_IP_NUM - 1, 0)
149 * Default init level where all blocks are expected to be initialized. This is
150 * the level of initialization expected by default and also after a full reset
153 struct amdgpu_init_level amdgpu_init_default = {
154 .level = AMDGPU_INIT_LEVEL_DEFAULT,
155 .hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
159 * Minimal blocks needed to be initialized before a XGMI hive can be reset. This
160 * is used for cases like reset on initialization where the entire hive needs to
161 * be reset before first use.
163 struct amdgpu_init_level amdgpu_init_minimal_xgmi = {
164 .level = AMDGPU_INIT_LEVEL_MINIMAL_XGMI,
165 .hwini_ip_block_mask =
166 BIT(AMD_IP_BLOCK_TYPE_GMC) | BIT(AMD_IP_BLOCK_TYPE_SMC) |
167 BIT(AMD_IP_BLOCK_TYPE_COMMON) | BIT(AMD_IP_BLOCK_TYPE_IH) |
168 BIT(AMD_IP_BLOCK_TYPE_PSP)
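/* Check whether HW init of the given IP block is included in the current init level */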
171 static inline bool amdgpu_ip_member_of_hwini(struct amdgpu_device *adev,
172 enum amd_ip_block_type block)
174 return (adev->init_lvl->hwini_ip_block_mask & (1U << block)) != 0;
177 void amdgpu_set_init_level(struct amdgpu_device *adev,
178 enum amdgpu_init_lvl_id lvl)
181 case AMDGPU_INIT_LEVEL_MINIMAL_XGMI:
182 adev->init_lvl = &amdgpu_init_minimal_xgmi;
184 case AMDGPU_INIT_LEVEL_DEFAULT:
187 adev->init_lvl = &amdgpu_init_default;
192 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev);
195 * DOC: pcie_replay_count
197 * The amdgpu driver provides a sysfs API for reporting the total number
198 * of PCIe replays (NAKs).
199 * The file pcie_replay_count is used for this and returns the total
200 * number of replays as a sum of the NAKs generated and NAKs received.
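 *
 * Reading it from user space is a plain sysfs read, for example (the exact
 * path depends on the system): cat /sys/bus/pci/devices/<bdf>/pcie_replay_count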
203 static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
204 struct device_attribute *attr, char *buf)
206 struct drm_device *ddev = dev_get_drvdata(dev);
207 struct amdgpu_device *adev = drm_to_adev(ddev);
208 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
210 return sysfs_emit(buf, "%llu\n", cnt);
213 static DEVICE_ATTR(pcie_replay_count, 0444,
214 amdgpu_device_get_pcie_replay_count, NULL);
216 static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
217 struct bin_attribute *attr, char *buf,
218 loff_t ppos, size_t count)
220 struct device *dev = kobj_to_dev(kobj);
221 struct drm_device *ddev = dev_get_drvdata(dev);
222 struct amdgpu_device *adev = drm_to_adev(ddev);
226 case AMDGPU_SYS_REG_STATE_XGMI:
227 bytes_read = amdgpu_asic_get_reg_state(
228 adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count);
230 case AMDGPU_SYS_REG_STATE_WAFL:
231 bytes_read = amdgpu_asic_get_reg_state(
232 adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count);
234 case AMDGPU_SYS_REG_STATE_PCIE:
235 bytes_read = amdgpu_asic_get_reg_state(
236 adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count);
238 case AMDGPU_SYS_REG_STATE_USR:
239 bytes_read = amdgpu_asic_get_reg_state(
240 adev, AMDGPU_REG_STATE_TYPE_USR, buf, count);
242 case AMDGPU_SYS_REG_STATE_USR_1:
243 bytes_read = amdgpu_asic_get_reg_state(
244 adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count);
253 BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL,
254 AMDGPU_SYS_REG_STATE_END);
256 int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev)
260 if (!amdgpu_asic_get_reg_state_supported(adev))
263 ret = sysfs_create_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
268 void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
270 if (!amdgpu_asic_get_reg_state_supported(adev))
272 sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
278 * The amdgpu driver provides a sysfs API for giving board related information.
279 * It provides the form factor information in the format
283 * Possible form factor values
285 * - "cem" - PCIE CEM card
286 * - "oam" - Open Compute Accelerator Module
287 * - "unknown" - Not known
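 *
 * Example output, matching the sysfs_emit() format used below: "type : oam"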
291 static ssize_t amdgpu_device_get_board_info(struct device *dev,
292 struct device_attribute *attr,
295 struct drm_device *ddev = dev_get_drvdata(dev);
296 struct amdgpu_device *adev = drm_to_adev(ddev);
297 enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
300 if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
301 pkg_type = adev->smuio.funcs->get_pkg_type(adev);
304 case AMDGPU_PKG_TYPE_CEM:
307 case AMDGPU_PKG_TYPE_OAM:
315 return sysfs_emit(buf, "%s : %s\n", "type", pkg);
318 static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);
320 static struct attribute *amdgpu_board_attrs[] = {
321 &dev_attr_board_info.attr,
325 static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
326 struct attribute *attr, int n)
328 struct device *dev = kobj_to_dev(kobj);
329 struct drm_device *ddev = dev_get_drvdata(dev);
330 struct amdgpu_device *adev = drm_to_adev(ddev);
332 if (adev->flags & AMD_IS_APU)
338 static const struct attribute_group amdgpu_board_attrs_group = {
339 .attrs = amdgpu_board_attrs,
340 .is_visible = amdgpu_board_attrs_is_visible
343 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
347 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
349 * @dev: drm_device pointer
351 * Returns true if the device is a dGPU with ATPX power control,
352 * otherwise returns false.
354 bool amdgpu_device_supports_px(struct drm_device *dev)
356 struct amdgpu_device *adev = drm_to_adev(dev);
358 if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
364 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
366 * @dev: drm_device pointer
368 * Returns true if the device is a dGPU with ACPI power control,
369 * otherwise returns false.
371 bool amdgpu_device_supports_boco(struct drm_device *dev)
373 struct amdgpu_device *adev = drm_to_adev(dev);
376 ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
382 * amdgpu_device_supports_baco - Does the device support BACO
384 * @dev: drm_device pointer
387 * 1 if the device supports BACO;
388 * 3 if the device supports MACO (only works if BACO is supported);
389 * otherwise returns 0.
391 int amdgpu_device_supports_baco(struct drm_device *dev)
393 struct amdgpu_device *adev = drm_to_adev(dev);
395 return amdgpu_asic_supports_baco(adev);
398 void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev)
400 struct drm_device *dev;
403 dev = adev_to_drm(adev);
405 adev->pm.rpm_mode = AMDGPU_RUNPM_NONE;
406 bamaco_support = amdgpu_device_supports_baco(dev);
408 switch (amdgpu_runtime_pm) {
410 if (bamaco_support & MACO_SUPPORT) {
411 adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
412 dev_info(adev->dev, "Forcing BAMACO for runtime pm\n");
413 } else if (bamaco_support == BACO_SUPPORT) {
414 adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
415 dev_info(adev->dev, "Requested mode BAMACO not available, falling back to BACO\n");
419 if (bamaco_support & BACO_SUPPORT) {
420 adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
421 dev_info(adev->dev, "Forcing BACO for runtime pm\n");
426 if (amdgpu_device_supports_px(dev)) { /* enable PX as runtime mode */
427 adev->pm.rpm_mode = AMDGPU_RUNPM_PX;
428 dev_info(adev->dev, "Using ATPX for runtime pm\n");
429 } else if (amdgpu_device_supports_boco(dev)) { /* enable boco as runtime mode */
430 adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO;
431 dev_info(adev->dev, "Using BOCO for runtime pm\n");
436 switch (adev->asic_type) {
439 /* BACO is not supported on vega20 and arcturus */
442 /* enable BACO as runpm mode if noretry=0 */
443 if (!adev->gmc.noretry)
444 adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
447 /* enable BACO as runpm mode on CI+ */
448 adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
452 if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) {
453 if (bamaco_support & MACO_SUPPORT) {
454 adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
455 dev_info(adev->dev, "Using BAMACO for runtime pm\n");
457 dev_info(adev->dev, "Using BACO for runtime pm\n");
463 dev_info(adev->dev, "runtime pm is manually disabled\n");
470 if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE)
471 dev_info(adev->dev, "Runtime PM not available\n");
474 * amdgpu_device_supports_smart_shift - Is the device dGPU with
475 * smart shift support
477 * @dev: drm_device pointer
479 * Returns true if the device is a dGPU with Smart Shift support,
480 * otherwise returns false.
482 bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
484 return (amdgpu_device_supports_boco(dev) &&
485 amdgpu_acpi_is_power_shift_control_supported());
489 * VRAM access helper functions
493 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
495 * @adev: amdgpu_device pointer
496 * @pos: offset of the buffer in vram
497 * @buf: virtual address of the buffer in system memory
498 * @size: read/write size in bytes; the buffer at @buf must be at least @size bytes
499 * @write: true - write to vram, otherwise - read from vram
501 void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
502 void *buf, size_t size, bool write)
505 uint32_t hi = ~0, tmp = 0;
506 uint32_t *data = buf;
510 if (!drm_dev_enter(adev_to_drm(adev), &idx))
513 BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
515 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
516 for (last = pos + size; pos < last; pos += 4) {
519 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
521 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
525 WREG32_NO_KIQ(mmMM_DATA, *data++);
527 *data++ = RREG32_NO_KIQ(mmMM_DATA);
530 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
535 * amdgpu_device_aper_access - access vram by vram aperture
537 * @adev: amdgpu_device pointer
538 * @pos: offset of the buffer in vram
539 * @buf: virtual address of the buffer in system memory
540 * @size: read/write size in bytes; the buffer at @buf must be at least @size bytes
541 * @write: true - write to vram, otherwise - read from vram
543 * The return value is the number of bytes that have been transferred.
545 size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
546 void *buf, size_t size, bool write)
553 if (!adev->mman.aper_base_kaddr)
556 last = min(pos + size, adev->gmc.visible_vram_size);
558 addr = adev->mman.aper_base_kaddr + pos;
562 memcpy_toio(addr, buf, count);
563 /* Make sure HDP write cache flush happens without any reordering
564 * after the system memory contents are sent over PCIe to the device
567 amdgpu_device_flush_hdp(adev, NULL);
569 amdgpu_device_invalidate_hdp(adev, NULL);
570 /* Make sure HDP read cache is invalidated before issuing a read
574 memcpy_fromio(buf, addr, count);
586 * amdgpu_device_vram_access - read/write a buffer in vram
588 * @adev: amdgpu_device pointer
589 * @pos: offset of the buffer in vram
590 * @buf: virtual address of the buffer in system memory
591 * @size: read/write size in bytes; the buffer at @buf must be at least @size bytes
592 * @write: true - write to vram, otherwise - read from vram
594 void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
595 void *buf, size_t size, bool write)
599 /* try to use the vram aperture to access vram first */
600 count = amdgpu_device_aper_access(adev, pos, buf, size, write);
603 /* use MM to access the rest of vram */
606 amdgpu_device_mm_access(adev, pos, buf, size, write);
611 * register access helper functions.
614 /* Check if hw access should be skipped because of hotplug or device error */
615 bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
617 if (adev->no_hw_access)
620 #ifdef CONFIG_LOCKDEP
622 * This is a bit complicated to understand, so worth a comment. What we assert
623 * here is that the GPU reset is not running on another thread in parallel.
625 * For this we trylock the read side of the reset semaphore; if that succeeds,
626 * we know that the reset is not running in parallel.
628 * If the trylock fails we assert that we are either already holding the read
629 * side of the lock or are the reset thread itself and hold the write side of
633 if (down_read_trylock(&adev->reset_domain->sem))
634 up_read(&adev->reset_domain->sem);
636 lockdep_assert_held(&adev->reset_domain->sem);
643 * amdgpu_device_rreg - read a memory mapped IO or indirect register
645 * @adev: amdgpu_device pointer
646 * @reg: dword aligned register offset
647 * @acc_flags: access flags which require special behavior
649 * Returns the 32 bit value from the offset specified.
651 uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
652 uint32_t reg, uint32_t acc_flags)
656 if (amdgpu_device_skip_hw_access(adev))
659 if ((reg * 4) < adev->rmmio_size) {
660 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
661 amdgpu_sriov_runtime(adev) &&
662 down_read_trylock(&adev->reset_domain->sem)) {
663 ret = amdgpu_kiq_rreg(adev, reg, 0);
664 up_read(&adev->reset_domain->sem);
666 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
669 ret = adev->pcie_rreg(adev, reg * 4);
672 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
678 * MMIO register read with bytes helper functions
679 * @offset: byte offset from MMIO start
683 * amdgpu_mm_rreg8 - read a memory mapped IO register
685 * @adev: amdgpu_device pointer
686 * @offset: byte aligned register offset
688 * Returns the 8 bit value from the offset specified.
690 uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
692 if (amdgpu_device_skip_hw_access(adev))
695 if (offset < adev->rmmio_size)
696 return (readb(adev->rmmio + offset));
702 * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC
704 * @adev: amdgpu_device pointer
705 * @reg: dword aligned register offset
706 * @acc_flags: access flags which require special behavior
707 * @xcc_id: xcc accelerated compute core id
709 * Returns the 32 bit value from the offset specified.
711 uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
712 uint32_t reg, uint32_t acc_flags,
715 uint32_t ret, rlcg_flag;
717 if (amdgpu_device_skip_hw_access(adev))
720 if ((reg * 4) < adev->rmmio_size) {
721 if (amdgpu_sriov_vf(adev) &&
722 !amdgpu_sriov_runtime(adev) &&
723 adev->gfx.rlc.rlcg_reg_access_supported &&
724 amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
727 ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, GET_INST(GC, xcc_id));
728 } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
729 amdgpu_sriov_runtime(adev) &&
730 down_read_trylock(&adev->reset_domain->sem)) {
731 ret = amdgpu_kiq_rreg(adev, reg, xcc_id);
732 up_read(&adev->reset_domain->sem);
734 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
737 ret = adev->pcie_rreg(adev, reg * 4);
744 * MMIO register write with bytes helper functions
745 * @offset: byte offset from MMIO start
746 * @value: the value to be written to the register
750 * amdgpu_mm_wreg8 - write a memory mapped IO register
752 * @adev: amdgpu_device pointer
753 * @offset: byte aligned register offset
754 * @value: 8 bit value to write
756 * Writes the value specified to the offset specified.
758 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
760 if (amdgpu_device_skip_hw_access(adev))
763 if (offset < adev->rmmio_size)
764 writeb(value, adev->rmmio + offset);
770 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
772 * @adev: amdgpu_device pointer
773 * @reg: dword aligned register offset
774 * @v: 32 bit value to write to the register
775 * @acc_flags: access flags which require special behavior
777 * Writes the value specified to the offset specified.
779 void amdgpu_device_wreg(struct amdgpu_device *adev,
780 uint32_t reg, uint32_t v,
783 if (amdgpu_device_skip_hw_access(adev))
786 if ((reg * 4) < adev->rmmio_size) {
787 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
788 amdgpu_sriov_runtime(adev) &&
789 down_read_trylock(&adev->reset_domain->sem)) {
790 amdgpu_kiq_wreg(adev, reg, v, 0);
791 up_read(&adev->reset_domain->sem);
793 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
796 adev->pcie_wreg(adev, reg * 4, v);
799 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
803 * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
805 * @adev: amdgpu_device pointer
806 * @reg: mmio/rlc register
808 * @xcc_id: xcc accelerated compute core id
810 * this function is invoked only for the debugfs register access
812 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
813 uint32_t reg, uint32_t v,
816 if (amdgpu_device_skip_hw_access(adev))
819 if (amdgpu_sriov_fullaccess(adev) &&
820 adev->gfx.rlc.funcs &&
821 adev->gfx.rlc.funcs->is_rlcg_access_range) {
822 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
823 return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
824 } else if ((reg * 4) >= adev->rmmio_size) {
825 adev->pcie_wreg(adev, reg * 4, v);
827 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
832 * amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC
834 * @adev: amdgpu_device pointer
835 * @reg: dword aligned register offset
836 * @v: 32 bit value to write to the register
837 * @acc_flags: access flags which require special behavior
838 * @xcc_id: xcc accelerated compute core id
840 * Writes the value specified to the offset specified.
842 void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
843 uint32_t reg, uint32_t v,
844 uint32_t acc_flags, uint32_t xcc_id)
848 if (amdgpu_device_skip_hw_access(adev))
851 if ((reg * 4) < adev->rmmio_size) {
852 if (amdgpu_sriov_vf(adev) &&
853 !amdgpu_sriov_runtime(adev) &&
854 adev->gfx.rlc.rlcg_reg_access_supported &&
855 amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
858 amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, GET_INST(GC, xcc_id));
859 } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
860 amdgpu_sriov_runtime(adev) &&
861 down_read_trylock(&adev->reset_domain->sem)) {
862 amdgpu_kiq_wreg(adev, reg, v, xcc_id);
863 up_read(&adev->reset_domain->sem);
865 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
868 adev->pcie_wreg(adev, reg * 4, v);
873 * amdgpu_device_indirect_rreg - read an indirect register
875 * @adev: amdgpu_device pointer
876 * @reg_addr: indirect register address to read from
878 * Returns the value of indirect register @reg_addr
880 u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
883 unsigned long flags, pcie_index, pcie_data;
884 void __iomem *pcie_index_offset;
885 void __iomem *pcie_data_offset;
888 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
889 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
891 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
892 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
893 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
895 writel(reg_addr, pcie_index_offset);
896 readl(pcie_index_offset);
897 r = readl(pcie_data_offset);
898 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
903 u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
906 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
908 void __iomem *pcie_index_offset;
909 void __iomem *pcie_index_hi_offset;
910 void __iomem *pcie_data_offset;
912 if (unlikely(!adev->nbio.funcs)) {
913 pcie_index = AMDGPU_PCIE_INDEX_FALLBACK;
914 pcie_data = AMDGPU_PCIE_DATA_FALLBACK;
916 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
917 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
920 if (reg_addr >> 32) {
921 if (unlikely(!adev->nbio.funcs))
922 pcie_index_hi = AMDGPU_PCIE_INDEX_HI_FALLBACK;
924 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
929 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
930 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
931 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
932 if (pcie_index_hi != 0)
933 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
936 writel(reg_addr, pcie_index_offset);
937 readl(pcie_index_offset);
938 if (pcie_index_hi != 0) {
939 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
940 readl(pcie_index_hi_offset);
942 r = readl(pcie_data_offset);
944 /* clear the high bits */
945 if (pcie_index_hi != 0) {
946 writel(0, pcie_index_hi_offset);
947 readl(pcie_index_hi_offset);
950 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
956 * amdgpu_device_indirect_rreg64 - read a 64 bit indirect register
958 * @adev: amdgpu_device pointer
959 * @reg_addr: indirect register address to read from
961 * Returns the value of indirect register @reg_addr
963 u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
966 unsigned long flags, pcie_index, pcie_data;
967 void __iomem *pcie_index_offset;
968 void __iomem *pcie_data_offset;
971 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
972 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
974 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
975 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
976 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
978 /* read low 32 bits */
979 writel(reg_addr, pcie_index_offset);
980 readl(pcie_index_offset);
981 r = readl(pcie_data_offset);
982 /* read high 32 bits */
983 writel(reg_addr + 4, pcie_index_offset);
984 readl(pcie_index_offset);
985 r |= ((u64)readl(pcie_data_offset) << 32);
986 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
991 u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
994 unsigned long flags, pcie_index, pcie_data;
995 unsigned long pcie_index_hi = 0;
996 void __iomem *pcie_index_offset;
997 void __iomem *pcie_index_hi_offset;
998 void __iomem *pcie_data_offset;
1001 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1002 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1003 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
1004 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
1006 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1007 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1008 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1009 if (pcie_index_hi != 0)
1010 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
1013 /* read low 32 bits */
1014 writel(reg_addr, pcie_index_offset);
1015 readl(pcie_index_offset);
1016 if (pcie_index_hi != 0) {
1017 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1018 readl(pcie_index_hi_offset);
1020 r = readl(pcie_data_offset);
1021 /* read high 32 bits */
1022 writel(reg_addr + 4, pcie_index_offset);
1023 readl(pcie_index_offset);
1024 if (pcie_index_hi != 0) {
1025 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1026 readl(pcie_index_hi_offset);
1028 r |= ((u64)readl(pcie_data_offset) << 32);
1030 /* clear the high bits */
1031 if (pcie_index_hi != 0) {
1032 writel(0, pcie_index_hi_offset);
1033 readl(pcie_index_hi_offset);
1036 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1042 * amdgpu_device_indirect_wreg - write to an indirect register
1044 * @adev: amdgpu_device pointer
1045 * @reg_addr: indirect register offset
1046 * @reg_data: indirect register data
1049 void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
1050 u32 reg_addr, u32 reg_data)
1052 unsigned long flags, pcie_index, pcie_data;
1053 void __iomem *pcie_index_offset;
1054 void __iomem *pcie_data_offset;
1056 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1057 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1059 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1060 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1061 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1063 writel(reg_addr, pcie_index_offset);
1064 readl(pcie_index_offset);
1065 writel(reg_data, pcie_data_offset);
1066 readl(pcie_data_offset);
1067 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1070 void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
1071 u64 reg_addr, u32 reg_data)
1073 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
1074 void __iomem *pcie_index_offset;
1075 void __iomem *pcie_index_hi_offset;
1076 void __iomem *pcie_data_offset;
1078 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1079 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1080 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
1081 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
1085 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1086 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1087 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1088 if (pcie_index_hi != 0)
1089 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
1092 writel(reg_addr, pcie_index_offset);
1093 readl(pcie_index_offset);
1094 if (pcie_index_hi != 0) {
1095 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1096 readl(pcie_index_hi_offset);
1098 writel(reg_data, pcie_data_offset);
1099 readl(pcie_data_offset);
1101 /* clear the high bits */
1102 if (pcie_index_hi != 0) {
1103 writel(0, pcie_index_hi_offset);
1104 readl(pcie_index_hi_offset);
1107 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1111 * amdgpu_device_indirect_wreg64 - write to a 64 bit indirect register
1113 * @adev: amdgpu_device pointer
1114 * @reg_addr: indirect register offset
1115 * @reg_data: indirect register data
1118 void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
1119 u32 reg_addr, u64 reg_data)
1121 unsigned long flags, pcie_index, pcie_data;
1122 void __iomem *pcie_index_offset;
1123 void __iomem *pcie_data_offset;
1125 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1126 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1128 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1129 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1130 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1132 /* write low 32 bits */
1133 writel(reg_addr, pcie_index_offset);
1134 readl(pcie_index_offset);
1135 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
1136 readl(pcie_data_offset);
1137 /* write high 32 bits */
1138 writel(reg_addr + 4, pcie_index_offset);
1139 readl(pcie_index_offset);
1140 writel((u32)(reg_data >> 32), pcie_data_offset);
1141 readl(pcie_data_offset);
1142 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1145 void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
1146 u64 reg_addr, u64 reg_data)
1148 unsigned long flags, pcie_index, pcie_data;
1149 unsigned long pcie_index_hi = 0;
1150 void __iomem *pcie_index_offset;
1151 void __iomem *pcie_index_hi_offset;
1152 void __iomem *pcie_data_offset;
1154 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1155 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1156 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
1157 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
1159 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1160 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1161 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1162 if (pcie_index_hi != 0)
1163 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
1166 /* write low 32 bits */
1167 writel(reg_addr, pcie_index_offset);
1168 readl(pcie_index_offset);
1169 if (pcie_index_hi != 0) {
1170 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1171 readl(pcie_index_hi_offset);
1173 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
1174 readl(pcie_data_offset);
1175 /* write high 32 bits */
1176 writel(reg_addr + 4, pcie_index_offset);
1177 readl(pcie_index_offset);
1178 if (pcie_index_hi != 0) {
1179 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1180 readl(pcie_index_hi_offset);
1182 writel((u32)(reg_data >> 32), pcie_data_offset);
1183 readl(pcie_data_offset);
1185 /* clear the high bits */
1186 if (pcie_index_hi != 0) {
1187 writel(0, pcie_index_hi_offset);
1188 readl(pcie_index_hi_offset);
1191 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1195 * amdgpu_device_get_rev_id - query device rev_id
1197 * @adev: amdgpu_device pointer
1199 * Return device rev_id
1201 u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
1203 return adev->nbio.funcs->get_rev_id(adev);
1207 * amdgpu_invalid_rreg - dummy reg read function
1209 * @adev: amdgpu_device pointer
1210 * @reg: offset of register
1212 * Dummy register read function. Used for register blocks
1213 * that certain asics don't have (all asics).
1214 * Returns the value in the register.
1216 static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
1218 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
1223 static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
1225 DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
1231 * amdgpu_invalid_wreg - dummy reg write function
1233 * @adev: amdgpu_device pointer
1234 * @reg: offset of register
1235 * @v: value to write to the register
1237 * Dummy register write function. Used for register blocks
1238 * that certain asics don't have (all asics).
1240 static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
1242 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
1247 static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
1249 DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
1255 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
1257 * @adev: amdgpu_device pointer
1258 * @reg: offset of register
1260 * Dummy register read function. Used for register blocks
1261 * that certain asics don't have (all asics).
1262 * Returns the value in the register.
1264 static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
1266 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
1271 static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
1273 DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
1279 * amdgpu_invalid_wreg64 - dummy reg write function
1281 * @adev: amdgpu_device pointer
1282 * @reg: offset of register
1283 * @v: value to write to the register
1285 * Dummy register write function. Used for register blocks
1286 * that certain asics don't have (all asics).
1288 static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
1290 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
1295 static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
1297 DRM_ERROR("Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
1303 * amdgpu_block_invalid_rreg - dummy reg read function
1305 * @adev: amdgpu_device pointer
1306 * @block: offset of instance
1307 * @reg: offset of register
1309 * Dummy register read function. Used for register blocks
1310 * that certain asics don't have (all asics).
1311 * Returns the value in the register.
1313 static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
1314 uint32_t block, uint32_t reg)
1316 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
1323 * amdgpu_block_invalid_wreg - dummy reg write function
1325 * @adev: amdgpu_device pointer
1326 * @block: offset of instance
1327 * @reg: offset of register
1328 * @v: value to write to the register
1330 * Dummy register write function. Used for register blocks
1331 * that certain asics don't have (all asics).
1333 static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
1335 uint32_t reg, uint32_t v)
1337 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
1343 * amdgpu_device_asic_init - Wrapper for atom asic_init
1345 * @adev: amdgpu_device pointer
1347 * Does any asic specific work and then calls atom asic init.
1349 static int amdgpu_device_asic_init(struct amdgpu_device *adev)
1353 amdgpu_asic_pre_asic_init(adev);
1355 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
1356 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
1357 amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
1358 amdgpu_psp_wait_for_bootloader(adev);
1359 ret = amdgpu_atomfirmware_asic_init(adev, true);
1362 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
1369 * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
1371 * @adev: amdgpu_device pointer
1373 * Allocates a scratch page of VRAM for use by various things in the
1376 static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
1378 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
1379 AMDGPU_GEM_DOMAIN_VRAM |
1380 AMDGPU_GEM_DOMAIN_GTT,
1381 &adev->mem_scratch.robj,
1382 &adev->mem_scratch.gpu_addr,
1383 (void **)&adev->mem_scratch.ptr);
1387 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
1389 * @adev: amdgpu_device pointer
1391 * Frees the VRAM scratch page.
1393 static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
1395 amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
1399 * amdgpu_device_program_register_sequence - program an array of registers.
1401 * @adev: amdgpu_device pointer
1402 * @registers: pointer to the register array
1403 * @array_size: size of the register array
1405 * Programs an array of registers with and/or masks.
1406 * This is a helper for setting golden registers.
1408 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
1409 const u32 *registers,
1410 const u32 array_size)
1412 u32 tmp, reg, and_mask, or_mask;
1418 for (i = 0; i < array_size; i += 3) {
1419 reg = registers[i + 0];
1420 and_mask = registers[i + 1];
1421 or_mask = registers[i + 2];
1423 if (and_mask == 0xffffffff) {
1428 if (adev->family >= AMDGPU_FAMILY_AI)
1429 tmp |= (or_mask & and_mask);
1438 * amdgpu_device_pci_config_reset - reset the GPU
1440 * @adev: amdgpu_device pointer
1442 * Resets the GPU using the pci config reset sequence.
1443 * Only applicable to asics prior to vega10.
1445 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
1447 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1451 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1453 * @adev: amdgpu_device pointer
1455 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1457 int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1459 return pci_reset_function(adev->pdev);
1463 * amdgpu_device_wb_*()
1464 * Writeback is the method by which the GPU updates special pages in memory
1465 * with the status of certain GPU events (fences, ring pointers, etc.).
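 *
 * Typical usage, as a sketch: reserve a slot with amdgpu_device_wb_get(), let
 * the GPU write status at adev->wb.gpu_addr + (wb * 4), read it back from
 * adev->wb.wb[wb], and release the slot with amdgpu_device_wb_free().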
1469 * amdgpu_device_wb_fini - Disable Writeback and free memory
1471 * @adev: amdgpu_device pointer
1473 * Disables Writeback and frees the Writeback memory (all asics).
1474 * Used at driver shutdown.
1476 static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
1478 if (adev->wb.wb_obj) {
1479 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1481 (void **)&adev->wb.wb);
1482 adev->wb.wb_obj = NULL;
1487 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
1489 * @adev: amdgpu_device pointer
1491 * Initializes writeback and allocates writeback memory (all asics).
1492 * Used at driver startup.
1493 * Returns 0 on success or a negative error code on failure.
1495 static int amdgpu_device_wb_init(struct amdgpu_device *adev)
1499 if (adev->wb.wb_obj == NULL) {
1500 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1501 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
1502 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1503 &adev->wb.wb_obj, &adev->wb.gpu_addr,
1504 (void **)&adev->wb.wb);
1506 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1510 adev->wb.num_wb = AMDGPU_MAX_WB;
1511 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1513 /* clear wb memory */
1514 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
1521 * amdgpu_device_wb_get - Allocate a wb entry
1523 * @adev: amdgpu_device pointer
1526 * Allocate a wb slot for use by the driver (all asics).
1527 * Returns 0 on success or -EINVAL on failure.
1529 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
1531 unsigned long flags, offset;
1533 spin_lock_irqsave(&adev->wb.lock, flags);
1534 offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
1535 if (offset < adev->wb.num_wb) {
1536 __set_bit(offset, adev->wb.used);
1537 spin_unlock_irqrestore(&adev->wb.lock, flags);
1538 *wb = offset << 3; /* convert to dw offset */
1541 spin_unlock_irqrestore(&adev->wb.lock, flags);
1547 * amdgpu_device_wb_free - Free a wb entry
1549 * @adev: amdgpu_device pointer
1552 * Free a wb slot allocated for use by the driver (all asics)
1554 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
1556 unsigned long flags;
1559 spin_lock_irqsave(&adev->wb.lock, flags);
1560 if (wb < adev->wb.num_wb)
1561 __clear_bit(wb, adev->wb.used);
1562 spin_unlock_irqrestore(&adev->wb.lock, flags);
1566 * amdgpu_device_resize_fb_bar - try to resize FB BAR
1568 * @adev: amdgpu_device pointer
1570 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1571 * to fail, but if any of the BARs is not accessible after the resize we abort
1572 * driver loading by returning -ENODEV.
1574 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1576 int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
1577 struct pci_bus *root;
1578 struct resource *res;
1583 if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1587 if (amdgpu_sriov_vf(adev))
1590 /* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */
1591 if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR))
1592 DRM_WARN("System can't access extended configuration space, please check!!\n");
1594 /* skip if the bios has already enabled large BAR */
1595 if (adev->gmc.real_vram_size &&
1596 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1599 /* Check if the root BUS has 64bit memory resources */
1600 root = adev->pdev->bus;
1601 while (root->parent)
1602 root = root->parent;
1604 pci_bus_for_each_resource(root, res, i) {
1605 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
1606 res->start > 0x100000000ull)
1610 /* Trying to resize is pointless without a root hub window above 4GB */
1614 /* Limit the BAR size to what is available */
1615 rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1618 /* Disable memory decoding while we change the BAR addresses and size */
1619 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1620 pci_write_config_word(adev->pdev, PCI_COMMAND,
1621 cmd & ~PCI_COMMAND_MEMORY);
1623 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
1624 amdgpu_doorbell_fini(adev);
1625 if (adev->asic_type >= CHIP_BONAIRE)
1626 pci_release_resource(adev->pdev, 2);
1628 pci_release_resource(adev->pdev, 0);
1630 r = pci_resize_resource(adev->pdev, 0, rbar_size);
1632 DRM_INFO("Not enough PCI address space for a large BAR.");
1633 else if (r && r != -ENOTSUPP)
1634 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1636 pci_assign_unassigned_bus_resources(adev->pdev->bus);
1638 /* When the doorbell or fb BAR isn't available we have no chance of
1641 r = amdgpu_doorbell_init(adev);
1642 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1645 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1650 static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
1652 if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
1659 * GPU helper functions.
1662 * amdgpu_device_need_post - check if the hw need post or not
1664 * @adev: amdgpu_device pointer
1666 * Check if the asic has been initialized (all asics) at driver startup,
1667 * or if post is needed because a hw reset was performed.
1668 * Returns true if post is needed or false if not.
1670 bool amdgpu_device_need_post(struct amdgpu_device *adev)
1674 if (amdgpu_sriov_vf(adev))
1677 if (!amdgpu_device_read_bios(adev))
1680 if (amdgpu_passthrough(adev)) {
1681 * for FIJI: In the whole GPU pass-through virtualization case, after VM reboot
1682 * some old smc fw still needs the driver to do vPost, otherwise the gpu hangs.
1683 * smc fw versions above 22.15 don't have this flaw, so we force
1684 * vPost to be executed for smc versions below 22.15
1686 if (adev->asic_type == CHIP_FIJI) {
1690 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1691 /* force vPost if an error occurred */
1695 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1696 release_firmware(adev->pm.fw);
1697 if (fw_ver < 0x00160e00)
1702 /* Don't post if we need to reset whole hive on init */
1703 if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
1706 if (adev->has_hw_reset) {
1707 adev->has_hw_reset = false;
1711 /* bios scratch used on CIK+ */
1712 if (adev->asic_type >= CHIP_BONAIRE)
1713 return amdgpu_atombios_scratch_need_asic_init(adev);
1715 /* check MEM_SIZE for older asics */
1716 reg = amdgpu_asic_get_config_memsize(adev);
1718 if ((reg != 0) && (reg != 0xffffffff))
1725 * Check whether seamless boot is supported.
1727 * So far we only support seamless boot on DCE 3.0 or later.
1728 * If users report that it works on older ASICs as well, we may
1731 bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1733 switch (amdgpu_seamless) {
1741 DRM_ERROR("Invalid value for amdgpu.seamless: %d\n",
1746 if (!(adev->flags & AMD_IS_APU))
1749 if (adev->mman.keep_stolen_vga_memory)
1752 return amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0);
1756 * Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids
1757 * don't support dynamic speed switching. Until we have confirmation from Intel
1758 * that a specific host supports it, it's safer that we keep it disabled for all.
1760 * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1761 * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1763 static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev)
1765 #if IS_ENABLED(CONFIG_X86)
1766 struct cpuinfo_x86 *c = &cpu_data(0);
1768 /* eGPUs change speed based on USB4 fabric conditions */
1769 if (dev_is_removable(adev->dev))
1772 if (c->x86_vendor == X86_VENDOR_INTEL)
1779 * amdgpu_device_should_use_aspm - check if the device should program ASPM
1781 * @adev: amdgpu_device pointer
1783 * Confirm whether the module parameter and pcie bridge agree that ASPM should
1784 * be set for this device.
1786 * Returns true if it should be used or false if not.
1788 bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1790 switch (amdgpu_aspm) {
1800 if (adev->flags & AMD_IS_APU)
1802 if (!(adev->pm.pp_feature & PP_PCIE_DPM_MASK))
1804 return pcie_aspm_enabled(adev->pdev);
1807 /* if we get transitioned to only one device, take VGA back */
1809 * amdgpu_device_vga_set_decode - enable/disable vga decode
1811 * @pdev: PCI device pointer
1812 * @state: enable/disable vga decode
1814 * Enable/disable vga decode (all asics).
1815 * Returns VGA resource flags.
1817 static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1820 struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
1822 amdgpu_asic_set_vga_state(adev, state);
1824 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1825 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1827 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1831 * amdgpu_device_check_block_size - validate the vm block size
1833 * @adev: amdgpu_device pointer
1835 * Validates the vm block size specified via module parameter.
1836 * The vm block size defines number of bits in page table versus page directory,
1837 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1838 * page table and the remaining bits are in the page directory.
1840 static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1842 /* defines number of bits in page table versus page directory,
1843 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1844 * page table and the remaining bits are in the page directory
1846 if (amdgpu_vm_block_size == -1)
1849 if (amdgpu_vm_block_size < 9) {
1850 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1851 amdgpu_vm_block_size);
1852 amdgpu_vm_block_size = -1;
1857 * amdgpu_device_check_vm_size - validate the vm size
1859 * @adev: amdgpu_device pointer
1861 * Validates the vm size in GB specified via module parameter.
1862 * The VM size is the size of the GPU virtual memory space in GB.
1864 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1866 /* no need to check the default value */
1867 if (amdgpu_vm_size == -1)
1870 if (amdgpu_vm_size < 1) {
1871 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1873 amdgpu_vm_size = -1;
1877 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1880 bool is_os_64 = (sizeof(void *) == 8);
1881 uint64_t total_memory;
1882 uint64_t dram_size_seven_GB = 0x1B8000000;
1883 uint64_t dram_size_three_GB = 0xB8000000;
1885 if (amdgpu_smu_memory_pool_size == 0)
1889 DRM_WARN("Not 64-bit OS, feature not supported\n");
1893 total_memory = (uint64_t)si.totalram * si.mem_unit;
1895 if ((amdgpu_smu_memory_pool_size == 1) ||
1896 (amdgpu_smu_memory_pool_size == 2)) {
1897 if (total_memory < dram_size_three_GB)
1899 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1900 (amdgpu_smu_memory_pool_size == 8)) {
1901 if (total_memory < dram_size_seven_GB)
1904 DRM_WARN("Smu memory pool size not supported\n");
1907 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1912 DRM_WARN("Not enough system memory\n");
1914 adev->pm.smu_prv_buffer_size = 0;
1917 static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1919 if (!(adev->flags & AMD_IS_APU) ||
1920 adev->asic_type < CHIP_RAVEN)
1923 switch (adev->asic_type) {
1925 if (adev->pdev->device == 0x15dd)
1926 adev->apu_flags |= AMD_APU_IS_RAVEN;
1927 if (adev->pdev->device == 0x15d8)
1928 adev->apu_flags |= AMD_APU_IS_PICASSO;
1931 if ((adev->pdev->device == 0x1636) ||
1932 (adev->pdev->device == 0x164c))
1933 adev->apu_flags |= AMD_APU_IS_RENOIR;
1935 adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1938 adev->apu_flags |= AMD_APU_IS_VANGOGH;
1940 case CHIP_YELLOW_CARP:
1942 case CHIP_CYAN_SKILLFISH:
1943 if ((adev->pdev->device == 0x13FE) ||
1944 (adev->pdev->device == 0x143F))
1945 adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1955 * amdgpu_device_check_arguments - validate module params
1957 * @adev: amdgpu_device pointer
1959 * Validates certain module parameters and updates
1960 * the associated values used by the driver (all asics).
1962 static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
1966 if (amdgpu_sched_jobs < 4) {
1967 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1969 amdgpu_sched_jobs = 4;
1970 } else if (!is_power_of_2(amdgpu_sched_jobs)) {
1971 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1973 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1976 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1977 /* gart size must be greater or equal to 32M */
1978 dev_warn(adev->dev, "gart size (%d) too small\n",
1980 amdgpu_gart_size = -1;
1983 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1984 /* gtt size must be greater or equal to 32M */
1985 dev_warn(adev->dev, "gtt size (%d) too small\n",
1987 amdgpu_gtt_size = -1;
1990 /* valid range is between 4 and 9 inclusive */
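/* a fragment covers 2^size pages, so with 4KB pages 4 => 64KB and 9 => 2MB */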
1991 if (amdgpu_vm_fragment_size != -1 &&
1992 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1993 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1994 amdgpu_vm_fragment_size = -1;
1997 if (amdgpu_sched_hw_submission < 2) {
1998 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1999 amdgpu_sched_hw_submission);
2000 amdgpu_sched_hw_submission = 2;
2001 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
2002 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
2003 amdgpu_sched_hw_submission);
2004 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
2007 if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
2008 dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
2009 amdgpu_reset_method = -1;
2012 amdgpu_device_check_smu_prv_buffer_size(adev);
2014 amdgpu_device_check_vm_size(adev);
2016 amdgpu_device_check_block_size(adev);
2018 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
2020 for (i = 0; i < MAX_XCP; i++)
2021 adev->enforce_isolation[i] = !!enforce_isolation;
2027 * amdgpu_switcheroo_set_state - set switcheroo state
2029 * @pdev: pci dev pointer
2030 * @state: vga_switcheroo state
2032 * Callback for the switcheroo driver. Suspends or resumes
2033 * the asic before or after it is powered up using ACPI methods.
2035 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
2036 enum vga_switcheroo_state state)
2038 struct drm_device *dev = pci_get_drvdata(pdev);
2041 if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
2044 if (state == VGA_SWITCHEROO_ON) {
2045 pr_info("switched on\n");
2046 /* don't suspend or resume card normally */
2047 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
2049 pci_set_power_state(pdev, PCI_D0);
2050 amdgpu_device_load_pci_state(pdev);
2051 r = pci_enable_device(pdev);
2053 DRM_WARN("pci_enable_device failed (%d)\n", r);
2054 amdgpu_device_resume(dev, true);
2056 dev->switch_power_state = DRM_SWITCH_POWER_ON;
2058 pr_info("switched off\n");
2059 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
2060 amdgpu_device_prepare(dev);
2061 amdgpu_device_suspend(dev, true);
2062 amdgpu_device_cache_pci_state(pdev);
2063 /* Shut down the device */
2064 pci_disable_device(pdev);
2065 pci_set_power_state(pdev, PCI_D3cold);
2066 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
2071 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
2073 * @pdev: pci dev pointer
2075 * Callback for the switcheroo driver. Check if the switcheroo
2076 * state can be changed.
2077 * Returns true if the state can be changed, false if not.
2079 static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
2081 struct drm_device *dev = pci_get_drvdata(pdev);
2084 * FIXME: open_count is protected by drm_global_mutex but that would lead to
2085 * locking inversion with the driver load path. And the access here is
2086 * completely racy anyway. So don't bother with locking for now.
2088 return atomic_read(&dev->open_count) == 0;
2091 static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
2092 .set_gpu_state = amdgpu_switcheroo_set_state,
2094 .can_switch = amdgpu_switcheroo_can_switch,
2098 * amdgpu_device_ip_set_clockgating_state - set the CG state
2100 * @dev: amdgpu_device pointer
2101 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2102 * @state: clockgating state (gate or ungate)
2104 * Sets the requested clockgating state for all instances of
2105 * the hardware IP specified.
2106 * Returns the error code from the last instance.
2108 int amdgpu_device_ip_set_clockgating_state(void *dev,
2109 enum amd_ip_block_type block_type,
2110 enum amd_clockgating_state state)
2112 struct amdgpu_device *adev = dev;
2115 for (i = 0; i < adev->num_ip_blocks; i++) {
2116 if (!adev->ip_blocks[i].status.valid)
2118 if (adev->ip_blocks[i].version->type != block_type)
2120 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
2122 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
2123 (void *)adev, state);
2125 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
2126 adev->ip_blocks[i].version->funcs->name, r);
2132 * amdgpu_device_ip_set_powergating_state - set the PG state
2134 * @dev: amdgpu_device pointer
2135 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2136 * @state: powergating state (gate or ungate)
2138 * Sets the requested powergating state for all instances of
2139 * the hardware IP specified.
2140 * Returns the error code from the last instance.
2142 int amdgpu_device_ip_set_powergating_state(void *dev,
2143 enum amd_ip_block_type block_type,
2144 enum amd_powergating_state state)
2146 struct amdgpu_device *adev = dev;
2149 for (i = 0; i < adev->num_ip_blocks; i++) {
2150 if (!adev->ip_blocks[i].status.valid)
2152 if (adev->ip_blocks[i].version->type != block_type)
2154 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
2156 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
2157 (void *)adev, state);
2159 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
2160 adev->ip_blocks[i].version->funcs->name, r);
2166 * amdgpu_device_ip_get_clockgating_state - get the CG state
2168 * @adev: amdgpu_device pointer
2169 * @flags: clockgating feature flags
2171 * Walks the list of IPs on the device and updates the clockgating
2172 * flags for each IP.
2173 * Updates @flags with the feature flags for each hardware IP where
2174 * clockgating is enabled.
2176 void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
2181 for (i = 0; i < adev->num_ip_blocks; i++) {
2182 if (!adev->ip_blocks[i].status.valid)
2184 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
2185 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
2190 * amdgpu_device_ip_wait_for_idle - wait for idle
2192 * @adev: amdgpu_device pointer
2193 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2195 * Waits for the requested hardware IP to be idle.
2196 * Returns 0 for success or a negative error code on failure.
2198 int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
2199 enum amd_ip_block_type block_type)
2203 for (i = 0; i < adev->num_ip_blocks; i++) {
2204 if (!adev->ip_blocks[i].status.valid)
2206 if (adev->ip_blocks[i].version->type == block_type) {
2207 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
2218 * amdgpu_device_ip_is_valid - is the hardware IP enabled
2220 * @adev: amdgpu_device pointer
2221 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2223 * Checks whether the hardware IP is enabled or not.
2224 * Returns true if the IP is enabled, false if not.
2226 bool amdgpu_device_ip_is_valid(struct amdgpu_device *adev,
2227 enum amd_ip_block_type block_type)
2231 for (i = 0; i < adev->num_ip_blocks; i++) {
2232 if (adev->ip_blocks[i].version->type == block_type)
2233 return adev->ip_blocks[i].status.valid;
2240 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
2242 * @adev: amdgpu_device pointer
2243 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
2245 * Returns a pointer to the hardware IP block structure
2246 * if it exists for the asic, otherwise NULL.
2248 struct amdgpu_ip_block *
2249 amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
2250 enum amd_ip_block_type type)
2254 for (i = 0; i < adev->num_ip_blocks; i++)
2255 if (adev->ip_blocks[i].version->type == type)
2256 return &adev->ip_blocks[i];
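/*
 * Illustrative sketch, not part of the driver: combining
 * amdgpu_device_ip_is_valid() and amdgpu_device_ip_get_ip_block() above to
 * report the GFX block version only when the block is present and enabled.
 * The example_log_gfx_version() name is made up for illustration only.
 */
#if 0
static void example_log_gfx_version(struct amdgpu_device *adev)
{
	struct amdgpu_ip_block *ip_block;

	if (!amdgpu_device_ip_is_valid(adev, AMD_IP_BLOCK_TYPE_GFX))
		return;

	ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
	if (ip_block)
		dev_info(adev->dev, "GFX IP v%u.%u (%s)\n",
			 ip_block->version->major, ip_block->version->minor,
			 ip_block->version->funcs->name);
}
#endif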
2262 * amdgpu_device_ip_block_version_cmp
2264 * @adev: amdgpu_device pointer
2265 * @type: enum amd_ip_block_type
2266 * @major: major version
2267 * @minor: minor version
2269 * Returns 0 if the IP block's version is equal to or greater than the
2270 * requested (major, minor) version, 1 if it is smaller or the ip_block doesn't exist.
2272 int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
2273 enum amd_ip_block_type type,
2274 u32 major, u32 minor)
2276 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
2278 if (ip_block && ((ip_block->version->major > major) ||
2279 ((ip_block->version->major == major) &&
2280 (ip_block->version->minor >= minor))))
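/*
 * Illustrative sketch, not part of the driver: gating a feature on a minimum
 * GFX IP version with the comparison helper above. Note the return
 * convention: 0 means "equal or greater", 1 means "smaller or missing".
 * The example_gfx_at_least_v9() name is made up for illustration only.
 */
#if 0
static bool example_gfx_at_least_v9(struct amdgpu_device *adev)
{
	return amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX,
						  9, 0) == 0;
}
#endif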
2287 * amdgpu_device_ip_block_add
2289 * @adev: amdgpu_device pointer
2290 * @ip_block_version: pointer to the IP to add
2292 * Adds the IP block driver information to the collection of IPs on the asic.
2295 int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
2296 const struct amdgpu_ip_block_version *ip_block_version)
2298 if (!ip_block_version)
2301 switch (ip_block_version->type) {
2302 case AMD_IP_BLOCK_TYPE_VCN:
2303 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
2306 case AMD_IP_BLOCK_TYPE_JPEG:
2307 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
2314 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
2315 ip_block_version->funcs->name);
2317 adev->ip_blocks[adev->num_ip_blocks].adev = adev;
2319 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
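/*
 * Illustrative sketch, not part of the driver: the registration pattern an
 * ASIC setup routine typically follows with amdgpu_device_ip_block_add(),
 * adding blocks in bring-up order. The helper name and the two
 * ip_block_version pointers are placeholders for illustration only.
 */
#if 0
static int example_register_ip_blocks(struct amdgpu_device *adev,
				      const struct amdgpu_ip_block_version *common,
				      const struct amdgpu_ip_block_version *gmc)
{
	int r;

	/* COMMON first, then GMC, then the remaining blocks (not shown). */
	r = amdgpu_device_ip_block_add(adev, common);
	if (r)
		return r;
	return amdgpu_device_ip_block_add(adev, gmc);
}
#endif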
2325 * amdgpu_device_enable_virtual_display - enable virtual display feature
2327 * @adev: amdgpu_device pointer
2329 * Enables the virtual display feature if the user has enabled it via
2330 * the module parameter virtual_display. This feature provides a virtual
2331 * display hardware on headless boards or in virtualized environments.
2332 * This function parses and validates the configuration string specified by
2333 * the user and configures the virtual display configuration (number of
2334 * virtual connectors, crtcs, etc.) specified.
2336 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
2338 adev->enable_virtual_display = false;
2340 if (amdgpu_virtual_display) {
2341 const char *pci_address_name = pci_name(adev->pdev);
2342 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
2344 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
2345 pciaddstr_tmp = pciaddstr;
2346 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
2347 pciaddname = strsep(&pciaddname_tmp, ",");
2348 if (!strcmp("all", pciaddname)
2349 || !strcmp(pci_address_name, pciaddname)) {
2353 adev->enable_virtual_display = true;
2356 res = kstrtol(pciaddname_tmp, 10,
2364 adev->mode_info.num_crtc = num_crtc;
2366 adev->mode_info.num_crtc = 1;
2372 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
2373 amdgpu_virtual_display, pci_address_name,
2374 adev->enable_virtual_display, adev->mode_info.num_crtc);
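/*
 * Illustrative usage note, not part of the driver: based on the parsing
 * above, the virtual_display module parameter takes ";"-separated entries of
 * "<pci address>,<num_crtc>" (or "all" for every device). For example, with
 * hypothetical values:
 *
 *   modprobe amdgpu virtual_display=0000:03:00.0,2
 *   modprobe amdgpu virtual_display=all,1
 *
 * the first form enables two virtual crtcs on the device at 0000:03:00.0,
 * the second one virtual crtc on every amdgpu device.
 */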
2380 void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
2382 if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
2383 adev->mode_info.num_crtc = 1;
2384 adev->enable_virtual_display = true;
2385 DRM_INFO("virtual_display:%d, num_crtc:%d\n",
2386 adev->enable_virtual_display, adev->mode_info.num_crtc);
2391 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
2393 * @adev: amdgpu_device pointer
2395 * Parses the asic configuration parameters specified in the gpu info
2396 * firmware and makes them available to the driver for use in configuring the asic.
2398 * Returns 0 on success, -EINVAL on failure.
2400 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
2402 const char *chip_name;
2404 const struct gpu_info_firmware_header_v1_0 *hdr;
2406 adev->firmware.gpu_info_fw = NULL;
2408 if (adev->mman.discovery_bin)
2411 switch (adev->asic_type) {
2415 chip_name = "vega10";
2418 chip_name = "vega12";
2421 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2422 chip_name = "raven2";
2423 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
2424 chip_name = "picasso";
2426 chip_name = "raven";
2429 chip_name = "arcturus";
2432 chip_name = "navi12";
2436 err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw,
2437 "amdgpu/%s_gpu_info.bin", chip_name);
2440 "Failed to get gpu_info firmware \"%s_gpu_info.bin\"\n",
2445 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
2446 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2448 switch (hdr->version_major) {
2451 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
2452 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
2453 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2456 * Should be dropped when DAL no longer needs it.
2458 if (adev->asic_type == CHIP_NAVI12)
2459 goto parse_soc_bounding_box;
2461 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2462 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2463 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2464 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
2465 adev->gfx.config.max_texture_channel_caches =
2466 le32_to_cpu(gpu_info_fw->gc_num_tccs);
2467 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2468 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2469 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2470 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
2471 adev->gfx.config.double_offchip_lds_buf =
2472 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2473 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
2474 adev->gfx.cu_info.max_waves_per_simd =
2475 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2476 adev->gfx.cu_info.max_scratch_slots_per_cu =
2477 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2478 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
2479 if (hdr->version_minor >= 1) {
2480 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2481 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2482 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2483 adev->gfx.config.num_sc_per_sh =
2484 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2485 adev->gfx.config.num_packer_per_sc =
2486 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2489 parse_soc_bounding_box:
2491 * soc bounding box info is not integrated in the discovery table,
2492 * so we always need to parse it from the gpu info firmware when needed.
2494 if (hdr->version_minor == 2) {
2495 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2496 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2497 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2498 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2504 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2513 * amdgpu_device_ip_early_init - run early init for hardware IPs
2515 * @adev: amdgpu_device pointer
2517 * Early initialization pass for hardware IPs. The hardware IPs that make
2518 * up each asic are discovered and each IP's early_init callback is run. This
2519 * is the first stage in initializing the asic.
2520 * Returns 0 on success, negative error code on failure.
2522 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
2524 struct amdgpu_ip_block *ip_block;
2525 struct pci_dev *parent;
2529 amdgpu_device_enable_virtual_display(adev);
2531 if (amdgpu_sriov_vf(adev)) {
2532 r = amdgpu_virt_request_full_gpu(adev, true);
2537 switch (adev->asic_type) {
2538 #ifdef CONFIG_DRM_AMDGPU_SI
2544 adev->family = AMDGPU_FAMILY_SI;
2545 r = si_set_ip_blocks(adev);
2550 #ifdef CONFIG_DRM_AMDGPU_CIK
2556 if (adev->flags & AMD_IS_APU)
2557 adev->family = AMDGPU_FAMILY_KV;
2559 adev->family = AMDGPU_FAMILY_CI;
2561 r = cik_set_ip_blocks(adev);
2569 case CHIP_POLARIS10:
2570 case CHIP_POLARIS11:
2571 case CHIP_POLARIS12:
2575 if (adev->flags & AMD_IS_APU)
2576 adev->family = AMDGPU_FAMILY_CZ;
2578 adev->family = AMDGPU_FAMILY_VI;
2580 r = vi_set_ip_blocks(adev);
2585 r = amdgpu_discovery_set_ip_blocks(adev);
2591 if (amdgpu_has_atpx() &&
2592 (amdgpu_is_atpx_hybrid() ||
2593 amdgpu_has_atpx_dgpu_power_cntl()) &&
2594 ((adev->flags & AMD_IS_APU) == 0) &&
2595 !dev_is_removable(&adev->pdev->dev))
2596 adev->flags |= AMD_IS_PX;
2598 if (!(adev->flags & AMD_IS_APU)) {
2599 parent = pcie_find_root_port(adev->pdev);
2600 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2604 adev->pm.pp_feature = amdgpu_pp_feature_mask;
2605 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
2606 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
2607 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2608 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
2609 if (!amdgpu_device_pcie_dynamic_switching_supported(adev))
2610 adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
2613 for (i = 0; i < adev->num_ip_blocks; i++) {
2614 ip_block = &adev->ip_blocks[i];
2616 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
2617 DRM_WARN("disabled ip block: %d <%s>\n",
2618 i, adev->ip_blocks[i].version->funcs->name);
2619 adev->ip_blocks[i].status.valid = false;
2620 } else if (ip_block->version->funcs->early_init) {
2621 r = ip_block->version->funcs->early_init(ip_block);
2623 adev->ip_blocks[i].status.valid = false;
2625 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2626 adev->ip_blocks[i].version->funcs->name, r);
2629 adev->ip_blocks[i].status.valid = true;
2632 adev->ip_blocks[i].status.valid = true;
2634 /* get the vbios after the asic_funcs are set up */
2635 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2636 r = amdgpu_device_parse_gpu_info_fw(adev);
2641 if (amdgpu_device_read_bios(adev)) {
2642 if (!amdgpu_get_bios(adev))
2645 r = amdgpu_atombios_init(adev);
2647 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2648 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2653 /* get pf2vf msg info at its earliest time */
2654 if (amdgpu_sriov_vf(adev))
2655 amdgpu_virt_init_data_exchange(adev);
2662 ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
2663 if (ip_block->status.valid != false)
2664 amdgpu_amdkfd_device_probe(adev);
2666 adev->cg_flags &= amdgpu_cg_mask;
2667 adev->pg_flags &= amdgpu_pg_mask;
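/*
 * Illustrative usage note, not part of the driver: as the loop above shows,
 * bit i of the ip_block_mask module parameter corresponds to the i-th IP
 * block in registration order, and a cleared bit disables that block. For
 * example (hypothetical value), amdgpu.ip_block_mask=0xfffffffb would
 * disable the block at index 2; the DRM_WARN above reports which block that
 * is by name.
 */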
2672 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2676 for (i = 0; i < adev->num_ip_blocks; i++) {
2677 if (!adev->ip_blocks[i].status.sw)
2679 if (adev->ip_blocks[i].status.hw)
2681 if (!amdgpu_ip_member_of_hwini(
2682 adev, adev->ip_blocks[i].version->type))
2684 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2685 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
2686 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2687 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2689 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2690 adev->ip_blocks[i].version->funcs->name, r);
2693 adev->ip_blocks[i].status.hw = true;
2700 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2704 for (i = 0; i < adev->num_ip_blocks; i++) {
2705 if (!adev->ip_blocks[i].status.sw)
2707 if (adev->ip_blocks[i].status.hw)
2709 if (!amdgpu_ip_member_of_hwini(
2710 adev, adev->ip_blocks[i].version->type))
2712 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2714 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2715 adev->ip_blocks[i].version->funcs->name, r);
2718 adev->ip_blocks[i].status.hw = true;
2724 static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2728 uint32_t smu_version;
2730 if (adev->asic_type >= CHIP_VEGA10) {
2731 for (i = 0; i < adev->num_ip_blocks; i++) {
2732 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2735 if (!amdgpu_ip_member_of_hwini(adev,
2736 AMD_IP_BLOCK_TYPE_PSP))
2739 if (!adev->ip_blocks[i].status.sw)
2742 /* no need to do the fw loading again if already done */
2743 if (adev->ip_blocks[i].status.hw == true)
2746 if (amdgpu_in_reset(adev) || adev->in_suspend) {
2747 r = adev->ip_blocks[i].version->funcs->resume(adev);
2749 DRM_ERROR("resume of IP block <%s> failed %d\n",
2750 adev->ip_blocks[i].version->funcs->name, r);
2754 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2756 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2757 adev->ip_blocks[i].version->funcs->name, r);
2762 adev->ip_blocks[i].status.hw = true;
2767 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2768 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
2773 static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2778 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2779 struct amdgpu_ring *ring = adev->rings[i];
2781 /* No need to setup the GPU scheduler for rings that don't need it */
2782 if (!ring || ring->no_scheduler)
2785 switch (ring->funcs->type) {
2786 case AMDGPU_RING_TYPE_GFX:
2787 timeout = adev->gfx_timeout;
2789 case AMDGPU_RING_TYPE_COMPUTE:
2790 timeout = adev->compute_timeout;
2792 case AMDGPU_RING_TYPE_SDMA:
2793 timeout = adev->sdma_timeout;
2796 timeout = adev->video_timeout;
2800 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL,
2801 DRM_SCHED_PRIORITY_COUNT,
2802 ring->num_hw_submission, 0,
2803 timeout, adev->reset_domain->wq,
2804 ring->sched_score, ring->name,
2807 DRM_ERROR("Failed to create scheduler on ring %s.\n",
2811 r = amdgpu_uvd_entity_init(adev, ring);
2813 DRM_ERROR("Failed to create UVD scheduling entity on ring %s.\n",
2817 r = amdgpu_vce_entity_init(adev, ring);
2819 DRM_ERROR("Failed to create VCE scheduling entity on ring %s.\n",
2825 amdgpu_xcp_update_partition_sched_list(adev);
2832 * amdgpu_device_ip_init - run init for hardware IPs
2834 * @adev: amdgpu_device pointer
2836 * Main initialization pass for hardware IPs. The list of all the hardware
2837 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2838 * are run. sw_init initializes the software state associated with each IP
2839 * and hw_init initializes the hardware associated with each IP.
2840 * Returns 0 on success, negative error code on failure.
2842 static int amdgpu_device_ip_init(struct amdgpu_device *adev)
2847 r = amdgpu_ras_init(adev);
2851 for (i = 0; i < adev->num_ip_blocks; i++) {
2852 if (!adev->ip_blocks[i].status.valid)
2854 r = adev->ip_blocks[i].version->funcs->sw_init(&adev->ip_blocks[i]);
2856 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2857 adev->ip_blocks[i].version->funcs->name, r);
2860 adev->ip_blocks[i].status.sw = true;
2862 if (!amdgpu_ip_member_of_hwini(
2863 adev, adev->ip_blocks[i].version->type))
2866 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2867 /* need to do common hw init early so everything is set up for gmc */
2868 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2870 DRM_ERROR("hw_init %d failed %d\n", i, r);
2873 adev->ip_blocks[i].status.hw = true;
2874 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2875 /* need to do gmc hw init early so we can allocate gpu mem */
2876 /* Try to reserve bad pages early */
2877 if (amdgpu_sriov_vf(adev))
2878 amdgpu_virt_exchange_data(adev);
2880 r = amdgpu_device_mem_scratch_init(adev);
2882 DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
2885 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2887 DRM_ERROR("hw_init %d failed %d\n", i, r);
2890 r = amdgpu_device_wb_init(adev);
2892 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
2895 adev->ip_blocks[i].status.hw = true;
2897 /* right after GMC hw init, we create CSA */
2898 if (adev->gfx.mcbp) {
2899 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2900 AMDGPU_GEM_DOMAIN_VRAM |
2901 AMDGPU_GEM_DOMAIN_GTT,
2904 DRM_ERROR("allocate CSA failed %d\n", r);
2909 r = amdgpu_seq64_init(adev);
2911 DRM_ERROR("allocate seq64 failed %d\n", r);
2917 if (amdgpu_sriov_vf(adev))
2918 amdgpu_virt_init_data_exchange(adev);
2920 r = amdgpu_ib_pool_init(adev);
2922 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2923 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2927 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2931 r = amdgpu_device_ip_hw_init_phase1(adev);
2935 r = amdgpu_device_fw_loading(adev);
2939 r = amdgpu_device_ip_hw_init_phase2(adev);
2944 * retired pages will be loaded from eeprom and reserved here,
2945 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2946 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2947 * for I2C communication, which is only true at this point.
2949 * amdgpu_ras_recovery_init may fail, but the upper layers only care about
2950 * failures caused by a bad gpu situation and stop the amdgpu init process
2951 * accordingly. For other failures, it still releases all the resources and
2952 * prints an error message rather than returning a negative value to the
2953 * upper level.
2955 * Note: theoretically, this should be called before all vram allocations
2956 * to protect retired pages from being abused.
2958 init_badpage = (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
2959 r = amdgpu_ras_recovery_init(adev, init_badpage);
2964 * In case of XGMI grab extra reference for reset domain for this device
2966 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2967 if (amdgpu_xgmi_add_device(adev) == 0) {
2968 if (!amdgpu_sriov_vf(adev)) {
2969 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2971 if (WARN_ON(!hive)) {
2976 if (!hive->reset_domain ||
2977 !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2979 amdgpu_put_xgmi_hive(hive);
2983 /* Drop the early temporary reset domain we created for device */
2984 amdgpu_reset_put_reset_domain(adev->reset_domain);
2985 adev->reset_domain = hive->reset_domain;
2986 amdgpu_put_xgmi_hive(hive);
2991 r = amdgpu_device_init_schedulers(adev);
2995 if (adev->mman.buffer_funcs_ring->sched.ready)
2996 amdgpu_ttm_set_buffer_funcs_status(adev, true);
2998 /* Don't init kfd if whole hive need to be reset during init */
2999 if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
3000 kgd2kfd_init_zone_device(adev);
3001 amdgpu_amdkfd_device_init(adev);
3004 amdgpu_fru_get_product_info(adev);
3012 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
3014 * @adev: amdgpu_device pointer
3016 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
3017 * this function before a GPU reset. If the value is retained after a
3018 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
3020 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
3022 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
3026 * amdgpu_device_check_vram_lost - check if vram is valid
3028 * @adev: amdgpu_device pointer
3030 * Checks the reset magic value written to the gart pointer in VRAM.
3031 * The driver calls this after a GPU reset to see if the contents of
3032 * VRAM has been lost or not.
3033 * Returns true if vram is lost, false if not.
3035 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
3037 if (memcmp(adev->gart.ptr, adev->reset_magic,
3038 AMDGPU_RESET_MAGIC_NUM))
3041 if (!amdgpu_in_reset(adev))
3045 * For all ASICs with baco/mode1 reset, the VRAM is
3046 * always assumed to be lost.
3048 switch (amdgpu_asic_reset_method(adev)) {
3049 case AMD_RESET_METHOD_BACO:
3050 case AMD_RESET_METHOD_MODE1:
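/*
 * Illustrative sketch, not part of the driver: how the two helpers above pair
 * up around a reset. The reset step itself is omitted; the real flow lives in
 * the GPU recovery code. The example_reset_and_check_vram() name is made up
 * for illustration only.
 */
#if 0
static bool example_reset_and_check_vram(struct amdgpu_device *adev)
{
	/* Capture the reset magic from the GART pointer before the reset ... */
	amdgpu_device_fill_reset_magic(adev);

	/* ... perform the reset here (omitted) ... */

	/* ... then compare afterwards to decide whether buffers must be restored. */
	return amdgpu_device_check_vram_lost(adev);
}
#endif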
3058 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
3060 * @adev: amdgpu_device pointer
3061 * @state: clockgating state (gate or ungate)
3063 * The list of all the hardware IPs that make up the asic is walked and the
3064 * set_clockgating_state callbacks are run.
3065 * The late initialization pass enables clockgating for hardware IPs;
3066 * the fini or suspend pass disables clockgating for hardware IPs.
3067 * Returns 0 on success, negative error code on failure.
3070 int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
3071 enum amd_clockgating_state state)
3075 if (amdgpu_emu_mode == 1)
3078 for (j = 0; j < adev->num_ip_blocks; j++) {
3079 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
3080 if (!adev->ip_blocks[i].status.late_initialized)
3082 /* skip CG for GFX, SDMA on S0ix */
3083 if (adev->in_s0ix &&
3084 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3085 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3087 /* skip CG for VCE/UVD, it's handled specially */
3088 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
3089 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
3090 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
3091 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
3092 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
3093 /* enable clockgating to save power */
3094 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
3097 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
3098 adev->ip_blocks[i].version->funcs->name, r);
3107 int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
3108 enum amd_powergating_state state)
3112 if (amdgpu_emu_mode == 1)
3115 for (j = 0; j < adev->num_ip_blocks; j++) {
3116 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
3117 if (!adev->ip_blocks[i].status.late_initialized)
3119 /* skip PG for GFX, SDMA on S0ix */
3120 if (adev->in_s0ix &&
3121 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3122 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3124 /* skip PG for VCE/UVD, it's handled specially */
3125 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
3126 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
3127 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
3128 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
3129 adev->ip_blocks[i].version->funcs->set_powergating_state) {
3130 /* enable powergating to save power */
3131 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
3134 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
3135 adev->ip_blocks[i].version->funcs->name, r);
3143 static int amdgpu_device_enable_mgpu_fan_boost(void)
3145 struct amdgpu_gpu_instance *gpu_ins;
3146 struct amdgpu_device *adev;
3149 mutex_lock(&mgpu_info.mutex);
3152 * MGPU fan boost feature should be enabled
3153 * only when there are two or more dGPUs in the system.
3156 if (mgpu_info.num_dgpu < 2)
3159 for (i = 0; i < mgpu_info.num_dgpu; i++) {
3160 gpu_ins = &(mgpu_info.gpu_ins[i]);
3161 adev = gpu_ins->adev;
3162 if (!(adev->flags & AMD_IS_APU) &&
3163 !gpu_ins->mgpu_fan_enabled) {
3164 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
3168 gpu_ins->mgpu_fan_enabled = 1;
3173 mutex_unlock(&mgpu_info.mutex);
3179 * amdgpu_device_ip_late_init - run late init for hardware IPs
3181 * @adev: amdgpu_device pointer
3183 * Late initialization pass for hardware IPs. The list of all the hardware
3184 * IPs that make up the asic is walked and the late_init callbacks are run.
3185 * late_init covers any special initialization that an IP requires
3186 * after all of the IPs have been initialized or something that needs to happen
3187 * late in the init process.
3188 * Returns 0 on success, negative error code on failure.
3190 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
3192 struct amdgpu_gpu_instance *gpu_instance;
3195 for (i = 0; i < adev->num_ip_blocks; i++) {
3196 if (!adev->ip_blocks[i].status.hw)
3198 if (adev->ip_blocks[i].version->funcs->late_init) {
3199 r = adev->ip_blocks[i].version->funcs->late_init(&adev->ip_blocks[i]);
3201 DRM_ERROR("late_init of IP block <%s> failed %d\n",
3202 adev->ip_blocks[i].version->funcs->name, r);
3206 adev->ip_blocks[i].status.late_initialized = true;
3209 r = amdgpu_ras_late_init(adev);
3211 DRM_ERROR("amdgpu_ras_late_init failed %d", r);
3215 if (!amdgpu_in_reset(adev))
3216 amdgpu_ras_set_error_query_ready(adev, true);
3218 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
3219 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
3221 amdgpu_device_fill_reset_magic(adev);
3223 r = amdgpu_device_enable_mgpu_fan_boost();
3225 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
3227 /* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
3228 if (amdgpu_passthrough(adev) &&
3229 ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
3230 adev->asic_type == CHIP_ALDEBARAN))
3231 amdgpu_dpm_handle_passthrough_sbr(adev, true);
3233 if (adev->gmc.xgmi.num_physical_nodes > 1) {
3234 mutex_lock(&mgpu_info.mutex);
3237 * Reset device p-state to low as this was booted with high.
3239 * This should be performed only after all devices from the same
3240 * hive get initialized.
3242 * However, the number of devices in the hive is not known in advance;
3243 * it is counted one by one as the devices are initialized.
3245 * So we wait until all XGMI interlinked devices are initialized.
3246 * This may add some delay, as those devices may come from
3247 * different hives. But that should be OK.
3249 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
3250 for (i = 0; i < mgpu_info.num_gpu; i++) {
3251 gpu_instance = &(mgpu_info.gpu_ins[i]);
3252 if (gpu_instance->adev->flags & AMD_IS_APU)
3255 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
3256 AMDGPU_XGMI_PSTATE_MIN);
3258 DRM_ERROR("pstate setting failed (%d).\n", r);
3264 mutex_unlock(&mgpu_info.mutex);
3271 * amdgpu_device_smu_fini_early - smu hw_fini wrapper
3273 * @adev: amdgpu_device pointer
3275 * For ASICs that need to disable SMC first
3277 static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
3281 if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
3284 for (i = 0; i < adev->num_ip_blocks; i++) {
3285 if (!adev->ip_blocks[i].status.hw)
3287 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3288 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3289 /* XXX handle errors */
3291 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
3292 adev->ip_blocks[i].version->funcs->name, r);
3294 adev->ip_blocks[i].status.hw = false;
3300 static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
3304 for (i = 0; i < adev->num_ip_blocks; i++) {
3305 if (!adev->ip_blocks[i].version->funcs->early_fini)
3308 r = adev->ip_blocks[i].version->funcs->early_fini(&adev->ip_blocks[i]);
3310 DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
3311 adev->ip_blocks[i].version->funcs->name, r);
3315 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3316 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3318 amdgpu_amdkfd_suspend(adev, false);
3320 /* Workaround for ASICs that need to disable SMC first */
3321 amdgpu_device_smu_fini_early(adev);
3323 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3324 if (!adev->ip_blocks[i].status.hw)
3327 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3328 /* XXX handle errors */
3330 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
3331 adev->ip_blocks[i].version->funcs->name, r);
3334 adev->ip_blocks[i].status.hw = false;
3337 if (amdgpu_sriov_vf(adev)) {
3338 if (amdgpu_virt_release_full_gpu(adev, false))
3339 DRM_ERROR("failed to release exclusive mode on fini\n");
3346 * amdgpu_device_ip_fini - run fini for hardware IPs
3348 * @adev: amdgpu_device pointer
3350 * Main teardown pass for hardware IPs. The list of all the hardware
3351 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
3352 * are run. hw_fini tears down the hardware associated with each IP
3353 * and sw_fini tears down any software state associated with each IP.
3354 * Returns 0 on success, negative error code on failure.
3356 static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
3360 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
3361 amdgpu_virt_release_ras_err_handler_data(adev);
3363 if (adev->gmc.xgmi.num_physical_nodes > 1)
3364 amdgpu_xgmi_remove_device(adev);
3366 amdgpu_amdkfd_device_fini_sw(adev);
3368 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3369 if (!adev->ip_blocks[i].status.sw)
3372 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
3373 amdgpu_ucode_free_bo(adev);
3374 amdgpu_free_static_csa(&adev->virt.csa_obj);
3375 amdgpu_device_wb_fini(adev);
3376 amdgpu_device_mem_scratch_fini(adev);
3377 amdgpu_ib_pool_fini(adev);
3378 amdgpu_seq64_fini(adev);
3381 r = adev->ip_blocks[i].version->funcs->sw_fini(&adev->ip_blocks[i]);
3382 /* XXX handle errors */
3384 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
3385 adev->ip_blocks[i].version->funcs->name, r);
3387 adev->ip_blocks[i].status.sw = false;
3388 adev->ip_blocks[i].status.valid = false;
3391 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3392 if (!adev->ip_blocks[i].status.late_initialized)
3394 if (adev->ip_blocks[i].version->funcs->late_fini)
3395 adev->ip_blocks[i].version->funcs->late_fini(&adev->ip_blocks[i]);
3396 adev->ip_blocks[i].status.late_initialized = false;
3399 amdgpu_ras_fini(adev);
3405 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
3407 * @work: work_struct.
3409 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
3411 struct amdgpu_device *adev =
3412 container_of(work, struct amdgpu_device, delayed_init_work.work);
3415 r = amdgpu_ib_ring_tests(adev);
3417 DRM_ERROR("ib ring test failed (%d).\n", r);
3420 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
3422 struct amdgpu_device *adev =
3423 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
3425 WARN_ON_ONCE(adev->gfx.gfx_off_state);
3426 WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
3428 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
3429 adev->gfx.gfx_off_state = true;
3433 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
3435 * @adev: amdgpu_device pointer
3437 * Main suspend function for hardware IPs. The list of all the hardware
3438 * IPs that make up the asic is walked, clockgating is disabled and the
3439 * suspend callbacks are run. suspend puts the hardware and software state
3440 * in each IP into a state suitable for suspend.
3441 * Returns 0 on success, negative error code on failure.
3443 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
3447 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3448 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3451 * Per PMFW team's suggestion, driver needs to handle gfxoff
3452 * and df cstate feature disablement for the gpu reset (e.g. Mode1Reset)
3453 * scenario. Add the missing df cstate disablement here.
3455 if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
3456 dev_warn(adev->dev, "Failed to disallow df cstate");
3458 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3459 if (!adev->ip_blocks[i].status.valid)
3462 /* displays are handled separately */
3463 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
3466 /* XXX handle errors */
3467 r = adev->ip_blocks[i].version->funcs->suspend(adev);
3468 /* XXX handle errors */
3470 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3471 adev->ip_blocks[i].version->funcs->name, r);
3475 adev->ip_blocks[i].status.hw = false;
3482 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3484 * @adev: amdgpu_device pointer
3486 * Main suspend function for hardware IPs. The list of all the hardware
3487 * IPs that make up the asic is walked, clockgating is disabled and the
3488 * suspend callbacks are run. suspend puts the hardware and software state
3489 * in each IP into a state suitable for suspend.
3490 * Returns 0 on success, negative error code on failure.
3492 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
3497 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
3499 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3500 if (!adev->ip_blocks[i].status.valid)
3502 /* displays are handled in phase1 */
3503 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3505 /* PSP lost connection when err_event_athub occurs */
3506 if (amdgpu_ras_intr_triggered() &&
3507 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3508 adev->ip_blocks[i].status.hw = false;
3512 /* skip unnecessary suspend if we have not initialized them yet */
3513 if (!amdgpu_ip_member_of_hwini(
3514 adev, adev->ip_blocks[i].version->type))
3517 /* skip suspend of gfx/mes and psp for S0ix
3518 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3519 * like at runtime. PSP is also part of the always on hardware
3520 * so no need to suspend it.
3522 if (adev->in_s0ix &&
3523 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
3524 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3525 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
3528 /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3529 if (adev->in_s0ix &&
3530 (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
3531 IP_VERSION(5, 0, 0)) &&
3532 (adev->ip_blocks[i].version->type ==
3533 AMD_IP_BLOCK_TYPE_SDMA))
3536 /* During cold boot, swPSP provides the IMU and RLC FW binaries to TOS.
3537 * These are in TMR, hence are expected to be reused by PSP-TOS to reload
3538 * from this location and RLC Autoload automatically also gets loaded
3539 * from here based on PMFW -> PSP message during re-init sequence.
3540 * Therefore, the psp suspend & resume should be skipped to avoid destroying
3541 * the TMR and reloading FWs again for IMU enabled APU ASICs.
3543 if (amdgpu_in_reset(adev) &&
3544 (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3545 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3548 /* XXX handle errors */
3549 r = adev->ip_blocks[i].version->funcs->suspend(adev);
3550 /* XXX handle errors */
3552 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3553 adev->ip_blocks[i].version->funcs->name, r);
3555 adev->ip_blocks[i].status.hw = false;
3556 /* handle putting the SMC in the appropriate state */
3557 if (!amdgpu_sriov_vf(adev)) {
3558 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3559 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3561 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3562 adev->mp1_state, r);
3573 * amdgpu_device_ip_suspend - run suspend for hardware IPs
3575 * @adev: amdgpu_device pointer
3577 * Main suspend function for hardware IPs. The list of all the hardware
3578 * IPs that make up the asic is walked, clockgating is disabled and the
3579 * suspend callbacks are run. suspend puts the hardware and software state
3580 * in each IP into a state suitable for suspend.
3581 * Returns 0 on success, negative error code on failure.
3583 int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3587 if (amdgpu_sriov_vf(adev)) {
3588 amdgpu_virt_fini_data_exchange(adev);
3589 amdgpu_virt_request_full_gpu(adev, false);
3592 amdgpu_ttm_set_buffer_funcs_status(adev, false);
3594 r = amdgpu_device_ip_suspend_phase1(adev);
3597 r = amdgpu_device_ip_suspend_phase2(adev);
3599 if (amdgpu_sriov_vf(adev))
3600 amdgpu_virt_release_full_gpu(adev, false);
3605 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
3609 static enum amd_ip_block_type ip_order[] = {
3610 AMD_IP_BLOCK_TYPE_COMMON,
3611 AMD_IP_BLOCK_TYPE_GMC,
3612 AMD_IP_BLOCK_TYPE_PSP,
3613 AMD_IP_BLOCK_TYPE_IH,
3616 for (i = 0; i < adev->num_ip_blocks; i++) {
3618 struct amdgpu_ip_block *block;
3620 block = &adev->ip_blocks[i];
3621 block->status.hw = false;
3623 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
3625 if (block->version->type != ip_order[j] ||
3626 !block->status.valid)
3629 r = block->version->funcs->hw_init(adev);
3630 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
3633 block->status.hw = true;
3640 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
3644 static enum amd_ip_block_type ip_order[] = {
3645 AMD_IP_BLOCK_TYPE_SMC,
3646 AMD_IP_BLOCK_TYPE_DCE,
3647 AMD_IP_BLOCK_TYPE_GFX,
3648 AMD_IP_BLOCK_TYPE_SDMA,
3649 AMD_IP_BLOCK_TYPE_MES,
3650 AMD_IP_BLOCK_TYPE_UVD,
3651 AMD_IP_BLOCK_TYPE_VCE,
3652 AMD_IP_BLOCK_TYPE_VCN,
3653 AMD_IP_BLOCK_TYPE_JPEG
3656 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3658 struct amdgpu_ip_block *block;
3660 for (j = 0; j < adev->num_ip_blocks; j++) {
3661 block = &adev->ip_blocks[j];
3663 if (block->version->type != ip_order[i] ||
3664 !block->status.valid ||
3668 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3669 r = block->version->funcs->resume(adev);
3671 r = block->version->funcs->hw_init(adev);
3673 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
3676 block->status.hw = true;
3684 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3686 * @adev: amdgpu_device pointer
3688 * First resume function for hardware IPs. The list of all the hardware
3689 * IPs that make up the asic is walked and the resume callbacks are run for
3690 * COMMON, GMC, and IH. resume puts the hardware into a functional state
3691 * after a suspend and updates the software state as necessary. This
3692 * function is also used for restoring the GPU after a GPU reset.
3693 * Returns 0 on success, negative error code on failure.
3695 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
3699 for (i = 0; i < adev->num_ip_blocks; i++) {
3700 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3702 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3703 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3704 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3705 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
3707 r = adev->ip_blocks[i].version->funcs->resume(adev);
3709 DRM_ERROR("resume of IP block <%s> failed %d\n",
3710 adev->ip_blocks[i].version->funcs->name, r);
3713 adev->ip_blocks[i].status.hw = true;
3721 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3723 * @adev: amdgpu_device pointer
3725 * Second resume function for hardware IPs. The list of all the hardware
3726 * IPs that make up the asic is walked and the resume callbacks are run for
3727 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
3728 * functional state after a suspend and updates the software state as
3729 * necessary. This function is also used for restoring the GPU after a GPU reset.
3731 * Returns 0 on success, negative error code on failure.
3733 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
3737 for (i = 0; i < adev->num_ip_blocks; i++) {
3738 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3740 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3741 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3742 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3743 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3745 r = adev->ip_blocks[i].version->funcs->resume(adev);
3747 DRM_ERROR("resume of IP block <%s> failed %d\n",
3748 adev->ip_blocks[i].version->funcs->name, r);
3751 adev->ip_blocks[i].status.hw = true;
3758 * amdgpu_device_ip_resume - run resume for hardware IPs
3760 * @adev: amdgpu_device pointer
3762 * Main resume function for hardware IPs. The hardware IPs
3763 * are split into two resume functions because they are
3764 * also used in recovering from a GPU reset and some additional
3765 * steps need to be taken between them. In this case (S3/S4) they are run sequentially.
3767 * Returns 0 on success, negative error code on failure.
3769 static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
3773 r = amdgpu_device_ip_resume_phase1(adev);
3777 r = amdgpu_device_fw_loading(adev);
3781 r = amdgpu_device_ip_resume_phase2(adev);
3783 if (adev->mman.buffer_funcs_ring->sched.ready)
3784 amdgpu_ttm_set_buffer_funcs_status(adev, true);
3790 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3792 * @adev: amdgpu_device pointer
3794 * Query the VBIOS data tables to determine if the board supports SR-IOV.
3796 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
3798 if (amdgpu_sriov_vf(adev)) {
3799 if (adev->is_atom_fw) {
3800 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
3801 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3803 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3804 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3807 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3808 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
3813 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3815 * @asic_type: AMD asic type
3817 * Check if there is DC (new modesetting infrastructure) support for an asic.
3818 * returns true if DC has support, false if not.
3820 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3822 switch (asic_type) {
3823 #ifdef CONFIG_DRM_AMDGPU_SI
3827 /* chips with no display hardware */
3829 #if defined(CONFIG_DRM_AMD_DC)
3835 * We have systems in the wild with these ASICs that require
3836 * LVDS and VGA support which is not supported with DC.
3838 * Fall back to the non-DC driver here by default so as not to
3839 * cause regressions.
3841 #if defined(CONFIG_DRM_AMD_DC_SI)
3842 return amdgpu_dc > 0;
3851 * We have systems in the wild with these ASICs that require
3852 * VGA support which is not supported with DC.
3854 * Fall back to the non-DC driver here by default so as not to
3855 * cause regressions.
3857 return amdgpu_dc > 0;
3859 return amdgpu_dc != 0;
3863 DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
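/*
 * Illustrative usage note, not part of the driver: assuming the usual "-1 =
 * auto" default for the amdgpu.dc module parameter, the two return forms
 * above mean that on the legacy LVDS/VGA systems called out in the comments
 * DC is used only when explicitly requested with amdgpu.dc=1, while on the
 * remaining DC-capable ASICs it is used unless explicitly disabled with
 * amdgpu.dc=0.
 */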
3870 * amdgpu_device_has_dc_support - check if dc is supported
3872 * @adev: amdgpu_device pointer
3874 * Returns true for supported, false for not supported
3876 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3878 if (adev->enable_virtual_display ||
3879 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
3882 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3885 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3887 struct amdgpu_device *adev =
3888 container_of(__work, struct amdgpu_device, xgmi_reset_work);
3889 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
3891 /* It's a bug to not have a hive within this function */
3896 * Use task barrier to synchronize all xgmi reset works across the
3897 * hive. task_barrier_enter and task_barrier_exit will block
3898 * until all the threads running the xgmi reset works reach
3899 * those points. task_barrier_full will do both blocks.
3901 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3903 task_barrier_enter(&hive->tb);
3904 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
3906 if (adev->asic_reset_res)
3909 task_barrier_exit(&hive->tb);
3910 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
3912 if (adev->asic_reset_res)
3915 amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
3918 task_barrier_full(&hive->tb);
3919 adev->asic_reset_res = amdgpu_asic_reset(adev);
3923 if (adev->asic_reset_res)
3924 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
3925 adev->asic_reset_res, adev_to_drm(adev)->unique);
3926 amdgpu_put_xgmi_hive(hive);
3929 static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3931 char *input = amdgpu_lockup_timeout;
3932 char *timeout_setting = NULL;
3938 * By default the timeout for non-compute jobs is 10000
3939 * and 60000 for compute jobs.
3940 * In SR-IOV or passthrough mode, the timeout for compute
3941 * jobs is 60000 by default.
3943 adev->gfx_timeout = msecs_to_jiffies(10000);
3944 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3945 if (amdgpu_sriov_vf(adev))
3946 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3947 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
3949 adev->compute_timeout = msecs_to_jiffies(60000);
3951 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3952 while ((timeout_setting = strsep(&input, ",")) &&
3953 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3954 ret = kstrtol(timeout_setting, 0, &timeout);
3961 } else if (timeout < 0) {
3962 timeout = MAX_SCHEDULE_TIMEOUT;
3963 dev_warn(adev->dev, "lockup timeout disabled");
3964 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
3966 timeout = msecs_to_jiffies(timeout);
3971 adev->gfx_timeout = timeout;
3974 adev->compute_timeout = timeout;
3977 adev->sdma_timeout = timeout;
3980 adev->video_timeout = timeout;
3987 * There is only one value specified and
3988 * it should apply to all non-compute jobs.
3991 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3992 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3993 adev->compute_timeout = adev->gfx_timeout;
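/*
 * Illustrative usage note, not part of the driver: per the parsing above, the
 * lockup_timeout module parameter accepts either a single value, applied to
 * all non-compute jobs, or a comma-separated list consumed in the order
 * gfx, compute, sdma, video (milliseconds; 0 keeps the default, a negative
 * value disables the timeout). For example, with hypothetical values:
 *
 *   amdgpu.lockup_timeout=5000                     - 5 s for all non-compute jobs
 *   amdgpu.lockup_timeout=10000,60000,10000,10000  - per-queue values in the order above
 */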
4001 * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
4003 * @adev: amdgpu_device pointer
4005 * RAM is direct mapped to the GPU if the IOMMU is not enabled or is in passthrough mode
4007 static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
4009 struct iommu_domain *domain;
4011 domain = iommu_get_domain_for_dev(adev->dev);
4012 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
4013 adev->ram_is_direct_mapped = true;
4016 #if defined(CONFIG_HSA_AMD_P2P)
4018 * amdgpu_device_check_iommu_remap - Check if DMA remapping is enabled.
4020 * @adev: amdgpu_device pointer
4022 * Returns true if the IOMMU is remapping DMA addresses (e.g. the BAR address), false otherwise.
4024 static bool amdgpu_device_check_iommu_remap(struct amdgpu_device *adev)
4026 struct iommu_domain *domain;
4028 domain = iommu_get_domain_for_dev(adev->dev);
4029 if (domain && (domain->type == IOMMU_DOMAIN_DMA ||
4030 domain->type == IOMMU_DOMAIN_DMA_FQ))
4037 static const struct attribute *amdgpu_dev_attributes[] = {
4038 &dev_attr_pcie_replay_count.attr,
4042 static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
4044 if (amdgpu_mcbp == 1)
4045 adev->gfx.mcbp = true;
4046 else if (amdgpu_mcbp == 0)
4047 adev->gfx.mcbp = false;
4049 if (amdgpu_sriov_vf(adev))
4050 adev->gfx.mcbp = true;
4053 DRM_INFO("MCBP is enabled\n");
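/*
 * Illustrative usage note, not part of the driver: per the checks above,
 * amdgpu.mcbp=1 forces mid-command-buffer preemption on, amdgpu.mcbp=0 forces
 * it off, any other value leaves the driver default untouched, and under
 * SR-IOV it is always enabled regardless of the parameter.
 */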
4057 * amdgpu_device_init - initialize the driver
4059 * @adev: amdgpu_device pointer
4060 * @flags: driver flags
4062 * Initializes the driver info and hw (all asics).
4063 * Returns 0 for success or an error on failure.
4064 * Called at driver startup.
4066 int amdgpu_device_init(struct amdgpu_device *adev,
4069 struct drm_device *ddev = adev_to_drm(adev);
4070 struct pci_dev *pdev = adev->pdev;
4076 adev->shutdown = false;
4077 adev->flags = flags;
4079 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
4080 adev->asic_type = amdgpu_force_asic_type;
4082 adev->asic_type = flags & AMD_ASIC_MASK;
4084 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
4085 if (amdgpu_emu_mode == 1)
4086 adev->usec_timeout *= 10;
4087 adev->gmc.gart_size = 512 * 1024 * 1024;
4088 adev->accel_working = false;
4089 adev->num_rings = 0;
4090 RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
4091 adev->mman.buffer_funcs = NULL;
4092 adev->mman.buffer_funcs_ring = NULL;
4093 adev->vm_manager.vm_pte_funcs = NULL;
4094 adev->vm_manager.vm_pte_num_scheds = 0;
4095 adev->gmc.gmc_funcs = NULL;
4096 adev->harvest_ip_mask = 0x0;
4097 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
4098 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
4100 adev->smc_rreg = &amdgpu_invalid_rreg;
4101 adev->smc_wreg = &amdgpu_invalid_wreg;
4102 adev->pcie_rreg = &amdgpu_invalid_rreg;
4103 adev->pcie_wreg = &amdgpu_invalid_wreg;
4104 adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
4105 adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
4106 adev->pciep_rreg = &amdgpu_invalid_rreg;
4107 adev->pciep_wreg = &amdgpu_invalid_wreg;
4108 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
4109 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
4110 adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
4111 adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
4112 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
4113 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
4114 adev->didt_rreg = &amdgpu_invalid_rreg;
4115 adev->didt_wreg = &amdgpu_invalid_wreg;
4116 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
4117 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
4118 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
4119 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
4121 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
4122 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
4123 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
4125 /* mutex initialization is all done here so we
4126 * can recall functions without having locking issues
4128 mutex_init(&adev->firmware.mutex);
4129 mutex_init(&adev->pm.mutex);
4130 mutex_init(&adev->gfx.gpu_clock_mutex);
4131 mutex_init(&adev->srbm_mutex);
4132 mutex_init(&adev->gfx.pipe_reserve_mutex);
4133 mutex_init(&adev->gfx.gfx_off_mutex);
4134 mutex_init(&adev->gfx.partition_mutex);
4135 mutex_init(&adev->grbm_idx_mutex);
4136 mutex_init(&adev->mn_lock);
4137 mutex_init(&adev->virt.vf_errors.lock);
4138 mutex_init(&adev->virt.rlcg_reg_lock);
4139 hash_init(adev->mn_hash);
4140 mutex_init(&adev->psp.mutex);
4141 mutex_init(&adev->notifier_lock);
4142 mutex_init(&adev->pm.stable_pstate_ctx_lock);
4143 mutex_init(&adev->benchmark_mutex);
4144 mutex_init(&adev->gfx.reset_sem_mutex);
4145 /* Initialize the mutex for cleaner shader isolation between GFX and compute processes */
4146 mutex_init(&adev->enforce_isolation_mutex);
4147 mutex_init(&adev->gfx.kfd_sch_mutex);
4149 amdgpu_device_init_apu_flags(adev);
4151 r = amdgpu_device_check_arguments(adev);
4155 spin_lock_init(&adev->mmio_idx_lock);
4156 spin_lock_init(&adev->smc_idx_lock);
4157 spin_lock_init(&adev->pcie_idx_lock);
4158 spin_lock_init(&adev->uvd_ctx_idx_lock);
4159 spin_lock_init(&adev->didt_idx_lock);
4160 spin_lock_init(&adev->gc_cac_idx_lock);
4161 spin_lock_init(&adev->se_cac_idx_lock);
4162 spin_lock_init(&adev->audio_endpt_idx_lock);
4163 spin_lock_init(&adev->mm_stats.lock);
4164 spin_lock_init(&adev->wb.lock);
4166 INIT_LIST_HEAD(&adev->reset_list);
4168 INIT_LIST_HEAD(&adev->ras_list);
4170 INIT_LIST_HEAD(&adev->pm.od_kobj_list);
4172 INIT_DELAYED_WORK(&adev->delayed_init_work,
4173 amdgpu_device_delayed_init_work_handler);
4174 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
4175 amdgpu_device_delay_enable_gfx_off);
4177 * Initialize the enforce_isolation work structures for each XCP
4178 * partition. This work handler is responsible for enforcing shader
4179 * isolation on AMD GPUs. It counts the number of emitted fences for
4180 * each GFX and compute ring. If there are any fences, it schedules
4181 * the `enforce_isolation_work` to be run after a delay. If there are
4182 * no fences, it signals the Kernel Fusion Driver (KFD) to resume the
4185 for (i = 0; i < MAX_XCP; i++) {
4186 INIT_DELAYED_WORK(&adev->gfx.enforce_isolation[i].work,
4187 amdgpu_gfx_enforce_isolation_handler);
4188 adev->gfx.enforce_isolation[i].adev = adev;
4189 adev->gfx.enforce_isolation[i].xcp_id = i;
4192 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
4194 adev->gfx.gfx_off_req_count = 1;
4195 adev->gfx.gfx_off_residency = 0;
4196 adev->gfx.gfx_off_entrycount = 0;
4197 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
4199 atomic_set(&adev->throttling_logging_enabled, 1);
4201 * If throttling continues, logging will be performed every minute
4202 * to avoid log flooding. "-1" is subtracted since the thermal
4203 * throttling interrupt comes every second. Thus, the total logging
4204 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
4205 * for throttling interrupt) = 60 seconds.
4207 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
4208 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
4210 /* Registers mapping */
4211 /* TODO: block userspace mapping of io register */
4212 if (adev->asic_type >= CHIP_BONAIRE) {
4213 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
4214 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
4216 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
4217 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
4220 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
4221 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
4223 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
4227 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
4228 DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);
4231 * Reset domain needs to be present early, before the XGMI hive is discovered
4232 * (if any) and initialized, to use the reset sem and in_gpu_reset flag
4233 * early on during init and before any calls to RREG32.
4235 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
4236 if (!adev->reset_domain)
4239 /* detect hw virtualization here */
4240 amdgpu_detect_virtualization(adev);
4242 amdgpu_device_get_pcie_info(adev);
4244 r = amdgpu_device_get_job_timeout_settings(adev);
4246 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4250 amdgpu_device_set_mcbp(adev);
4253 * By default, use default mode where all blocks are expected to be
4254 * initialized. At present a 'swinit' of blocks is required to be
4255 * completed before the need for a different level is detected.
4257 amdgpu_set_init_level(adev, AMDGPU_INIT_LEVEL_DEFAULT);
4258 /* early init functions */
4259 r = amdgpu_device_ip_early_init(adev);
4263 /* Get rid of things like offb */
4264 r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
4268 /* Enable TMZ based on IP_VERSION */
4269 amdgpu_gmc_tmz_set(adev);
4271 if (amdgpu_sriov_vf(adev) &&
4272 amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0))
4273 /* VF MMIO access (except mailbox range) from CPU
4274 * will be blocked during sriov runtime
4276 adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT;
4278 amdgpu_gmc_noretry_set(adev);
4279 /* Need to get xgmi info early to decide the reset behavior */
4280 if (adev->gmc.xgmi.supported) {
4281 r = adev->gfxhub.funcs->get_xgmi_info(adev);
4286 /* enable PCIE atomic ops */
4287 if (amdgpu_sriov_vf(adev)) {
4288 if (adev->virt.fw_reserve.p_pf2vf)
4289 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
4290 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
4291 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
4292 /* APUs with gfx9 onwards don't rely on PCIe atomics; rather, an
4293 * internal path natively supports atomics, so set have_atomics_support to true.
4295 } else if ((adev->flags & AMD_IS_APU) &&
4296 (amdgpu_ip_version(adev, GC_HWIP, 0) >
4297 IP_VERSION(9, 0, 0))) {
4298 adev->have_atomics_support = true;
4300 adev->have_atomics_support =
4301 !pci_enable_atomic_ops_to_root(adev->pdev,
4302 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
4303 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
4306 if (!adev->have_atomics_support)
4307 dev_info(adev->dev, "PCIE atomic ops is not supported\n");
4309 /* doorbell bar mapping and doorbell index init*/
4310 amdgpu_doorbell_init(adev);
4312 if (amdgpu_emu_mode == 1) {
4313 /* post the asic on emulation mode */
4314 emu_soc_asic_init(adev);
4315 goto fence_driver_init;
4318 amdgpu_reset_init(adev);
4320 /* detect if we are with an SRIOV vbios */
4322 amdgpu_device_detect_sriov_bios(adev);
4324 /* check if we need to reset the asic
4325 * E.g., driver was not cleanly unloaded previously, etc.
4327 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
4328 if (adev->gmc.xgmi.num_physical_nodes) {
4329 dev_info(adev->dev, "Pending hive reset.\n");
4330 amdgpu_set_init_level(adev,
4331 AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
4332 } else if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) &&
4333 !amdgpu_device_has_display_hardware(adev)) {
4334 r = psp_gpu_reset(adev);
4336 tmp = amdgpu_reset_method;
4337 /* It should do a default reset when loading or reloading the driver,
4338 * regardless of the module parameter reset_method.
4340 amdgpu_reset_method = AMD_RESET_METHOD_NONE;
4341 r = amdgpu_asic_reset(adev);
4342 amdgpu_reset_method = tmp;
4346 dev_err(adev->dev, "asic reset on init failed\n");
4351 /* Post card if necessary */
4352 if (amdgpu_device_need_post(adev)) {
4354 dev_err(adev->dev, "no vBIOS found\n");
4358 DRM_INFO("GPU posting now...\n");
4359 r = amdgpu_device_asic_init(adev);
4361 dev_err(adev->dev, "gpu post error!\n");
4367 if (adev->is_atom_fw) {
4368 /* Initialize clocks */
4369 r = amdgpu_atomfirmware_get_clock_info(adev);
4371 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
4372 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4376 /* Initialize clocks */
4377 r = amdgpu_atombios_get_clock_info(adev);
4379 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
4380 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4383 /* init i2c buses */
4384 if (!amdgpu_device_has_dc_support(adev))
4385 amdgpu_atombios_i2c_init(adev);
4391 r = amdgpu_fence_driver_sw_init(adev);
4393 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
4394 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
4398 /* init the mode config */
4399 drm_mode_config_init(adev_to_drm(adev));
4401 r = amdgpu_device_ip_init(adev);
4403 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
4404 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
4405 goto release_ras_con;
4408 amdgpu_fence_driver_hw_init(adev);
4411 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
4412 adev->gfx.config.max_shader_engines,
4413 adev->gfx.config.max_sh_per_se,
4414 adev->gfx.config.max_cu_per_sh,
4415 adev->gfx.cu_info.number);
4417 adev->accel_working = true;
4419 amdgpu_vm_check_compute_bug(adev);
4421 /* Initialize the buffer migration limit. */
4422 if (amdgpu_moverate >= 0)
4423 max_MBps = amdgpu_moverate;
4425 max_MBps = 8; /* Allow 8 MB/s. */
4426 /* Get a log2 for easy divisions. */
4427 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
4430 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
4431 * Otherwise the mgpu fan boost feature will be skipped because the
4432 * gpu instance count is too low.
4434 amdgpu_register_gpu_instance(adev);
4436 /* enable clockgating, etc. after ib tests, since some blocks require
4437 * explicit gating rather than handling it automatically.
4439 if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
4440 r = amdgpu_device_ip_late_init(adev);
4442 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4443 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
4444 goto release_ras_con;
4447 amdgpu_ras_resume(adev);
4448 queue_delayed_work(system_wq, &adev->delayed_init_work,
4449 msecs_to_jiffies(AMDGPU_RESUME_MS));
4452 if (amdgpu_sriov_vf(adev)) {
4453 amdgpu_virt_release_full_gpu(adev, true);
4454 flush_delayed_work(&adev->delayed_init_work);
4458 * Register these sysfs interfaces after `late_init`, since some of the
4459 * operations performed in `late_init` might affect how the sysfs
4460 * interfaces are created.
4462 r = amdgpu_atombios_sysfs_init(adev);
4464 drm_err(&adev->ddev,
4465 "registering atombios sysfs failed (%d).\n", r);
4467 r = amdgpu_pm_sysfs_init(adev);
4469 DRM_ERROR("registering pm sysfs failed (%d).\n", r);
4471 r = amdgpu_ucode_sysfs_init(adev);
4473 adev->ucode_sysfs_en = false;
4474 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
4476 adev->ucode_sysfs_en = true;
4478 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
4480 dev_err(adev->dev, "Could not create amdgpu device attr\n");
4482 r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
4485 "Could not create amdgpu board attributes\n");
4487 amdgpu_fru_sysfs_init(adev);
4488 amdgpu_reg_state_sysfs_init(adev);
4489 amdgpu_xcp_cfg_sysfs_init(adev);
4491 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4492 r = amdgpu_pmu_init(adev);
4494 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
4496 /* Keep the stored PCI config space at hand for restore after a sudden PCI error */
4497 if (amdgpu_device_cache_pci_state(adev->pdev))
4498 pci_restore_state(pdev);
4500 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
4501 /* this will fail for cards that aren't VGA class devices, just
4504 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4505 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
4507 px = amdgpu_device_supports_px(ddev);
4509 if (px || (!dev_is_removable(&adev->pdev->dev) &&
4510 apple_gmux_detect(NULL, NULL)))
4511 vga_switcheroo_register_client(adev->pdev,
4512 &amdgpu_switcheroo_ops, px);
4515 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
4517 if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
4518 amdgpu_xgmi_reset_on_init(adev);
4520 amdgpu_device_check_iommu_direct_map(adev);
4525 if (amdgpu_sriov_vf(adev))
4526 amdgpu_virt_release_full_gpu(adev, true);
4528 /* failed in exclusive mode due to timeout */
4529 if (amdgpu_sriov_vf(adev) &&
4530 !amdgpu_sriov_runtime(adev) &&
4531 amdgpu_virt_mmio_blocked(adev) &&
4532 !amdgpu_virt_wait_reset(adev)) {
4533 dev_err(adev->dev, "VF exclusive mode timeout\n");
4534 /* Don't send request since VF is inactive. */
4535 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4536 adev->virt.ops = NULL;
4539 amdgpu_release_ras_context(adev);
4542 amdgpu_vf_error_trans_all(adev);
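/*
 * Tear down every CPU-visible mapping of the device: revoke existing
 * userspace mappings, then unmap the doorbell, register (MMIO) and visible
 * VRAM apertures and release the VRAM write-combine MTRR/memtype.
 */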
4547 static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4550 /* Clear all CPU mappings pointing to this device */
4551 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4553 /* Unmap all mapped bars - Doorbell, registers and VRAM */
4554 amdgpu_doorbell_fini(adev);
4556 iounmap(adev->rmmio);
4558 if (adev->mman.aper_base_kaddr)
4559 iounmap(adev->mman.aper_base_kaddr);
4560 adev->mman.aper_base_kaddr = NULL;
4562 /* Memory manager related */
4563 if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
4564 arch_phys_wc_del(adev->gmc.vram_mtrr);
4565 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4570 * amdgpu_device_fini_hw - tear down the driver
4572 * @adev: amdgpu_device pointer
4574 * Tear down the driver info (all asics).
4575 * Called at driver shutdown.
4577 void amdgpu_device_fini_hw(struct amdgpu_device *adev)
4579 dev_info(adev->dev, "amdgpu: finishing device.\n");
4580 flush_delayed_work(&adev->delayed_init_work);
4582 if (adev->mman.initialized)
4583 drain_workqueue(adev->mman.bdev.wq);
4584 adev->shutdown = true;
4586 /* make sure the IB tests have finished before entering exclusive mode
4587 * to avoid preemption during the IB tests
4589 if (amdgpu_sriov_vf(adev)) {
4590 amdgpu_virt_request_full_gpu(adev, false);
4591 amdgpu_virt_fini_data_exchange(adev);
4594 /* disable all interrupts */
4595 amdgpu_irq_disable_all(adev);
4596 if (adev->mode_info.mode_config_initialized) {
4597 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4598 drm_helper_force_disable_all(adev_to_drm(adev));
4600 drm_atomic_helper_shutdown(adev_to_drm(adev));
4602 amdgpu_fence_driver_hw_fini(adev);
4604 if (adev->pm.sysfs_initialized)
4605 amdgpu_pm_sysfs_fini(adev);
4606 if (adev->ucode_sysfs_en)
4607 amdgpu_ucode_sysfs_fini(adev);
4608 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
4609 amdgpu_fru_sysfs_fini(adev);
4611 amdgpu_reg_state_sysfs_fini(adev);
4612 amdgpu_xcp_cfg_sysfs_fini(adev);
4614 /* RAS features must be disabled before hw fini */
4615 amdgpu_ras_pre_fini(adev);
4617 amdgpu_ttm_set_buffer_funcs_status(adev, false);
4619 amdgpu_device_ip_fini_early(adev);
4621 amdgpu_irq_fini_hw(adev);
4623 if (adev->mman.initialized)
4624 ttm_device_clear_dma_mappings(&adev->mman.bdev);
4626 amdgpu_gart_dummy_page_fini(adev);
4628 if (drm_dev_is_unplugged(adev_to_drm(adev)))
4629 amdgpu_device_unmap_mmio(adev);
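/*
 * amdgpu_device_fini_sw - tear down driver software state
 *
 * Second teardown stage, run after amdgpu_device_fini_hw(): finalizes the
 * fence driver and IP blocks, releases firmware, the reset machinery, i2c
 * buses, FRU info, VGA/vga_switcheroo registration, remaining MMIO and
 * doorbell mappings, PMU and discovery data, the reset domain and the
 * cached PCI state.
 */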
4633 void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4638 amdgpu_fence_driver_sw_fini(adev);
4639 amdgpu_device_ip_fini(adev);
4640 amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
4641 adev->accel_working = false;
4642 dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
4644 amdgpu_reset_fini(adev);
4646 /* free i2c buses */
4647 if (!amdgpu_device_has_dc_support(adev))
4648 amdgpu_i2c_fini(adev);
4650 if (amdgpu_emu_mode != 1)
4651 amdgpu_atombios_fini(adev);
4656 kfree(adev->fru_info);
4657 adev->fru_info = NULL;
4659 px = amdgpu_device_supports_px(adev_to_drm(adev));
4661 if (px || (!dev_is_removable(&adev->pdev->dev) &&
4662 apple_gmux_detect(NULL, NULL)))
4663 vga_switcheroo_unregister_client(adev->pdev);
4666 vga_switcheroo_fini_domain_pm_ops(adev->dev);
4668 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4669 vga_client_unregister(adev->pdev);
4671 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4673 iounmap(adev->rmmio);
4675 amdgpu_doorbell_fini(adev);
4679 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4680 amdgpu_pmu_fini(adev);
4681 if (adev->mman.discovery_bin)
4682 amdgpu_discovery_fini(adev);
4684 amdgpu_reset_put_reset_domain(adev->reset_domain);
4685 adev->reset_domain = NULL;
4687 kfree(adev->pci_state);
4692 * amdgpu_device_evict_resources - evict device resources
4693 * @adev: amdgpu device object
4695 * Evicts all ttm device resources (vram BOs, gart table) from the lru list
4696 * of the vram memory type. Mainly used for evicting device resources
4700 static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
4704 /* No need to evict vram on APUs for suspend to ram or s2idle */
4705 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
4708 ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4710 DRM_WARN("evicting device resources failed\n");
4718 * amdgpu_device_prepare - prepare for device suspend
4720 * @dev: drm dev pointer
4722 * Prepare to put the hw in the suspend state (all asics).
4723 * Returns 0 for success or an error on failure.
4724 * Called at driver suspend.
4726 int amdgpu_device_prepare(struct drm_device *dev)
4728 struct amdgpu_device *adev = drm_to_adev(dev);
4731 amdgpu_choose_low_power_state(adev);
4733 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4736 /* Evict the majority of BOs before starting suspend sequence */
4737 r = amdgpu_device_evict_resources(adev);
4741 flush_delayed_work(&adev->gfx.gfx_off_delay_work);
4743 for (i = 0; i < adev->num_ip_blocks; i++) {
4744 if (!adev->ip_blocks[i].status.valid)
4746 if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
4748 r = adev->ip_blocks[i].version->funcs->prepare_suspend(&adev->ip_blocks[i]);
4756 adev->in_s0ix = adev->in_s3 = false;
4762 * amdgpu_device_suspend - initiate device suspend
4764 * @dev: drm dev pointer
4765 * @fbcon: notify the fbdev of suspend
4767 * Puts the hw in the suspend state (all asics).
4768 * Returns 0 for success or an error on failure.
4769 * Called at driver suspend.
4771 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
4773 struct amdgpu_device *adev = drm_to_adev(dev);
4776 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4779 adev->in_suspend = true;
4781 if (amdgpu_sriov_vf(adev)) {
4782 amdgpu_virt_fini_data_exchange(adev);
4783 r = amdgpu_virt_request_full_gpu(adev, false);
4788 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4789 DRM_WARN("smart shift update failed\n");
4792 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
4794 cancel_delayed_work_sync(&adev->delayed_init_work);
4796 amdgpu_ras_suspend(adev);
4798 amdgpu_device_ip_suspend_phase1(adev);
4801 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
4803 r = amdgpu_device_evict_resources(adev);
4807 amdgpu_ttm_set_buffer_funcs_status(adev, false);
4809 amdgpu_fence_driver_hw_fini(adev);
4811 amdgpu_device_ip_suspend_phase2(adev);
4813 if (amdgpu_sriov_vf(adev))
4814 amdgpu_virt_release_full_gpu(adev, false);
4816 r = amdgpu_dpm_notify_rlc_state(adev, false);
4824 * amdgpu_device_resume - initiate device resume
4826 * @dev: drm dev pointer
4827 * @fbcon: notify the fbdev of resume
4829 * Bring the hw back to operating state (all asics).
4830 * Returns 0 for success or an error on failure.
4831 * Called at driver resume.
4833 int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
4835 struct amdgpu_device *adev = drm_to_adev(dev);
4838 if (amdgpu_sriov_vf(adev)) {
4839 r = amdgpu_virt_request_full_gpu(adev, true);
4844 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4848 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
4851 if (amdgpu_device_need_post(adev)) {
4852 r = amdgpu_device_asic_init(adev);
4854 dev_err(adev->dev, "amdgpu asic init failed\n");
4857 r = amdgpu_device_ip_resume(adev);
4860 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
4863 amdgpu_fence_driver_hw_init(adev);
4865 if (!adev->in_s0ix) {
4866 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4871 r = amdgpu_device_ip_late_init(adev);
4875 queue_delayed_work(system_wq, &adev->delayed_init_work,
4876 msecs_to_jiffies(AMDGPU_RESUME_MS));
4878 if (amdgpu_sriov_vf(adev)) {
4879 amdgpu_virt_init_data_exchange(adev);
4880 amdgpu_virt_release_full_gpu(adev, true);
4886 /* Make sure IB tests flushed */
4887 flush_delayed_work(&adev->delayed_init_work);
4890 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
4892 amdgpu_ras_resume(adev);
4894 if (adev->mode_info.num_crtc) {
4896 * Most of the connector probing functions try to acquire runtime pm
4897 * refs to ensure that the GPU is powered on when connector polling is
4898 * performed. Since we're calling this from a runtime PM callback,
4899 * trying to acquire rpm refs will cause us to deadlock.
4901 * Since we're guaranteed to be holding the rpm lock, it's safe to
4902 * temporarily disable the rpm helpers so this doesn't deadlock us.
4905 dev->dev->power.disable_depth++;
4907 if (!adev->dc_enabled)
4908 drm_helper_hpd_irq_event(dev);
4910 drm_kms_helper_hotplug_event(dev);
4912 dev->dev->power.disable_depth--;
4915 adev->in_suspend = false;
4917 if (adev->enable_mes)
4918 amdgpu_mes_self_test(adev);
4920 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4921 DRM_WARN("smart shift update failed\n");
4927 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4929 * @adev: amdgpu_device pointer
4931 * The list of all the hardware IPs that make up the asic is walked and
4932 * the check_soft_reset callbacks are run. check_soft_reset determines
4933 * if the asic is still hung or not.
4934 * Returns true if any of the IPs are still in a hung state, false if not.
4936 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
4939 bool asic_hang = false;
4941 if (amdgpu_sriov_vf(adev))
4944 if (amdgpu_asic_need_full_reset(adev))
4947 for (i = 0; i < adev->num_ip_blocks; i++) {
4948 if (!adev->ip_blocks[i].status.valid)
4950 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4951 adev->ip_blocks[i].status.hang =
4952 adev->ip_blocks[i].version->funcs->check_soft_reset(
4953 &adev->ip_blocks[i]);
4954 if (adev->ip_blocks[i].status.hang) {
4955 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
4963 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4965 * @adev: amdgpu_device pointer
4967 * The list of all the hardware IPs that make up the asic is walked and the
4968 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
4969 * handles any IP specific hardware or software state changes that are
4970 * necessary for a soft reset to succeed.
4971 * Returns 0 on success, negative error code on failure.
4973 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
4977 for (i = 0; i < adev->num_ip_blocks; i++) {
4978 if (!adev->ip_blocks[i].status.valid)
4980 if (adev->ip_blocks[i].status.hang &&
4981 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4982 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(&adev->ip_blocks[i]);
4992 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4994 * @adev: amdgpu_device pointer
4996 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
4997 * reset is necessary to recover.
4998 * Returns true if a full asic reset is required, false if not.
5000 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
5004 if (amdgpu_asic_need_full_reset(adev))
5007 for (i = 0; i < adev->num_ip_blocks; i++) {
5008 if (!adev->ip_blocks[i].status.valid)
5010 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
5011 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
5012 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
5013 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
5014 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
5015 if (adev->ip_blocks[i].status.hang) {
5016 dev_info(adev->dev, "Some blocks need a full reset!\n");
5025 * amdgpu_device_ip_soft_reset - do a soft reset
5027 * @adev: amdgpu_device pointer
5029 * The list of all the hardware IPs that make up the asic is walked and the
5030 * soft_reset callbacks are run if the block is hung. soft_reset handles any
5031 * IP specific hardware or software state changes that are necessary to soft
5033 * Returns 0 on success, negative error code on failure.
5035 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
5039 for (i = 0; i < adev->num_ip_blocks; i++) {
5040 if (!adev->ip_blocks[i].status.valid)
5042 if (adev->ip_blocks[i].status.hang &&
5043 adev->ip_blocks[i].version->funcs->soft_reset) {
5044 r = adev->ip_blocks[i].version->funcs->soft_reset(&adev->ip_blocks[i]);
5054 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
5056 * @adev: amdgpu_device pointer
5058 * The list of all the hardware IPs that make up the asic is walked and the
5059 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
5060 * handles any IP specific hardware or software state changes that are
5061 * necessary after the IP has been soft reset.
5062 * Returns 0 on success, negative error code on failure.
5064 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
5068 for (i = 0; i < adev->num_ip_blocks; i++) {
5069 if (!adev->ip_blocks[i].status.valid)
5071 if (adev->ip_blocks[i].status.hang &&
5072 adev->ip_blocks[i].version->funcs->post_soft_reset)
5073 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
5082 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5084 * @adev: amdgpu_device pointer
5085 * @reset_context: amdgpu reset context pointer
5087 * Do a VF FLR and reinitialize the ASIC.
5088 * Returns 0 on success, otherwise an error code.
5090 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
5091 struct amdgpu_reset_context *reset_context)
5094 struct amdgpu_hive_info *hive = NULL;
5096 if (test_bit(AMDGPU_HOST_FLR, &reset_context->flags)) {
5097 if (!amdgpu_ras_get_fed_status(adev))
5098 amdgpu_virt_ready_to_reset(adev);
5099 amdgpu_virt_wait_reset(adev);
5100 clear_bit(AMDGPU_HOST_FLR, &reset_context->flags);
5101 r = amdgpu_virt_request_full_gpu(adev, true);
5103 r = amdgpu_virt_reset_gpu(adev);
5108 amdgpu_ras_set_fed(adev, false);
5109 amdgpu_irq_gpu_reset_resume_helper(adev);
5111 /* some SW cleanup the VF needs to do before recovery */
5112 amdgpu_virt_post_reset(adev);
5114 /* Resume IP prior to SMC */
5115 r = amdgpu_device_ip_reinit_early_sriov(adev);
5119 amdgpu_virt_init_data_exchange(adev);
5121 r = amdgpu_device_fw_loading(adev);
5125 /* now we are okay to resume SMC/CP/SDMA */
5126 r = amdgpu_device_ip_reinit_late_sriov(adev);
5130 hive = amdgpu_get_xgmi_hive(adev);
5131 /* Update PSP FW topology after reset */
5132 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
5133 r = amdgpu_xgmi_update_topology(hive, adev);
5135 amdgpu_put_xgmi_hive(hive);
5139 r = amdgpu_ib_ring_tests(adev);
5143 if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST)
5144 amdgpu_inc_vram_lost(adev);
5146 /* need to be called during full access so we can't do it later like
5149 amdgpu_amdkfd_post_reset(adev);
5150 amdgpu_virt_release_full_gpu(adev, true);
5152 /* Aldebaran and gfx_11_0_3 support RAS in SRIOV, so RAS needs to be resumed during reset */
5153 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
5154 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
5155 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
5156 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
5157 amdgpu_ras_resume(adev);
5162 * amdgpu_device_has_job_running - check if there is any job in mirror list
5164 * @adev: amdgpu_device pointer
5166 * Check if there is any job in the mirror list.
5168 bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
5171 struct drm_sched_job *job;
5173 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5174 struct amdgpu_ring *ring = adev->rings[i];
5176 if (!amdgpu_ring_sched_ready(ring))
5179 spin_lock(&ring->sched.job_list_lock);
5180 job = list_first_entry_or_null(&ring->sched.pending_list,
5181 struct drm_sched_job, list);
5182 spin_unlock(&ring->sched.job_list_lock);
5190 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
5192 * @adev: amdgpu_device pointer
5194 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
5197 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
5200 if (amdgpu_gpu_recovery == 0)
5203 /* Skip soft reset check in fatal error mode */
5204 if (!amdgpu_ras_is_poison_mode_supported(adev))
5207 if (amdgpu_sriov_vf(adev))
5210 if (amdgpu_gpu_recovery == -1) {
5211 switch (adev->asic_type) {
5212 #ifdef CONFIG_DRM_AMDGPU_SI
5219 #ifdef CONFIG_DRM_AMDGPU_CIK
5226 case CHIP_CYAN_SKILLFISH:
5236 dev_info(adev->dev, "GPU recovery disabled.\n");
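/*
 * Full ASIC (mode1) reset: cache the PCI config space, disable bus
 * mastering, trigger the reset through the SMU when supported (PSP
 * otherwise), then restore the PCI state, wait for the bootloader and poll
 * the memory controller until it reports a valid memory size again.
 */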
5240 int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
5245 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5247 dev_info(adev->dev, "GPU mode1 reset\n");
5249 /* Cache the state before bus master disable. The saved config space
5250 * values are used in other cases like restore after mode-2 reset.
5252 amdgpu_device_cache_pci_state(adev->pdev);
5255 pci_clear_master(adev->pdev);
5257 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
5258 dev_info(adev->dev, "GPU smu mode1 reset\n");
5259 ret = amdgpu_dpm_mode1_reset(adev);
5261 dev_info(adev->dev, "GPU psp mode1 reset\n");
5262 ret = psp_gpu_reset(adev);
5266 goto mode1_reset_failed;
5268 amdgpu_device_load_pci_state(adev->pdev);
5269 ret = amdgpu_psp_wait_for_bootloader(adev);
5271 goto mode1_reset_failed;
5273 /* wait for asic to come out of reset */
5274 for (i = 0; i < adev->usec_timeout; i++) {
5275 u32 memsize = adev->nbio.funcs->get_memsize(adev);
5277 if (memsize != 0xffffffff)
5282 if (i >= adev->usec_timeout) {
5284 goto mode1_reset_failed;
5287 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
5292 dev_err(adev->dev, "GPU mode1 reset failed\n");
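/*
 * Prepare a device for ASIC reset: toggle fence interrupts, clear and
 * force-complete the HW fences of all rings, bump the guilty job's karma,
 * give the reset handler's prepare_hwcontext a chance to run and, on bare
 * metal, try a per-IP soft reset first, setting AMDGPU_NEED_FULL_RESET when
 * that is not enough. IP state is dumped for the coredump unless
 * AMDGPU_SKIP_COREDUMP is set.
 */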
5296 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
5297 struct amdgpu_reset_context *reset_context)
5300 struct amdgpu_job *job = NULL;
5301 struct amdgpu_device *tmp_adev = reset_context->reset_req_dev;
5302 bool need_full_reset =
5303 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5305 if (reset_context->reset_req_dev == adev)
5306 job = reset_context->job;
5308 if (amdgpu_sriov_vf(adev))
5309 amdgpu_virt_pre_reset(adev);
5311 amdgpu_fence_driver_isr_toggle(adev, true);
5313 /* block all schedulers and reset given job's ring */
5314 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5315 struct amdgpu_ring *ring = adev->rings[i];
5317 if (!amdgpu_ring_sched_ready(ring))
5320 /* Clear job fence from fence drv to avoid force_completion
5321 * leave NULL and vm flush fence in fence drv
5323 amdgpu_fence_driver_clear_job_fences(ring);
5325 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
5326 amdgpu_fence_driver_force_completion(ring);
5329 amdgpu_fence_driver_isr_toggle(adev, false);
5332 drm_sched_increase_karma(&job->base);
5334 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
5335 /* If reset handler not implemented, continue; otherwise return */
5336 if (r == -EOPNOTSUPP)
5341 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
5342 if (!amdgpu_sriov_vf(adev)) {
5344 if (!need_full_reset)
5345 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
5347 if (!need_full_reset && amdgpu_gpu_recovery &&
5348 amdgpu_device_ip_check_soft_reset(adev)) {
5349 amdgpu_device_ip_pre_soft_reset(adev);
5350 r = amdgpu_device_ip_soft_reset(adev);
5351 amdgpu_device_ip_post_soft_reset(adev);
5352 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
5353 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
5354 need_full_reset = true;
5358 if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) {
5359 dev_info(tmp_adev->dev, "Dumping IP State\n");
5360 /* Trigger ip dump before we reset the asic */
5361 for (i = 0; i < tmp_adev->num_ip_blocks; i++)
5362 if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state)
5363 tmp_adev->ip_blocks[i].version->funcs
5364 ->dump_ip_state((void *)&tmp_adev->ip_blocks[i]);
5365 dev_info(tmp_adev->dev, "Dumping IP State Completed\n");
5368 if (need_full_reset)
5369 r = amdgpu_device_ip_suspend(adev);
5370 if (need_full_reset)
5371 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5373 clear_bit(AMDGPU_NEED_FULL_RESET,
5374 &reset_context->flags);
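/*
 * Bring every device in the reset list back up after the ASIC reset: for a
 * full reset, re-post the ASIC, resume the IP blocks in two phases with a
 * firmware reload in between, check for VRAM loss (taking a coredump when
 * requested), restore the partition mode and XGMI topology, then re-run
 * late init, RAS resume and the IB ring tests.
 */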
5380 int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)
5382 struct list_head *device_list_handle;
5383 bool full_reset, vram_lost = false;
5384 struct amdgpu_device *tmp_adev;
5387 device_list_handle = reset_context->reset_device_list;
5389 if (!device_list_handle)
5392 full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5395 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5396 /* After reset, it's default init level */
5397 amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_DEFAULT);
5400 amdgpu_ras_set_fed(tmp_adev, false);
5401 r = amdgpu_device_asic_init(tmp_adev);
5403 dev_warn(tmp_adev->dev, "asic atom init failed!");
5405 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
5407 r = amdgpu_device_ip_resume_phase1(tmp_adev);
5411 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
5413 if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags))
5414 amdgpu_coredump(tmp_adev, false, vram_lost, reset_context->job);
5417 DRM_INFO("VRAM is lost due to GPU reset!\n");
5418 amdgpu_inc_vram_lost(tmp_adev);
5421 r = amdgpu_device_fw_loading(tmp_adev);
5425 r = amdgpu_xcp_restore_partition_mode(
5430 r = amdgpu_device_ip_resume_phase2(tmp_adev);
5434 if (tmp_adev->mman.buffer_funcs_ring->sched.ready)
5435 amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true);
5438 amdgpu_device_fill_reset_magic(tmp_adev);
5441 * Add this ASIC back as tracked since the reset already
5442 * completed successfully.
5444 amdgpu_register_gpu_instance(tmp_adev);
5446 if (!reset_context->hive &&
5447 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5448 amdgpu_xgmi_add_device(tmp_adev);
5450 r = amdgpu_device_ip_late_init(tmp_adev);
5454 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
5457 * The GPU enters a bad state once the number of
5458 * faulty pages detected by ECC reaches the
5459 * threshold, and RAS recovery is scheduled next.
5460 * So add one check here to break recovery if the
5461 * bad page threshold has indeed been exceeded,
5462 * and remind the user to either retire this GPU
5463 * or set a bigger bad_page_threshold value the
5464 * next time the driver is probed.
5466 if (!amdgpu_ras_is_rma(tmp_adev)) {
5468 amdgpu_ras_resume(tmp_adev);
5474 /* Update PSP FW topology after reset */
5475 if (reset_context->hive &&
5476 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5477 r = amdgpu_xgmi_update_topology(
5478 reset_context->hive, tmp_adev);
5484 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5485 r = amdgpu_ib_ring_tests(tmp_adev);
5487 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
5494 tmp_adev->asic_reset_res = r;
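/*
 * Reset one or more ASICs: try the dedicated reset handler first and fall
 * back to the default path otherwise. For XGMI hives the ASIC resets are
 * queued in parallel so firmware link negotiation can complete in time,
 * then the devices are reinitialized via amdgpu_device_reinit_after_reset().
 */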
5501 int amdgpu_do_asic_reset(struct list_head *device_list_handle,
5502 struct amdgpu_reset_context *reset_context)
5504 struct amdgpu_device *tmp_adev = NULL;
5505 bool need_full_reset, skip_hw_reset;
5508 /* Try reset handler method first */
5509 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5512 reset_context->reset_device_list = device_list_handle;
5513 r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
5514 /* If reset handler not implemented, continue; otherwise return */
5515 if (r == -EOPNOTSUPP)
5520 /* Reset handler not implemented, use the default method */
5522 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5523 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
5526 * ASIC reset has to be done on all XGMI hive nodes ASAP
5527 * to allow proper link negotiation in FW (within 1 sec)
5529 if (!skip_hw_reset && need_full_reset) {
5530 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5531 /* For XGMI run all resets in parallel to speed up the process */
5532 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5533 if (!queue_work(system_unbound_wq,
5534 &tmp_adev->xgmi_reset_work))
5537 r = amdgpu_asic_reset(tmp_adev);
5540 dev_err(tmp_adev->dev,
5541 "ASIC reset failed with error, %d for drm dev, %s",
5542 r, adev_to_drm(tmp_adev)->unique);
5547 /* For XGMI wait for all resets to complete before proceed */
5549 list_for_each_entry(tmp_adev, device_list_handle,
5551 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5552 flush_work(&tmp_adev->xgmi_reset_work);
5553 r = tmp_adev->asic_reset_res;
5561 if (!r && amdgpu_ras_intr_triggered()) {
5562 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5563 amdgpu_ras_reset_error_count(tmp_adev,
5564 AMDGPU_RAS_BLOCK__MMHUB);
5567 amdgpu_ras_intr_cleared();
5570 r = amdgpu_device_reinit_after_reset(reset_context);
5572 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5574 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
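/* Tell MP1 (the SMU firmware) which kind of reset is about to happen. */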
5580 static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
5583 switch (amdgpu_asic_reset_method(adev)) {
5584 case AMD_RESET_METHOD_MODE1:
5585 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5587 case AMD_RESET_METHOD_MODE2:
5588 adev->mp1_state = PP_MP1_STATE_RESET;
5591 adev->mp1_state = PP_MP1_STATE_NONE;
5596 static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
5598 amdgpu_vf_error_trans_all(adev);
5599 adev->mp1_state = PP_MP1_STATE_NONE;
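/* Re-enable runtime PM on the GPU's HD-audio function (devfn 1 on the same
 * bus) that was suspended before the reset started. */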
5602 static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5604 struct pci_dev *p = NULL;
5606 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5607 adev->pdev->bus->number, 1);
5609 pm_runtime_enable(&(p->dev));
5610 pm_runtime_resume(&(p->dev));
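/* Runtime-suspend the GPU's HD-audio function before a BACO or mode1 reset
 * so the reset does not change the audio hardware behind the audio
 * driver's back. */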
5616 static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5618 enum amd_reset_method reset_method;
5619 struct pci_dev *p = NULL;
5623 * For now, only BACO and mode1 reset are confirmed
5624 * to suffer from the audio issue if the audio device is not properly suspended.
5626 reset_method = amdgpu_asic_reset_method(adev);
5627 if ((reset_method != AMD_RESET_METHOD_BACO) &&
5628 (reset_method != AMD_RESET_METHOD_MODE1))
5631 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5632 adev->pdev->bus->number, 1);
5636 expires = pm_runtime_autosuspend_expiration(&(p->dev));
5639 * If we cannot get the audio device autosuspend delay,
5640 * a fixed 4s interval is used. Since 3s is the audio
5641 * controller's default autosuspend delay setting, the 4s
5642 * used here is guaranteed to cover it.
5644 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
5646 while (!pm_runtime_status_suspended(&(p->dev))) {
5647 if (!pm_runtime_suspend(&(p->dev)))
5650 if (expires < ktime_get_mono_fast_ns()) {
5651 dev_warn(adev->dev, "failed to suspend display audio\n");
5653 /* TODO: abort the succeeding gpu reset? */
5658 pm_runtime_disable(&(p->dev));
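/* Cancel reset work queued before this reset completed: debugfs-triggered
 * resets, the KFD reset work, the SRIOV FLR work and RAS recovery work. */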
5664 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
5666 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5668 #if defined(CONFIG_DEBUG_FS)
5669 if (!amdgpu_sriov_vf(adev))
5670 cancel_work(&adev->reset_work);
5674 cancel_work(&adev->kfd.reset_work);
5676 if (amdgpu_sriov_vf(adev))
5677 cancel_work(&adev->virt.flr_work);
5679 if (con && adev->ras_enabled)
5680 cancel_work(&con->recovery_work);
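/* Read the PCI command register of every device in the list to make sure
 * none of them has dropped off the bus before we attempt a reset. */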
5684 static int amdgpu_device_health_check(struct list_head *device_list_handle)
5686 struct amdgpu_device *tmp_adev;
5690 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5691 pci_read_config_dword(tmp_adev->pdev, PCI_COMMAND, &status);
5692 if (PCI_POSSIBLE_ERROR(status)) {
5693 dev_err(tmp_adev->dev, "device lost from bus!");
5702 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
5704 * @adev: amdgpu_device pointer
5705 * @job: which job triggered the hang
5706 * @reset_context: amdgpu reset context pointer
5708 * Attempt to reset the GPU if it has hung (all asics).
5709 * Attempt a soft reset or full reset and reinitialize the ASIC.
5710 * Returns 0 for success or an error on failure.
5713 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
5714 struct amdgpu_job *job,
5715 struct amdgpu_reset_context *reset_context)
5717 struct list_head device_list, *device_list_handle = NULL;
5718 bool job_signaled = false;
5719 struct amdgpu_hive_info *hive = NULL;
5720 struct amdgpu_device *tmp_adev = NULL;
5722 bool need_emergency_restart = false;
5723 bool audio_suspended = false;
5724 int retry_limit = AMDGPU_MAX_RETRY_LIMIT;
5727 * Special case: RAS triggered and full reset isn't supported
5729 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5732 * Flush RAM to disk so that after reboot
5733 * the user can read the log and see why the system rebooted.
5735 if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
5736 amdgpu_ras_get_context(adev)->reboot) {
5737 DRM_WARN("Emergency reboot.");
5740 emergency_restart();
5743 dev_info(adev->dev, "GPU %s begin!\n",
5744 need_emergency_restart ? "jobs stop":"reset");
5746 if (!amdgpu_sriov_vf(adev))
5747 hive = amdgpu_get_xgmi_hive(adev);
5749 mutex_lock(&hive->hive_lock);
5751 reset_context->job = job;
5752 reset_context->hive = hive;
5754 * Build list of devices to reset.
5755 * In case we are in XGMI hive mode, re-sort the device list
5756 * to put adev in the 1st position.
5758 INIT_LIST_HEAD(&device_list);
5759 if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1) && hive) {
5760 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
5761 list_add_tail(&tmp_adev->reset_list, &device_list);
5763 tmp_adev->shutdown = true;
5765 if (!list_is_first(&adev->reset_list, &device_list))
5766 list_rotate_to_front(&adev->reset_list, &device_list);
5767 device_list_handle = &device_list;
5769 list_add_tail(&adev->reset_list, &device_list);
5770 device_list_handle = &device_list;
5773 if (!amdgpu_sriov_vf(adev)) {
5774 r = amdgpu_device_health_check(device_list_handle);
5779 /* We need to lock reset domain only once both for XGMI and single device */
5780 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5782 amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
5784 /* block all schedulers and reset given job's ring */
5785 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5787 amdgpu_device_set_mp1_state(tmp_adev);
5790 * Try to put the audio codec into the suspend state
5791 * before the gpu reset starts.
5793 * The power domain of the graphics device is shared
5794 * with the AZ power domain, so without this we may
5795 * change the audio hardware from behind the audio
5796 * driver's back, which would trigger audio codec
5797 * errors.
5799 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5800 audio_suspended = true;
5802 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5804 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5806 amdgpu_amdkfd_pre_reset(tmp_adev, reset_context);
5809 * Mark these ASICs to be reset as untracked first,
5810 * and add them back after the reset completes.
5812 amdgpu_unregister_gpu_instance(tmp_adev);
5814 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
5816 /* disable ras on ALL IPs */
5817 if (!need_emergency_restart &&
5818 amdgpu_device_ip_need_full_reset(tmp_adev))
5819 amdgpu_ras_suspend(tmp_adev);
5821 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5822 struct amdgpu_ring *ring = tmp_adev->rings[i];
5824 if (!amdgpu_ring_sched_ready(ring))
5827 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
5829 if (need_emergency_restart)
5830 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
5832 atomic_inc(&tmp_adev->gpu_reset_counter);
5835 if (need_emergency_restart)
5836 goto skip_sched_resume;
5839 * Must check guilty signal here since after this point all old
5840 * HW fences are force signaled.
5842 * job->base holds a reference to parent fence
5844 if (job && dma_fence_is_signaled(&job->hw_fence)) {
5845 job_signaled = true;
5846 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5850 retry: /* Rest of adevs pre asic reset from XGMI hive. */
5851 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5852 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
5853 /*TODO Should we stop ?*/
5855 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
5856 r, adev_to_drm(tmp_adev)->unique);
5857 tmp_adev->asic_reset_res = r;
5861 /* Actual ASIC resets if needed.*/
5862 /* Host driver will handle XGMI hive reset for SRIOV */
5863 if (amdgpu_sriov_vf(adev)) {
5864 if (amdgpu_ras_get_fed_status(adev) || amdgpu_virt_rcvd_ras_interrupt(adev)) {
5865 dev_dbg(adev->dev, "Detected RAS error, wait for FLR completion\n");
5866 amdgpu_ras_set_fed(adev, true);
5867 set_bit(AMDGPU_HOST_FLR, &reset_context->flags);
5870 r = amdgpu_device_reset_sriov(adev, reset_context);
5871 if (AMDGPU_RETRY_SRIOV_RESET(r) && (retry_limit--) > 0) {
5872 amdgpu_virt_release_full_gpu(adev, true);
5876 adev->asic_reset_res = r;
5878 r = amdgpu_do_asic_reset(device_list_handle, reset_context);
5879 if (r && r == -EAGAIN)
5883 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5885 * Drop any pending non-scheduler resets queued before the reset is done.
5886 * Any reset scheduled after this point would be valid. Scheduler resets
5887 * were already dropped during drm_sched_stop and no new ones can come
5888 * in before drm_sched_start.
5890 amdgpu_device_stop_pending_resets(tmp_adev);
5895 /* Post ASIC reset for all devs .*/
5896 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5898 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5899 struct amdgpu_ring *ring = tmp_adev->rings[i];
5901 if (!amdgpu_ring_sched_ready(ring))
5904 drm_sched_start(&ring->sched);
5907 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
5908 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
5910 if (tmp_adev->asic_reset_res)
5911 r = tmp_adev->asic_reset_res;
5913 tmp_adev->asic_reset_res = 0;
5916 /* bad news, how do we tell userspace?
5917 * for a ras error, we should report GPU bad status instead of
5920 if (reset_context->src != AMDGPU_RESET_SRC_RAS ||
5921 !amdgpu_ras_eeprom_check_err_threshold(tmp_adev))
5922 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n",
5923 atomic_read(&tmp_adev->gpu_reset_counter));
5924 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5926 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
5927 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5928 DRM_WARN("smart shift update failed\n");
5933 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5934 /* unlock kfd: SRIOV would do it separately */
5935 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
5936 amdgpu_amdkfd_post_reset(tmp_adev);
5938 /* kfd_post_reset will do nothing if the kfd device is not initialized,
5939 * so bring up kfd here if it wasn't initialized before
5941 if (!adev->kfd.init_complete)
5942 amdgpu_amdkfd_device_init(adev);
5944 if (audio_suspended)
5945 amdgpu_device_resume_display_audio(tmp_adev);
5947 amdgpu_device_unset_mp1_state(tmp_adev);
5949 amdgpu_ras_set_error_query_ready(tmp_adev, true);
5952 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5954 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5958 mutex_unlock(&hive->hive_lock);
5959 amdgpu_put_xgmi_hive(hive);
5963 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
5965 atomic_set(&adev->reset_domain->reset_res, r);
5970 * amdgpu_device_partner_bandwidth - find the bandwidth of appropriate partner
5972 * @adev: amdgpu_device pointer
5973 * @speed: pointer to the speed of the link
5974 * @width: pointer to the width of the link
5976 * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
5977 * first physical partner to an AMD dGPU.
5978 * This will exclude any virtual switches and links.
5980 static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
5981 enum pci_bus_speed *speed,
5982 enum pcie_link_width *width)
5984 struct pci_dev *parent = adev->pdev;
5986 if (!speed || !width)
5989 *speed = PCI_SPEED_UNKNOWN;
5990 *width = PCIE_LNK_WIDTH_UNKNOWN;
5992 if (amdgpu_device_pcie_dynamic_switching_supported(adev)) {
5993 while ((parent = pci_upstream_bridge(parent))) {
5994 /* skip upstream/downstream switches internal to the dGPU */
5995 if (parent->vendor == PCI_VENDOR_ID_ATI)
5997 *speed = pcie_get_speed_cap(parent);
5998 *width = pcie_get_width_cap(parent);
6002 /* use the current speeds rather than max if switching is not supported */
6003 pcie_bandwidth_available(adev->pdev, NULL, speed, width);
6008 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
6010 * @adev: amdgpu_device pointer
6012 * Fetches and stores in the driver the PCIE capabilities (gen speed
6013 * and lanes) of the slot the device is in. Handles APUs and
6014 * virtualized environments where PCIE config space may not be available.
6016 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
6018 struct pci_dev *pdev;
6019 enum pci_bus_speed speed_cap, platform_speed_cap;
6020 enum pcie_link_width platform_link_width;
6022 if (amdgpu_pcie_gen_cap)
6023 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
6025 if (amdgpu_pcie_lane_cap)
6026 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
6028 /* covers APUs as well */
6029 if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
6030 if (adev->pm.pcie_gen_mask == 0)
6031 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
6032 if (adev->pm.pcie_mlw_mask == 0)
6033 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
6037 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
6040 amdgpu_device_partner_bandwidth(adev, &platform_speed_cap,
6041 &platform_link_width);
6043 if (adev->pm.pcie_gen_mask == 0) {
6046 speed_cap = pcie_get_speed_cap(pdev);
6047 if (speed_cap == PCI_SPEED_UNKNOWN) {
6048 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6049 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6050 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
6052 if (speed_cap == PCIE_SPEED_32_0GT)
6053 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6054 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6055 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6056 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
6057 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
6058 else if (speed_cap == PCIE_SPEED_16_0GT)
6059 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6060 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6061 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6062 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
6063 else if (speed_cap == PCIE_SPEED_8_0GT)
6064 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6065 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6066 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
6067 else if (speed_cap == PCIE_SPEED_5_0GT)
6068 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6069 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
6071 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
6074 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
6075 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6076 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6078 if (platform_speed_cap == PCIE_SPEED_32_0GT)
6079 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6080 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6081 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6082 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
6083 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
6084 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
6085 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6086 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6087 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6088 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
6089 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
6090 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6091 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6092 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
6093 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
6094 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6095 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6097 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
6101 if (adev->pm.pcie_mlw_mask == 0) {
6102 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
6103 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
6105 switch (platform_link_width) {
6107 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
6108 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
6109 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6110 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6111 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6112 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6113 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6116 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
6117 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6118 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6119 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6120 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6121 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6124 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6125 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6126 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6127 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6128 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6131 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6132 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6133 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6134 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6137 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6138 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6139 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6142 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6143 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6146 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
6156 * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
6158 * @adev: amdgpu_device pointer
6159 * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
6161 * Return true if @peer_adev can access (DMA) @adev through the PCIe
6162 * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
6165 bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
6166 struct amdgpu_device *peer_adev)
6168 #ifdef CONFIG_HSA_AMD_P2P
6170 !adev->gmc.xgmi.connected_to_cpu &&
6171 !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
6173 bool is_large_bar = adev->gmc.visible_vram_size &&
6174 adev->gmc.real_vram_size == adev->gmc.visible_vram_size;
6175 bool p2p_addressable = amdgpu_device_check_iommu_remap(peer_adev);
6177 if (!p2p_addressable) {
6178 uint64_t address_mask = peer_adev->dev->dma_mask ?
6179 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
6180 resource_size_t aper_limit =
6181 adev->gmc.aper_base + adev->gmc.aper_size - 1;
6183 p2p_addressable = !(adev->gmc.aper_base & address_mask ||
6184 aper_limit & address_mask);
6186 return pcie_p2p && is_large_bar && p2p_access && p2p_addressable;
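/*
 * BACO entry/exit helpers: on RAS-enabled parts the doorbell interrupt is
 * disabled before entering BACO and re-enabled after leaving it.
 */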
6192 int amdgpu_device_baco_enter(struct drm_device *dev)
6194 struct amdgpu_device *adev = drm_to_adev(dev);
6195 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6197 if (!amdgpu_device_supports_baco(dev))
6200 if (ras && adev->ras_enabled &&
6201 adev->nbio.funcs->enable_doorbell_interrupt)
6202 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
6204 return amdgpu_dpm_baco_enter(adev);
6207 int amdgpu_device_baco_exit(struct drm_device *dev)
6209 struct amdgpu_device *adev = drm_to_adev(dev);
6210 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6213 if (!amdgpu_device_supports_baco(dev))
6216 ret = amdgpu_dpm_baco_exit(adev);
6220 if (ras && adev->ras_enabled &&
6221 adev->nbio.funcs->enable_doorbell_interrupt)
6222 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
6224 if (amdgpu_passthrough(adev) && adev->nbio.funcs &&
6225 adev->nbio.funcs->clear_doorbell_interrupt)
6226 adev->nbio.funcs->clear_doorbell_interrupt(adev);
6232 * amdgpu_pci_error_detected - Called when a PCI error is detected.
6233 * @pdev: PCI device struct
6234 * @state: PCI channel state
6236 * Description: Called when a PCI error is detected.
6238 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
6240 pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
6242 struct drm_device *dev = pci_get_drvdata(pdev);
6243 struct amdgpu_device *adev = drm_to_adev(dev);
6246 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
6248 if (adev->gmc.xgmi.num_physical_nodes > 1) {
6249 DRM_WARN("No support for XGMI hive yet...");
6250 return PCI_ERS_RESULT_DISCONNECT;
6253 adev->pci_channel_state = state;
6256 case pci_channel_io_normal:
6257 return PCI_ERS_RESULT_CAN_RECOVER;
6258 /* Fatal error, prepare for slot reset */
6259 case pci_channel_io_frozen:
6261 * Locking adev->reset_domain->sem will prevent any external access
6262 * to GPU during PCI error recovery
6264 amdgpu_device_lock_reset_domain(adev->reset_domain);
6265 amdgpu_device_set_mp1_state(adev);
6268 * Block any work scheduling as we do for regular GPU reset
6269 * for the duration of the recovery
6271 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6272 struct amdgpu_ring *ring = adev->rings[i];
6274 if (!amdgpu_ring_sched_ready(ring))
6277 drm_sched_stop(&ring->sched, NULL);
6279 atomic_inc(&adev->gpu_reset_counter);
6280 return PCI_ERS_RESULT_NEED_RESET;
6281 case pci_channel_io_perm_failure:
6282 /* Permanent error, prepare for device removal */
6283 return PCI_ERS_RESULT_DISCONNECT;
6286 return PCI_ERS_RESULT_NEED_RESET;
6290 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
6291 * @pdev: pointer to PCI device
6293 pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
6296 DRM_INFO("PCI error: mmio enabled callback!!\n");
6298 /* TODO - dump whatever for debugging purposes */
6300 /* This is called only if amdgpu_pci_error_detected returns
6301 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
6302 * works, no need to reset slot.
6305 return PCI_ERS_RESULT_RECOVERED;
6309 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
6310 * @pdev: PCI device struct
6312 * Description: This routine is called by the pci error recovery
6313 * code after the PCI slot has been reset, just before we
6314 * should resume normal operations.
6316 pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
6318 struct drm_device *dev = pci_get_drvdata(pdev);
6319 struct amdgpu_device *adev = drm_to_adev(dev);
6321 struct amdgpu_reset_context reset_context;
6323 struct list_head device_list;
6325 /* PCI error slot reset should be skipped during RAS recovery */
6326 if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
6327 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) &&
6328 amdgpu_ras_in_recovery(adev))
6329 return PCI_ERS_RESULT_RECOVERED;
6331 DRM_INFO("PCI error: slot reset callback!!\n");
6333 memset(&reset_context, 0, sizeof(reset_context));
6335 INIT_LIST_HEAD(&device_list);
6336 list_add_tail(&adev->reset_list, &device_list);
6338 /* wait for asic to come out of reset */
6341 /* Restore PCI confspace */
6342 amdgpu_device_load_pci_state(pdev);
6344 /* confirm ASIC came out of reset */
6345 for (i = 0; i < adev->usec_timeout; i++) {
6346 memsize = amdgpu_asic_get_config_memsize(adev);
6348 if (memsize != 0xffffffff)
6352 if (memsize == 0xffffffff) {
6357 reset_context.method = AMD_RESET_METHOD_NONE;
6358 reset_context.reset_req_dev = adev;
6359 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
6360 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
6362 adev->no_hw_access = true;
6363 r = amdgpu_device_pre_asic_reset(adev, &reset_context);
6364 adev->no_hw_access = false;
6368 r = amdgpu_do_asic_reset(&device_list, &reset_context);
6372 if (amdgpu_device_cache_pci_state(adev->pdev))
6373 pci_restore_state(adev->pdev);
6375 DRM_INFO("PCIe error recovery succeeded\n");
6377 DRM_ERROR("PCIe error recovery failed, err:%d", r);
6378 amdgpu_device_unset_mp1_state(adev);
6379 amdgpu_device_unlock_reset_domain(adev->reset_domain);
6382 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
6386 * amdgpu_pci_resume() - resume normal ops after PCI reset
6387 * @pdev: pointer to PCI device
6389 * Called when the error recovery driver tells us that it's
6390 * OK to resume normal operation.
6392 void amdgpu_pci_resume(struct pci_dev *pdev)
6394 struct drm_device *dev = pci_get_drvdata(pdev);
6395 struct amdgpu_device *adev = drm_to_adev(dev);
6399 DRM_INFO("PCI error: resume callback!!\n");
6401 /* Only continue execution for the case of pci_channel_io_frozen */
6402 if (adev->pci_channel_state != pci_channel_io_frozen)
6405 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6406 struct amdgpu_ring *ring = adev->rings[i];
6408 if (!amdgpu_ring_sched_ready(ring))
6411 drm_sched_start(&ring->sched);
6414 amdgpu_device_unset_mp1_state(adev);
6415 amdgpu_device_unlock_reset_domain(adev->reset_domain);
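/*
 * Save a copy of the PCI config space in adev->pci_state; returns true on
 * success. The cached state is later restored by
 * amdgpu_device_load_pci_state(), e.g. after a reset or a PCI error.
 */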
6418 bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
6420 struct drm_device *dev = pci_get_drvdata(pdev);
6421 struct amdgpu_device *adev = drm_to_adev(dev);
6424 r = pci_save_state(pdev);
6426 kfree(adev->pci_state);
6428 adev->pci_state = pci_store_saved_state(pdev);
6430 if (!adev->pci_state) {
6431 DRM_ERROR("Failed to store PCI saved state");
6435 DRM_WARN("Failed to save PCI state, err:%d\n", r);
6442 bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
6444 struct drm_device *dev = pci_get_drvdata(pdev);
6445 struct amdgpu_device *adev = drm_to_adev(dev);
6448 if (!adev->pci_state)
6451 r = pci_load_saved_state(pdev, adev->pci_state);
6454 pci_restore_state(pdev);
6456 DRM_WARN("Failed to load PCI state, err:%d\n", r);
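/*
 * HDP flush/invalidate helpers: skipped on bare-metal APUs and on parts
 * whose XGMI is connected to the CPU; otherwise the flush is emitted on the
 * given ring when it supports emit_hdp_flush, falling back to the
 * ASIC-specific register path.
 */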
6463 void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
6464 struct amdgpu_ring *ring)
6466 #ifdef CONFIG_X86_64
6467 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6470 if (adev->gmc.xgmi.connected_to_cpu)
6473 if (ring && ring->funcs->emit_hdp_flush)
6474 amdgpu_ring_emit_hdp_flush(ring);
6476 amdgpu_asic_flush_hdp(adev, ring);
6479 void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
6480 struct amdgpu_ring *ring)
6482 #ifdef CONFIG_X86_64
6483 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6486 if (adev->gmc.xgmi.connected_to_cpu)
6489 amdgpu_asic_invalidate_hdp(adev, ring);
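/* Non-zero while a GPU reset is in progress in this device's reset domain. */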
6492 int amdgpu_in_reset(struct amdgpu_device *adev)
6494 return atomic_read(&adev->reset_domain->in_gpu_reset);
6498 * amdgpu_device_halt() - bring hardware to some kind of halt state
6500 * @adev: amdgpu_device pointer
6502 * Bring hardware to some kind of halt state so that no one can touch it
6503 * any more. It helps to maintain the error context when an error occurs.
6504 * Compared to a simple hang, the system stays stable at least for SSH
6505 * access. Then it should be trivial to inspect the hardware state and
6506 * see what's going on. Implemented as follows:
6508 * 1. drm_dev_unplug() makes the device inaccessible to user space (IOCTLs, etc.),
6509 * clears all CPU mappings to device, disallows remappings through page faults
6510 * 2. amdgpu_irq_disable_all() disables all interrupts
6511 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
6512 * 4. set adev->no_hw_access to avoid potential crashes after step 5
6513 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
6514 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
6515 * flush any in flight DMA operations
6517 void amdgpu_device_halt(struct amdgpu_device *adev)
6519 struct pci_dev *pdev = adev->pdev;
6520 struct drm_device *ddev = adev_to_drm(adev);
6522 amdgpu_xcp_dev_unplug(adev);
6523 drm_dev_unplug(ddev);
6525 amdgpu_irq_disable_all(adev);
6527 amdgpu_fence_driver_hw_fini(adev);
6529 adev->no_hw_access = true;
6531 amdgpu_device_unmap_mmio(adev);
6533 pci_disable_device(pdev);
6534 pci_wait_for_pending_transaction(pdev);
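/*
 * Indirect PCIE port register accessors: the register offset is written to
 * the NBIO-provided index register and the value is then read from or
 * written to the matching data register, with pcie_idx_lock keeping the
 * index/data sequence atomic.
 */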
6537 u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
6540 unsigned long flags, address, data;
6543 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6544 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6546 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6547 WREG32(address, reg * 4);
6548 (void)RREG32(address);
6550 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6554 void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
6557 unsigned long flags, address, data;
6559 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6560 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6562 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6563 WREG32(address, reg * 4);
6564 (void)RREG32(address);
6567 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6571 * amdgpu_device_get_gang - return a reference to the current gang
6572 * @adev: amdgpu_device pointer
6574 * Returns: A new reference to the current gang leader.
6576 struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev)
6578 struct dma_fence *fence;
6581 fence = dma_fence_get_rcu_safe(&adev->gang_submit);
6587 * amdgpu_device_switch_gang - switch to a new gang
6588 * @adev: amdgpu_device pointer
6589 * @gang: the gang to switch to
6591 * Try to switch to a new gang.
6592 * Returns: NULL if we switched to the new gang or a reference to the current
6595 struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6596 struct dma_fence *gang)
6598 struct dma_fence *old = NULL;
6602 old = amdgpu_device_get_gang(adev);
6606 if (!dma_fence_is_signaled(old))
6609 } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
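/*
 * Report whether the ASIC has display hardware: older ASICs are matched by
 * asic_type, newer ones require a DCE IP version and the DMU not being
 * harvested.
 */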
6616 bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6618 switch (adev->asic_type) {
6619 #ifdef CONFIG_DRM_AMDGPU_SI
6623 /* chips with no display hardware */
6625 #ifdef CONFIG_DRM_AMDGPU_SI
6631 #ifdef CONFIG_DRM_AMDGPU_CIK
6640 case CHIP_POLARIS10:
6641 case CHIP_POLARIS11:
6642 case CHIP_POLARIS12:
6646 /* chips with display hardware */
6650 if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
6651 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
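/*
 * Poll a register until (value & mask) == expected_value or usec_timeout
 * expires, warning if the expected value was never reached.
 */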
6657 uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
6658 uint32_t inst, uint32_t reg_addr, char reg_name[],
6659 uint32_t expected_value, uint32_t mask)
6663 uint32_t tmp_ = RREG32(reg_addr);
6664 uint32_t loop = adev->usec_timeout;
6666 while ((tmp_ & (mask)) != (expected_value)) {
6668 loop = adev->usec_timeout;
6672 tmp_ = RREG32(reg_addr);
6675 DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n",
6676 inst, reg_name, (uint32_t)expected_value,
6677 (uint32_t)(tmp_ & (mask)));