Merge tag 'drm-intel-next-fixes-2020-10-22' of git://anongit.freedesktop.org/drm...

[linux.git] / drivers / gpu / drm / amd / amdgpu / amdgpu_drv.c
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

index 26127c7d2f32d6ccabc5d65fee42907493367519..c241317edee7813a8530336d43f319582b766ebe 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -26,12 +26,12 @@
  #include <drm/drm_drv.h>
  #include <drm/drm_gem.h>
  #include <drm/drm_vblank.h>
+#include <drm/drm_managed.h>
  #include "amdgpu_drv.h"
  
  #include <drm/drm_pciids.h>
  #include <linux/console.h>
  #include <linux/module.h>
-#include <linux/pci.h>
  #include <linux/pm_runtime.h>
  #include <linux/vga_switcheroo.h>
  #include <drm/drm_probe_helper.h>
@@ -88,9 +88,10 @@
   * - 3.37.0 - L2 is invalidated before SDMA IBs, needed for correctness
   * - 3.38.0 - Add AMDGPU_IB_FLAG_EMIT_MEM_SYNC
   * - 3.39.0 - DMABUF implicit sync does a full pipeline sync
+ * - 3.40.0 - Add AMDGPU_IDS_FLAGS_TMZ
   */
  #define KMS_DRIVER_MAJOR       3
-#define KMS_DRIVER_MINOR       39
+#define KMS_DRIVER_MINOR       40
  #define KMS_DRIVER_PATCHLEVEL  0
  
  int amdgpu_vram_limit = 0;
@@ -146,16 +147,18 @@ int amdgpu_async_gfx_ring = 1;
  int amdgpu_mcbp = 0;
  int amdgpu_discovery = -1;
  int amdgpu_mes = 0;
-int amdgpu_noretry;
+int amdgpu_noretry = -1;
  int amdgpu_force_asic_type = -1;
  int amdgpu_tmz = 0;
  int amdgpu_reset_method = -1; /* auto */
+int amdgpu_num_kcq = -1;
  
  struct amdgpu_mgpu_info mgpu_info = {
         .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
  };
  int amdgpu_ras_enable = -1;
  uint amdgpu_ras_mask = 0xffffffff;
+int amdgpu_bad_page_threshold = -1;
  
  /**
   * DOC: vramlimit (int)
@@ -393,12 +396,12 @@ MODULE_PARM_DESC(sched_hw_submission, "the max number of HW submissions (default
  module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444);
  
  /**
- * DOC: ppfeaturemask (uint)
+ * DOC: ppfeaturemask (hexint)
   * Override power features enabled. See enum PP_FEATURE_MASK in drivers/gpu/drm/amd/include/amd_shared.h.
   * The default is the current set of stable power features.
   */
  MODULE_PARM_DESC(ppfeaturemask, "all power features enabled (default))");
-module_param_named(ppfeaturemask, amdgpu_pp_feature_mask, uint, 0444);
+module_param_named(ppfeaturemask, amdgpu_pp_feature_mask, hexint, 0444);
  
  /**
   * DOC: forcelongtraining (uint)
@@ -593,8 +596,13 @@ MODULE_PARM_DESC(mes,
         "Enable Micro Engine Scheduler (0 = disabled (default), 1 = enabled)");
  module_param_named(mes, amdgpu_mes, int, 0444);
  
+/**
+ * DOC: noretry (int)
+ * Disable retry faults in the GPU memory controller.
+ * (0 = retry enabled, 1 = retry disabled, -1 = auto (default))
+ */
  MODULE_PARM_DESC(noretry,
-       "Disable retry faults (0 = retry enabled (default), 1 = retry disabled)");
+       "Disable retry faults (0 = retry enabled, 1 = retry disabled, -1 auto (default))");
  module_param_named(noretry, amdgpu_noretry, int, 0644);
  
  /**
@@ -676,11 +684,14 @@ MODULE_PARM_DESC(debug_largebar,
   * Ignore CRAT table during KFD initialization. By default, KFD uses the ACPI CRAT
   * table to get information about AMD APUs. This option can serve as a workaround on
   * systems with a broken CRAT table.
+ *
+ * Default is auto (the ASIC type, iommu_v2, and the CRAT table determine
+ * whether to use CRAT).
   */
  int ignore_crat;
  module_param(ignore_crat, int, 0444);
  MODULE_PARM_DESC(ignore_crat,
-       "Ignore CRAT table during KFD initialization (0 = use CRAT (default), 1 = ignore CRAT)");
+       "Ignore CRAT table during KFD initialization (0 = auto (default), 1 = ignore CRAT)");
  
  /**
   * DOC: halt_if_hws_hang (int)
@@ -715,6 +726,15 @@ MODULE_PARM_DESC(queue_preemption_timeout_ms, "queue preemption timeout in ms (1
  bool debug_evictions;
  module_param(debug_evictions, bool, 0644);
  MODULE_PARM_DESC(debug_evictions, "enable eviction debug messages (false = default)");
+
+/**
+ * DOC: no_system_mem_limit (bool)
+ * Disable the system memory limit, to support memory shared between multiple processes
+ */
+bool no_system_mem_limit;
+module_param(no_system_mem_limit, bool, 0644);
+MODULE_PARM_DESC(no_system_mem_limit, "disable system memory limit (false = default)");
+
  #endif
  
  /**
@@ -765,6 +785,19 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);
  MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
  module_param_named(reset_method, amdgpu_reset_method, int, 0444);
  
+/**
+ * DOC: bad_page_threshold (int)
+ * The bad page threshold specifies the maximum number of faulty pages
+ * detected by RAS ECC. If the total number of faulty pages exceeds this
+ * threshold, the GPU may enter a bad status and is left in that state for
+ * the user to investigate further.
+ */
+MODULE_PARM_DESC(bad_page_threshold, "Bad page threshold(-1 = auto(default value), 0 = disable bad page retirement)");
+module_param_named(bad_page_threshold, amdgpu_bad_page_threshold, int, 0444);
+
+MODULE_PARM_DESC(num_kcq, "number of kernel compute queue user want to setup (8 if set to greater than 8 or less than 0, only affect gfx 8+)");
+module_param_named(num_kcq, amdgpu_num_kcq, int, 0444);
+
  static const struct pci_device_id pciidlist[] = {
  #ifdef  CONFIG_DRM_AMDGPU_SI
         {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
@@ -1044,8 +1077,16 @@ static const struct pci_device_id pciidlist[] = {
         {0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU},
  
         /* Navi12 */
-       {0x1002, 0x7360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12|AMD_EXP_HW_SUPPORT},
-       {0x1002, 0x7362, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12|AMD_EXP_HW_SUPPORT},
+       {0x1002, 0x7360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12},
+       {0x1002, 0x7362, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12},
+
+       /* Sienna_Cichlid */
+       {0x1002, 0x73A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+       {0x1002, 0x73A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+       {0x1002, 0x73A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+       {0x1002, 0x73AB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+       {0x1002, 0x73AE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+       {0x1002, 0x73BF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
  
         {0, 0, 0}
  };
@@ -1057,7 +1098,7 @@ static struct drm_driver kms_driver;
  static int amdgpu_pci_probe(struct pci_dev *pdev,
                             const struct pci_device_id *ent)
  {
-       struct drm_device *dev;
+       struct drm_device *ddev;
         struct amdgpu_device *adev;
         unsigned long flags = ent->driver_data;
         int ret, retry = 0;
@@ -1073,6 +1114,16 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
                 return -ENODEV;
         }
  
+       /* Due to hardware bugs, S/G Display on raven requires a 1:1 IOMMU mapping,
+        * however, SME requires an indirect IOMMU mapping because the encryption
+        * bit is beyond the DMA mask of the chip.
+        */
+       if (mem_encrypt_active() && ((flags & AMD_ASIC_MASK) == CHIP_RAVEN)) {
+               dev_info(&pdev->dev,
+                        "SME is not compatible with RAVEN\n");
+               return -ENOTSUPP;
+       }
+
  #ifdef CONFIG_DRM_AMDGPU_SI
         if (!amdgpu_si_support) {
                 switch (flags & AMD_ASIC_MASK) {
@@ -1113,36 +1164,39 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
         if (ret)
                 return ret;
  
-       dev = drm_dev_alloc(&kms_driver, &pdev->dev);
-       if (IS_ERR(dev))
-               return PTR_ERR(dev);
+       adev = devm_drm_dev_alloc(&pdev->dev, &kms_driver, typeof(*adev), ddev);
+       if (IS_ERR(adev))
+               return PTR_ERR(adev);
+
+       adev->dev  = &pdev->dev;
+       adev->pdev = pdev;
+       ddev = adev_to_drm(adev);
  
         if (!supports_atomic)
-               dev->driver_features &= ~DRIVER_ATOMIC;
+               ddev->driver_features &= ~DRIVER_ATOMIC;
  
         ret = pci_enable_device(pdev);
         if (ret)
-               goto err_free;
-
-       dev->pdev = pdev;
+               return ret;
  
-       pci_set_drvdata(pdev, dev);
+       ddev->pdev = pdev;
+       pci_set_drvdata(pdev, ddev);
  
-       ret = amdgpu_driver_load_kms(dev, ent->driver_data);
+       ret = amdgpu_driver_load_kms(adev, ent->driver_data);
         if (ret)
                 goto err_pci;
  
  retry_init:
-       ret = drm_dev_register(dev, ent->driver_data);
+       ret = drm_dev_register(ddev, ent->driver_data);
         if (ret == -EAGAIN && ++retry <= 3) {
                 DRM_INFO("retry init %d\n", retry);
                 /* Don't request EX mode too frequently which is attacking */
                 msleep(5000);
                 goto retry_init;
-       } else if (ret)
+       } else if (ret) {
                 goto err_pci;
+       }
  
-       adev = dev->dev_private;
         ret = amdgpu_debugfs_init(adev);
         if (ret)
                 DRM_ERROR("Creating debugfs files failed (%d).\n", ret);
@@ -1151,8 +1205,6 @@ retry_init:
  
  err_pci:
         pci_disable_device(pdev);
-err_free:
-       drm_dev_put(dev);
         return ret;
  }
  
@@ -1169,14 +1221,13 @@ amdgpu_pci_remove(struct pci_dev *pdev)
         amdgpu_driver_unload_kms(dev);
         pci_disable_device(pdev);
         pci_set_drvdata(pdev, NULL);
-       drm_dev_put(dev);
  }
  
  static void
  amdgpu_pci_shutdown(struct pci_dev *pdev)
  {
         struct drm_device *dev = pci_get_drvdata(pdev);
-       struct amdgpu_device *adev = dev->dev_private;
+       struct amdgpu_device *adev = drm_to_adev(dev);
  
         if (amdgpu_ras_intr_triggered())
                 return;
@@ -1209,7 +1260,7 @@ static int amdgpu_pmops_resume(struct device *dev)
  static int amdgpu_pmops_freeze(struct device *dev)
  {
         struct drm_device *drm_dev = dev_get_drvdata(dev);
-       struct amdgpu_device *adev = drm_dev->dev_private;
+       struct amdgpu_device *adev = drm_to_adev(drm_dev);
         int r;
  
         adev->in_hibernate = true;
@@ -1245,7 +1296,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
  {
         struct pci_dev *pdev = to_pci_dev(dev);
         struct drm_device *drm_dev = pci_get_drvdata(pdev);
-       struct amdgpu_device *adev = drm_dev->dev_private;
+       struct amdgpu_device *adev = drm_to_adev(drm_dev);
         int ret, i;
  
         if (!adev->runpm) {
@@ -1279,7 +1330,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
                 if (amdgpu_is_atpx_hybrid()) {
                         pci_ignore_hotplug(pdev);
                 } else {
-                       pci_save_state(pdev);
+                       amdgpu_device_cache_pci_state(pdev);
                         pci_disable_device(pdev);
                         pci_ignore_hotplug(pdev);
                         pci_set_power_state(pdev, PCI_D3cold);
@@ -1296,7 +1347,7 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)
  {
         struct pci_dev *pdev = to_pci_dev(dev);
         struct drm_device *drm_dev = pci_get_drvdata(pdev);
-       struct amdgpu_device *adev = drm_dev->dev_private;
+       struct amdgpu_device *adev = drm_to_adev(drm_dev);
         int ret;
  
         if (!adev->runpm)
@@ -1312,7 +1363,7 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)
                         pci_set_master(pdev);
                 } else {
                         pci_set_power_state(pdev, PCI_D0);
-                       pci_restore_state(pdev);
+                       amdgpu_device_load_pci_state(pdev);
                         ret = pci_enable_device(pdev);
                         if (ret)
                                 return ret;
@@ -1332,7 +1383,7 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)
  static int amdgpu_pmops_runtime_idle(struct device *dev)
  {
         struct drm_device *drm_dev = dev_get_drvdata(dev);
-       struct amdgpu_device *adev = drm_dev->dev_private;
+       struct amdgpu_device *adev = drm_to_adev(drm_dev);
         /* we don't want the main rpm_idle to call suspend - we want to autosuspend */
         int ret = 1;
  
@@ -1491,6 +1542,13 @@ static struct drm_driver kms_driver = {
         .patchlevel = KMS_DRIVER_PATCHLEVEL,
  };
  
+static struct pci_error_handlers amdgpu_pci_err_handler = {
+       .error_detected = amdgpu_pci_error_detected,
+       .mmio_enabled   = amdgpu_pci_mmio_enabled,
+       .slot_reset     = amdgpu_pci_slot_reset,
+       .resume         = amdgpu_pci_resume,
+};
+
  static struct pci_driver amdgpu_kms_pci_driver = {
         .name = DRIVER_NAME,
         .id_table = pciidlist,
@@ -1498,10 +1556,9 @@ static struct pci_driver amdgpu_kms_pci_driver = {
         .remove = amdgpu_pci_remove,
         .shutdown = amdgpu_pci_shutdown,
         .driver.pm = &amdgpu_pm_ops,
+       .err_handler = &amdgpu_pci_err_handler,
  };
  
-
-
  static int __init amdgpu_init(void)
  {
         int r;