drm/amdgpu: optionally do a writeback but don't invalidate TC for IB fences
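For context, a minimal user-space sketch of how a submitter might opt into the new behaviour. It assumes the companion UAPI bit AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE is exposed through amdgpu_drm.h as part of the same series; the helper name below is made up for illustration only.

#include <amdgpu_drm.h>	/* libdrm copy of the amdgpu UAPI header */

/* Illustrative helper, not part of the driver or libdrm: mark one IB chunk
 * so that the fence emitted after it writes dirty TC lines back to memory
 * but leaves the TC contents valid instead of invalidating them. */
static void ib_request_tc_wb_only(struct drm_amdgpu_cs_chunk_ib *ib_chunk)
{
	ib_chunk->flags |= AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE;
}
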
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index a162d87ca0c8e7bedf2d936f4cd63abf98fe534e..f70eeed9ed76fa893dabe2218c4c85c4b4aec104 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -127,6 +127,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
        struct amdgpu_vm *vm;
        uint64_t fence_ctx;
        uint32_t status = 0, alloc_size;
+       unsigned fence_flags = 0;
 
        unsigned i;
        int r = 0;
@@ -181,15 +182,18 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
                }
        }
 
-       if (ring->funcs->init_cond_exec)
+       if (job && ring->funcs->init_cond_exec)
                patch_offset = amdgpu_ring_init_cond_exec(ring);
 
-       if (ring->funcs->emit_hdp_flush
 #ifdef CONFIG_X86_64
-           && !(adev->flags & AMD_IS_APU)
+       if (!(adev->flags & AMD_IS_APU))
 #endif
-          )
-               amdgpu_ring_emit_hdp_flush(ring);
+       {
+               if (ring->funcs->emit_hdp_flush)
+                       amdgpu_ring_emit_hdp_flush(ring);
+               else
+                       amdgpu_asic_flush_hdp(adev, ring);
+       }
 
        skip_preamble = ring->current_ctx == fence_ctx;
        need_ctx_switch = ring->current_ctx != fence_ctx;
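
As a reading aid for the two HDP hunks (the flush above and the invalidate below), the effective condition can be written as a small helper; amdgpu_need_hdp_flush() is not a real driver function, it only names the logic for this sketch.

/* Sketch only, not a real driver function.  On x86-64 builds, APUs skip the
 * HDP flush/invalidate around the IBs; dGPUs, and every device on other
 * architectures, still get it. */
static inline bool amdgpu_need_hdp_flush(struct amdgpu_device *adev)
{
#ifdef CONFIG_X86_64
	if (adev->flags & AMD_IS_APU)
		return false;
#endif
	return true;
}
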
@@ -219,14 +223,15 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
        if (ring->funcs->emit_tmz)
                amdgpu_ring_emit_tmz(ring, false);
 
-       if (ring->funcs->emit_hdp_invalidate
 #ifdef CONFIG_X86_64
-           && !(adev->flags & AMD_IS_APU)
+       if (!(adev->flags & AMD_IS_APU))
 #endif
-          )
-               amdgpu_ring_emit_hdp_invalidate(ring);
+               amdgpu_asic_invalidate_hdp(adev, ring);
+
+       if (ib->flags & AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE)
+               fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY;
 
-       r = amdgpu_fence_emit(ring, f);
+       r = amdgpu_fence_emit(ring, f, fence_flags);
        if (r) {
                dev_err(adev->dev, "failed to emit fence (%d)\n", r);
                if (job && job->vmid)
@@ -241,7 +246,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
        /* wrap the last IB with fence */
        if (job && job->uf_addr) {
                amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence,
-                                      AMDGPU_FENCE_FLAG_64BIT);
+                                      fence_flags | AMDGPU_FENCE_FLAG_64BIT);
        }
 
        if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
@@ -278,11 +283,6 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev)
                return r;
        }
 
-       r = amdgpu_sa_bo_manager_start(adev, &adev->ring_tmp_bo);
-       if (r) {
-               return r;
-       }
-
        adev->ib_pool_ready = true;
        if (amdgpu_debugfs_sa_init(adev)) {
                dev_err(adev->dev, "failed to register debugfs file for SA\n");
@@ -301,7 +301,6 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev)
 void amdgpu_ib_pool_fini(struct amdgpu_device *adev)
 {
        if (adev->ib_pool_ready) {
-               amdgpu_sa_bo_manager_suspend(adev, &adev->ring_tmp_bo);
                amdgpu_sa_bo_manager_fini(adev, &adev->ring_tmp_bo);
                adev->ib_pool_ready = false;
        }
@@ -321,14 +320,45 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
 {
        unsigned i;
        int r, ret = 0;
+       long tmo_gfx, tmo_mm;
+
+       tmo_mm = tmo_gfx = AMDGPU_IB_TEST_TIMEOUT;
+       if (amdgpu_sriov_vf(adev)) {
+               /* The MM engines on the hypervisor side are not scheduled together
+                * with the CP and SDMA engines, so even in exclusive mode an MM
+                * engine could still be running on another VF.  The IB test timeout
+                * for MM engines under SR-IOV therefore has to be long; 8 seconds
+                * should be enough for the MM engine to come back to this VF.
+                */
+               tmo_mm = 8 * AMDGPU_IB_TEST_TIMEOUT;
+       }
+
+       if (amdgpu_sriov_runtime(adev)) {
+               /* The CP and SDMA engines are scheduled together, so the timeout
+                * has to be wide enough to cover the time spent waiting for them
+                * to come back while under RUNTIME only.
+                */
+               tmo_gfx = 8 * AMDGPU_IB_TEST_TIMEOUT;
+       }
 
        for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
                struct amdgpu_ring *ring = adev->rings[i];
+               long tmo;
 
                if (!ring || !ring->ready)
                        continue;
 
-               r = amdgpu_ring_test_ib(ring, AMDGPU_IB_TEST_TIMEOUT);
+               /* MM engines need more time */
+               if (ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
+                       ring->funcs->type == AMDGPU_RING_TYPE_VCE ||
+                       ring->funcs->type == AMDGPU_RING_TYPE_UVD_ENC ||
+                       ring->funcs->type == AMDGPU_RING_TYPE_VCN_DEC ||
+                       ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
+                       tmo = tmo_mm;
+               else
+                       tmo = tmo_gfx;
+
+               r = amdgpu_ring_test_ib(ring, tmo);
                if (r) {
                        ring->ready = false;
 
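
To make the per-ring timeout selection above explicit, here is a hypothetical helper (amdgpu_ib_test_timeout() is not part of the driver) that returns the value the loop picks for each ring type:

/*
 * Hypothetical helper, not in the driver: returns the IB-test timeout the
 * loop above ends up using for a given ring.  Multimedia rings (UVD, VCE,
 * VCN) use tmo_mm, which is extended to 8x under SR-IOV; all other rings
 * use tmo_gfx, which is extended only when SR-IOV runtime mode is active.
 */
static long amdgpu_ib_test_timeout(struct amdgpu_ring *ring,
				   long tmo_gfx, long tmo_mm)
{
	switch (ring->funcs->type) {
	case AMDGPU_RING_TYPE_UVD:
	case AMDGPU_RING_TYPE_VCE:
	case AMDGPU_RING_TYPE_UVD_ENC:
	case AMDGPU_RING_TYPE_VCN_DEC:
	case AMDGPU_RING_TYPE_VCN_ENC:
		return tmo_mm;
	default:
		return tmo_gfx;
	}
}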