drm/amdgpu: optionally do a writeback but don't invalidate TC for IB fences
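For context, a minimal user-space sketch of how a submitter might opt into the new behaviour. It assumes the companion UAPI bit AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE is exposed through amdgpu_drm.h as part of the same series; the helper name below is made up for illustration only.

#include <amdgpu_drm.h>	/* libdrm copy of the amdgpu UAPI header */

/* Illustrative helper, not part of the driver or libdrm: mark one IB chunk
 * so that the fence emitted after it writes dirty TC lines back to memory
 * but leaves the TC contents valid instead of invalidating them. */
static void ib_request_tc_wb_only(struct drm_amdgpu_cs_chunk_ib *ib_chunk)
{
	ib_chunk->flags |= AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE;
}
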
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index a162d87ca0c8e7bedf2d936f4cd63abf98fe534e..f70eeed9ed76fa893dabe2218c4c85c4b4aec104 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -127,6 +127,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
        struct amdgpu_vm *vm;
        uint64_t fence_ctx;
        uint32_t status = 0, alloc_size;
+       unsigned fence_flags = 0;
 
        unsigned i;
        int r = 0;
@@ -181,15 +182,18 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
                }
        }
 
-       if (ring->funcs->init_cond_exec)
+       if (job && ring->funcs->init_cond_exec)
                patch_offset = amdgpu_ring_init_cond_exec(ring);
 
-       if (ring->funcs->emit_hdp_flush
 #ifdef CONFIG_X86_64
-           && !(adev->flags & AMD_IS_APU)
+       if (!(adev->flags & AMD_IS_APU))
 #endif
-          )
-               amdgpu_ring_emit_hdp_flush(ring);
+       {
+               if (ring->funcs->emit_hdp_flush)
+                       amdgpu_ring_emit_hdp_flush(ring);
+               else
+                       amdgpu_asic_flush_hdp(adev, ring);
+       }
 
        skip_preamble = ring->current_ctx == fence_ctx;
        need_ctx_switch = ring->current_ctx != fence_ctx;
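
As a reading aid for the two HDP hunks (the flush above and the invalidate below), the effective condition can be written as a small helper; amdgpu_need_hdp_flush() is not a real driver function, it only names the logic for this sketch.

/* Sketch only, not a real driver function.  On x86-64 builds, APUs skip the
 * HDP flush/invalidate around the IBs; dGPUs, and every device on other
 * architectures, still get it. */
static inline bool amdgpu_need_hdp_flush(struct amdgpu_device *adev)
{
#ifdef CONFIG_X86_64
	if (adev->flags & AMD_IS_APU)
		return false;
#endif
	return true;
}
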
@@ -219,14 +223,15 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
        if (ring->funcs->emit_tmz)
                amdgpu_ring_emit_tmz(ring, false);
 
-       if (ring->funcs->emit_hdp_invalidate
 #ifdef CONFIG_X86_64
-           && !(adev->flags & AMD_IS_APU)
+       if (!(adev->flags & AMD_IS_APU))
 #endif
-          )
-               amdgpu_ring_emit_hdp_invalidate(ring);
+               amdgpu_asic_invalidate_hdp(adev, ring);
+
+       if (ib->flags & AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE)
+               fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY;
 
-       r = amdgpu_fence_emit(ring, f);
+       r = amdgpu_fence_emit(ring, f, fence_flags);
        if (r) {
                dev_err(adev->dev, "failed to emit fence (%d)\n", r);
                if (job && job->vmid)
@@ -241,7 +246,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
        /* wrap the last IB with fence */
        if (job && job->uf_addr) {
                amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence,
-                                      AMDGPU_FENCE_FLAG_64BIT);
+                                      fence_flags | AMDGPU_FENCE_FLAG_64BIT);
        }
 
        if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
@@ -278,11 +283,6 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev)
                return r;
        }
 
-       r = amdgpu_sa_bo_manager_start(adev, &adev->ring_tmp_bo);
-       if (r) {
-               return r;
-       }
-
        adev->ib_pool_ready = true;
        if (amdgpu_debugfs_sa_init(adev)) {
                dev_err(adev->dev, "failed to register debugfs file for SA\n");
@@ -301,7 +301,6 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev)
 void amdgpu_ib_pool_fini(struct amdgpu_device *adev)
 {
        if (adev->ib_pool_ready) {
-               amdgpu_sa_bo_manager_suspend(adev, &adev->ring_tmp_bo);
                amdgpu_sa_bo_manager_fini(adev, &adev->ring_tmp_bo);
                adev->ib_pool_ready = false;
        }
@@ -321,14 +320,45 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
 {
        unsigned i;
        int r, ret = 0;
+       long tmo_gfx, tmo_mm;
+
+       tmo_mm = tmo_gfx = AMDGPU_IB_TEST_TIMEOUT;
+       if (amdgpu_sriov_vf(adev)) {
+               /* The MM engines on the hypervisor side are not scheduled together
+                * with the CP and SDMA engines, so even in exclusive mode an MM
+                * engine could still be running on another VF.  The IB test timeout
+                * for MM engines under SR-IOV therefore has to be long; 8 seconds
+                * should be enough for the MM engine to come back to this VF.
+                */
+               tmo_mm = 8 * AMDGPU_IB_TEST_TIMEOUT;
+       }
+
+       if (amdgpu_sriov_runtime(adev)) {
+               /* The CP and SDMA engines are scheduled together, so the timeout
+                * has to be wide enough to cover the time spent waiting for them
+                * to come back while under RUNTIME only.
+                */
+               tmo_gfx = 8 * AMDGPU_IB_TEST_TIMEOUT;
+       }
 
        for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
                struct amdgpu_ring *ring = adev->rings[i];
+               long tmo;
 
                if (!ring || !ring->ready)
                        continue;
 
-               r = amdgpu_ring_test_ib(ring, AMDGPU_IB_TEST_TIMEOUT);
+               /* MM engines need more time */
+               if (ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
+                       ring->funcs->type == AMDGPU_RING_TYPE_VCE ||
+                       ring->funcs->type == AMDGPU_RING_TYPE_UVD_ENC ||
+                       ring->funcs->type == AMDGPU_RING_TYPE_VCN_DEC ||
+                       ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
+                       tmo = tmo_mm;
+               else
+                       tmo = tmo_gfx;
+
+               r = amdgpu_ring_test_ib(ring, tmo);
                if (r) {
                        ring->ready = false;
 
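
To make the per-ring timeout selection above explicit, here is a hypothetical helper (amdgpu_ib_test_timeout() is not part of the driver) that returns the value the loop picks for each ring type:

/*
 * Hypothetical helper, not in the driver: returns the IB-test timeout the
 * loop above ends up using for a given ring.  Multimedia rings (UVD, VCE,
 * VCN) use tmo_mm, which is extended to 8x under SR-IOV; all other rings
 * use tmo_gfx, which is extended only when SR-IOV runtime mode is active.
 */
static long amdgpu_ib_test_timeout(struct amdgpu_ring *ring,
				   long tmo_gfx, long tmo_mm)
{
	switch (ring->funcs->type) {
	case AMDGPU_RING_TYPE_UVD:
	case AMDGPU_RING_TYPE_VCE:
	case AMDGPU_RING_TYPE_UVD_ENC:
	case AMDGPU_RING_TYPE_VCN_DEC:
	case AMDGPU_RING_TYPE_VCN_ENC:
		return tmo_mm;
	default:
		return tmo_gfx;
	}
}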