diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 6819fe5612d9e7a1a1f2b9a148b0990f5533ba68..e7a010b7ca1f81931f9ff176b8f854899e7a83ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -331,10 +331,13 @@ static int amdgpu_ctx_query(struct amdgpu_device *adev,
        return 0;
 }
 
+#define AMDGPU_RAS_COUNTE_DELAY_MS 3000
+
 static int amdgpu_ctx_query2(struct amdgpu_device *adev,
-       struct amdgpu_fpriv *fpriv, uint32_t id,
-       union drm_amdgpu_ctx_out *out)
+                            struct amdgpu_fpriv *fpriv, uint32_t id,
+                            union drm_amdgpu_ctx_out *out)
 {
+       struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
        struct amdgpu_ctx *ctx;
        struct amdgpu_ctx_mgr *mgr;
 
@@ -361,6 +364,30 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,
        if (atomic_read(&ctx->guilty))
                out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;
 
+       if (adev->ras_enabled && con) {
+               /* Return the cached values in O(1),
+                * and schedule delayed work to cache
+                * new values.
+                */
+               int ce_count, ue_count;
+
+               ce_count = atomic_read(&con->ras_ce_count);
+               ue_count = atomic_read(&con->ras_ue_count);
+
+               if (ce_count != ctx->ras_counter_ce) {
+                       ctx->ras_counter_ce = ce_count;
+                       out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_CE;
+               }
+
+               if (ue_count != ctx->ras_counter_ue) {
+                       ctx->ras_counter_ue = ue_count;
+                       out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_UE;
+               }
+
+               schedule_delayed_work(&con->ras_counte_delay_work,
+                                     msecs_to_jiffies(AMDGPU_RAS_COUNTE_DELAY_MS));
+       }
+
        mutex_unlock(&mgr->lock);
        return 0;
 }
@@ -635,3 +662,81 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
        idr_destroy(&mgr->ctx_handles);
        mutex_destroy(&mgr->lock);
 }
+
+static void amdgpu_ctx_fence_time(struct amdgpu_ctx *ctx,
+               struct amdgpu_ctx_entity *centity, ktime_t *total, ktime_t *max)
+{
+       ktime_t now, t1;
+       uint32_t i;
+
+       *total = *max = 0;
+
+       now = ktime_get();
+       for (i = 0; i < amdgpu_sched_jobs; i++) {
+               struct dma_fence *fence;
+               struct drm_sched_fence *s_fence;
+
+               spin_lock(&ctx->ring_lock);
+               fence = dma_fence_get(centity->fences[i]);
+               spin_unlock(&ctx->ring_lock);
+               if (!fence)
+                       continue;
+               s_fence = to_drm_sched_fence(fence);
+               if (!dma_fence_is_signaled(&s_fence->scheduled)) {
+                       dma_fence_put(fence);
+                       continue;
+               }
+               t1 = s_fence->scheduled.timestamp;
+               if (!ktime_before(t1, now)) {
+                       dma_fence_put(fence);
+                       continue;
+               }
+               if (dma_fence_is_signaled(&s_fence->finished) &&
+                       s_fence->finished.timestamp < now)
+                       *total += ktime_sub(s_fence->finished.timestamp, t1);
+               else
+                       *total += ktime_sub(now, t1);
+               t1 = ktime_sub(now, t1);
+               dma_fence_put(fence);
+               *max = max(t1, *max);
+       }
+}
+
+ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip,
+               uint32_t idx, uint64_t *elapsed)
+{
+       struct idr *idp;
+       struct amdgpu_ctx *ctx;
+       uint32_t id;
+       struct amdgpu_ctx_entity *centity;
+       ktime_t total = 0, max = 0;
+
+       if (idx >= AMDGPU_MAX_ENTITY_NUM)
+               return 0;
+       idp = &mgr->ctx_handles;
+       mutex_lock(&mgr->lock);
+       idr_for_each_entry(idp, ctx, id) {
+               ktime_t ttotal, tmax;
+
+               if (!ctx->entities[hwip][idx])
+                       continue;
+
+               centity = ctx->entities[hwip][idx];
+               amdgpu_ctx_fence_time(ctx, centity, &ttotal, &tmax);
+
+               /* Harmonic mean approximation diverges for very small
+                * values. If ratio < 0.01% ignore
+                */
+               if (AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(tmax, ttotal))
+                       continue;
+
+               total = ktime_add(total, ttotal);
+               max = ktime_after(tmax, max) ? tmax : max;
+       }
+
+       mutex_unlock(&mgr->lock);
+       if (elapsed)
+               *elapsed = max;
+
+       return total;
+}
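
Note: the query path above only reads the cached counters and reschedules the delayed work; the handler that actually refreshes con->ras_ce_count and con->ras_ue_count lives in the RAS code and is not part of this file. A minimal sketch of what such a handler could look like follows, assuming struct amdgpu_ras carries an adev back-pointer and the ras_counte_delay_work member used above, and assuming an amdgpu_ras_query_error_count() style helper that fills both totals (everything not visible in this diff is an assumption):

    /* Sketch only -- not part of this diff.  Re-reads the (potentially
     * slow) hardware error counters and publishes them for the O(1)
     * read in amdgpu_ctx_query2().
     */
    static void amdgpu_ras_counte_dw(struct work_struct *work)
    {
            struct amdgpu_ras *con = container_of(work, struct amdgpu_ras,
                                                  ras_counte_delay_work.work);
            struct amdgpu_device *adev = con->adev;
            unsigned long ce_count = 0, ue_count = 0;

            /* Query the RAS blocks for fresh totals... */
            amdgpu_ras_query_error_count(adev, &ce_count, &ue_count);

            /* ...and cache them for the next AMDGPU_CTX_OP_QUERY_STATE2. */
            atomic_set(&con->ras_ce_count, ce_count);
            atomic_set(&con->ras_ue_count, ue_count);
    }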
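
AMDGPU_CTX_FENCE_USAGE_MIN_RATIO() is used in amdgpu_ctx_mgr_fence_usage() but is not defined in this hunk; it comes from a header outside the diff. Consistent with the "ratio < 0.01%" comment, a definition along the following lines would skip contexts whose longest single fence time dwarfs the accumulated total; the constant is illustrative only:

    /* Illustrative only; the real definition lives outside this diff.
     * Ignore the context when total/max falls below ~0.01% (1/16384).
     */
    #define AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(max, total) \
            ((max) > 16384ULL * (total))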
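
The new amdgpu_ctx_mgr_fence_usage() helper is non-static, so it is meant to be consumed outside this file, e.g. for per-client usage reporting. A hypothetical caller sketch is below: it walks every hardware IP and entity slot of one file's context manager. AMDGPU_HW_IP_NUM, AMDGPU_MAX_ENTITY_NUM, the ctx_mgr member of struct amdgpu_fpriv and seq_printf() exist in the surrounding kernel; amdgpu_print_fence_usage() itself is an invented name:

    /* Hypothetical consumer, e.g. for fdinfo-style reporting.  The function
     * name is invented; amdgpu_ctx_mgr_fence_usage() is the helper added above.
     */
    static void amdgpu_print_fence_usage(struct amdgpu_fpriv *fpriv,
                                         struct seq_file *m)
    {
            uint32_t hwip, idx;

            for (hwip = 0; hwip < AMDGPU_HW_IP_NUM; ++hwip) {
                    for (idx = 0; idx < AMDGPU_MAX_ENTITY_NUM; ++idx) {
                            uint64_t max_elapsed = 0;
                            ktime_t busy;

                            busy = amdgpu_ctx_mgr_fence_usage(&fpriv->ctx_mgr,
                                                              hwip, idx,
                                                              &max_elapsed);
                            if (!busy)
                                    continue;

                            seq_printf(m,
                                       "hwip %u entity %u: busy %lld ns (longest job %llu ns)\n",
                                       hwip, idx, ktime_to_ns(busy), max_elapsed);
                    }
            }
    }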