drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c

   1 /*
   2  * Copyright 2015 Advanced Micro Devices, Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in
  12  * all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20  * OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  * Authors: monk liu <[email protected]>
  23  */
  24
  25 #include <drm/drm_auth.h>
  26 #include <drm/drm_drv.h>
  27 #include "amdgpu.h"
  28 #include "amdgpu_sched.h"
  29 #include "amdgpu_ras.h"
  30 #include <linux/nospec.h>
  31
  32 #define to_amdgpu_ctx_entity(e) \
  33         container_of((e), struct amdgpu_ctx_entity, entity)
  34
  35 const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
  36         [AMDGPU_HW_IP_GFX]      =       1,
  37         [AMDGPU_HW_IP_COMPUTE]  =       4,
  38         [AMDGPU_HW_IP_DMA]      =       2,
  39         [AMDGPU_HW_IP_UVD]      =       1,
  40         [AMDGPU_HW_IP_VCE]      =       1,
  41         [AMDGPU_HW_IP_UVD_ENC]  =       1,
  42         [AMDGPU_HW_IP_VCN_DEC]  =       1,
  43         [AMDGPU_HW_IP_VCN_ENC]  =       1,
  44         [AMDGPU_HW_IP_VCN_JPEG] =       1,
  45 };
  46
  47 bool amdgpu_ctx_priority_is_valid(int32_t ctx_prio)
  48 {
  49         switch (ctx_prio) {
  50         case AMDGPU_CTX_PRIORITY_UNSET:
  51         case AMDGPU_CTX_PRIORITY_VERY_LOW:
  52         case AMDGPU_CTX_PRIORITY_LOW:
  53         case AMDGPU_CTX_PRIORITY_NORMAL:
  54         case AMDGPU_CTX_PRIORITY_HIGH:
  55         case AMDGPU_CTX_PRIORITY_VERY_HIGH:
  56                 return true;
  57         default:
  58                 return false;
  59         }
  60 }
  61
  62 static enum drm_sched_priority
  63 amdgpu_ctx_to_drm_sched_prio(int32_t ctx_prio)
  64 {
  65         switch (ctx_prio) {
  66         case AMDGPU_CTX_PRIORITY_UNSET:
  67                 return DRM_SCHED_PRIORITY_UNSET;
  68
  69         case AMDGPU_CTX_PRIORITY_VERY_LOW:
  70                 return DRM_SCHED_PRIORITY_MIN;
  71
  72         case AMDGPU_CTX_PRIORITY_LOW:
  73                 return DRM_SCHED_PRIORITY_MIN;
  74
  75         case AMDGPU_CTX_PRIORITY_NORMAL:
  76                 return DRM_SCHED_PRIORITY_NORMAL;
  77
  78         case AMDGPU_CTX_PRIORITY_HIGH:
  79                 return DRM_SCHED_PRIORITY_HIGH;
  80
  81         case AMDGPU_CTX_PRIORITY_VERY_HIGH:
  82                 return DRM_SCHED_PRIORITY_HIGH;
  83
  84         /* This should not happen as we sanitized userspace provided priority
  85          * already, WARN if this happens.
  86          */
  87         default:
  88                 WARN(1, "Invalid context priority %d\n", ctx_prio);
  89                 return DRM_SCHED_PRIORITY_NORMAL;
  90         }
  91
  92 }
  93
  94 static int amdgpu_ctx_priority_permit(struct drm_file *filp,
  95                                       int32_t priority)
  96 {
  97         if (!amdgpu_ctx_priority_is_valid(priority))
  98                 return -EINVAL;
  99
 100         /* NORMAL and below are accessible by everyone */
 101         if (priority <= AMDGPU_CTX_PRIORITY_NORMAL)
 102                 return 0;
 103
 104         if (capable(CAP_SYS_NICE))
 105                 return 0;
 106
 107         if (drm_is_current_master(filp))
 108                 return 0;
 109
 110         return -EACCES;
 111 }
 112
 113 static enum amdgpu_gfx_pipe_priority amdgpu_ctx_prio_to_compute_prio(int32_t prio)
 114 {
 115         switch (prio) {
 116         case AMDGPU_CTX_PRIORITY_HIGH:
 117         case AMDGPU_CTX_PRIORITY_VERY_HIGH:
 118                 return AMDGPU_GFX_PIPE_PRIO_HIGH;
 119         default:
 120                 return AMDGPU_GFX_PIPE_PRIO_NORMAL;
 121         }
 122 }
 123
 124 static enum amdgpu_ring_priority_level amdgpu_ctx_sched_prio_to_ring_prio(int32_t prio)
 125 {
 126         switch (prio) {
 127         case AMDGPU_CTX_PRIORITY_HIGH:
 128                 return AMDGPU_RING_PRIO_1;
 129         case AMDGPU_CTX_PRIORITY_VERY_HIGH:
 130                 return AMDGPU_RING_PRIO_2;
 131         default:
 132                 return AMDGPU_RING_PRIO_0;
 133         }
 134 }
 135
 136 static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip)
 137 {
 138         struct amdgpu_device *adev = ctx->mgr->adev;
 139         unsigned int hw_prio;
 140         int32_t ctx_prio;
 141
 142         ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
 143                         ctx->init_priority : ctx->override_priority;
 144
 145         switch (hw_ip) {
 146         case AMDGPU_HW_IP_COMPUTE:
 147                 hw_prio = amdgpu_ctx_prio_to_compute_prio(ctx_prio);
 148                 break;
 149         case AMDGPU_HW_IP_VCE:
 150         case AMDGPU_HW_IP_VCN_ENC:
 151                 hw_prio = amdgpu_ctx_sched_prio_to_ring_prio(ctx_prio);
 152                 break;
 153         default:
 154                 hw_prio = AMDGPU_RING_PRIO_DEFAULT;
 155                 break;
 156         }
 157
 158         hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
 159         if (adev->gpu_sched[hw_ip][hw_prio].num_scheds == 0)
 160                 hw_prio = AMDGPU_RING_PRIO_DEFAULT;
 161
 162         return hw_prio;
 163 }
 164
 165 /* Calculate the time spend on the hw */
 166 static ktime_t amdgpu_ctx_fence_time(struct dma_fence *fence)
 167 {
 168         struct drm_sched_fence *s_fence;
 169
 170         if (!fence)
 171                 return ns_to_ktime(0);
 172
 173         /* When the fence is not even scheduled it can't have spend time */
 174         s_fence = to_drm_sched_fence(fence);
 175         if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->scheduled.flags))
 176                 return ns_to_ktime(0);
 177
 178         /* When it is still running account how much already spend */
 179         if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->finished.flags))
 180                 return ktime_sub(ktime_get(), s_fence->scheduled.timestamp);
 181
 182         return ktime_sub(s_fence->finished.timestamp,
 183                          s_fence->scheduled.timestamp);
 184 }
 185
 186 static ktime_t amdgpu_ctx_entity_time(struct amdgpu_ctx *ctx,
 187                                       struct amdgpu_ctx_entity *centity)
 188 {
 189         ktime_t res = ns_to_ktime(0);
 190         uint32_t i;
 191
 192         spin_lock(&ctx->ring_lock);
 193         for (i = 0; i < amdgpu_sched_jobs; i++) {
 194                 res = ktime_add(res, amdgpu_ctx_fence_time(centity->fences[i]));
 195         }
 196         spin_unlock(&ctx->ring_lock);
 197         return res;
 198 }
 199
 200 static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
 201                                   const u32 ring)
 202 {
 203         struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
 204         struct amdgpu_device *adev = ctx->mgr->adev;
 205         struct amdgpu_ctx_entity *entity;
 206         enum drm_sched_priority drm_prio;
 207         unsigned int hw_prio, num_scheds;
 208         int32_t ctx_prio;
 209         int r;
 210
 211         entity = kzalloc(struct_size(entity, fences, amdgpu_sched_jobs),
 212                          GFP_KERNEL);
 213         if (!entity)
 214                 return  -ENOMEM;
 215
 216         ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
 217                         ctx->init_priority : ctx->override_priority;
 218         entity->hw_ip = hw_ip;
 219         entity->sequence = 1;
 220         hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
 221         drm_prio = amdgpu_ctx_to_drm_sched_prio(ctx_prio);
 222
 223         hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
 224         scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
 225         num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
 226
 227         /* disable load balance if the hw engine retains context among dependent jobs */
 228         if (hw_ip == AMDGPU_HW_IP_VCN_ENC ||
 229             hw_ip == AMDGPU_HW_IP_VCN_DEC ||
 230             hw_ip == AMDGPU_HW_IP_UVD_ENC ||
 231             hw_ip == AMDGPU_HW_IP_UVD) {
 232                 sched = drm_sched_pick_best(scheds, num_scheds);
 233                 scheds = &sched;
 234                 num_scheds = 1;
 235         }
 236
 237         r = drm_sched_entity_init(&entity->entity, drm_prio, scheds, num_scheds,
 238                                   &ctx->guilty);
 239         if (r)
 240                 goto error_free_entity;
 241
 242         /* It's not an error if we fail to install the new entity */
 243         if (cmpxchg(&ctx->entities[hw_ip][ring], NULL, entity))
 244                 goto cleanup_entity;
 245
 246         return 0;
 247
 248 cleanup_entity:
 249         drm_sched_entity_fini(&entity->entity);
 250
 251 error_free_entity:
 252         kfree(entity);
 253
 254         return r;
 255 }
 256
 257 static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
 258 {
 259         ktime_t res = ns_to_ktime(0);
 260         int i;
 261
 262         if (!entity)
 263                 return res;
 264
 265         for (i = 0; i < amdgpu_sched_jobs; ++i) {
 266                 res = ktime_add(res, amdgpu_ctx_fence_time(entity->fences[i]));
 267                 dma_fence_put(entity->fences[i]);
 268         }
 269
 270         kfree(entity);
 271         return res;
 272 }
 273
 274 static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
 275                            struct drm_file *filp, struct amdgpu_ctx *ctx)
 276 {
 277         int r;
 278
 279         r = amdgpu_ctx_priority_permit(filp, priority);
 280         if (r)
 281                 return r;
 282
 283         memset(ctx, 0, sizeof(*ctx));
 284
 285         kref_init(&ctx->refcount);
 286         ctx->mgr = mgr;
 287         spin_lock_init(&ctx->ring_lock);
 288         mutex_init(&ctx->lock);
 289
 290         ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter);
 291         ctx->reset_counter_query = ctx->reset_counter;
 292         ctx->vram_lost_counter = atomic_read(&mgr->adev->vram_lost_counter);
 293         ctx->init_priority = priority;
 294         ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET;
 295         ctx->stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE;
 296
 297         return 0;
 298 }
 299
 300 static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx,
 301                                         u32 *stable_pstate)
 302 {
 303         struct amdgpu_device *adev = ctx->mgr->adev;
 304         enum amd_dpm_forced_level current_level;
 305
 306         current_level = amdgpu_dpm_get_performance_level(adev);
 307
 308         switch (current_level) {
 309         case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD:
 310                 *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_STANDARD;
 311                 break;
 312         case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK:
 313                 *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK;
 314                 break;
 315         case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK:
 316                 *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK;
 317                 break;
 318         case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK:
 319                 *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_PEAK;
 320                 break;
 321         default:
 322                 *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE;
 323                 break;
 324         }
 325         return 0;
 326 }
 327
 328 static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
 329                                         u32 stable_pstate)
 330 {
 331         struct amdgpu_device *adev = ctx->mgr->adev;
 332         enum amd_dpm_forced_level level;
 333         u32 current_stable_pstate;
 334         int r;
 335
 336         mutex_lock(&adev->pm.stable_pstate_ctx_lock);
 337         if (adev->pm.stable_pstate_ctx && adev->pm.stable_pstate_ctx != ctx) {
 338                 r = -EBUSY;
 339                 goto done;
 340         }
 341
 342         r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
 343         if (r || (stable_pstate == current_stable_pstate))
 344                 goto done;
 345
 346         switch (stable_pstate) {
 347         case AMDGPU_CTX_STABLE_PSTATE_NONE:
 348                 level = AMD_DPM_FORCED_LEVEL_AUTO;
 349                 break;
 350         case AMDGPU_CTX_STABLE_PSTATE_STANDARD:
 351                 level = AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD;
 352                 break;
 353         case AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK:
 354                 level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK;
 355                 break;
 356         case AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK:
 357                 level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK;
 358                 break;
 359         case AMDGPU_CTX_STABLE_PSTATE_PEAK:
 360                 level = AMD_DPM_FORCED_LEVEL_PROFILE_PEAK;
 361                 break;
 362         default:
 363                 r = -EINVAL;
 364                 goto done;
 365         }
 366
 367         r = amdgpu_dpm_force_performance_level(adev, level);
 368
 369         if (level == AMD_DPM_FORCED_LEVEL_AUTO)
 370                 adev->pm.stable_pstate_ctx = NULL;
 371         else
 372                 adev->pm.stable_pstate_ctx = ctx;
 373 done:
 374         mutex_unlock(&adev->pm.stable_pstate_ctx_lock);
 375
 376         return r;
 377 }
 378
 379 static void amdgpu_ctx_fini(struct kref *ref)
 380 {
 381         struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
 382         struct amdgpu_ctx_mgr *mgr = ctx->mgr;
 383         struct amdgpu_device *adev = mgr->adev;
 384         unsigned i, j, idx;
 385
 386         if (!adev)
 387                 return;
 388
 389         for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
 390                 for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
 391                         ktime_t spend;
 392
 393                         spend = amdgpu_ctx_fini_entity(ctx->entities[i][j]);
 394                         atomic64_add(ktime_to_ns(spend), &mgr->time_spend[i]);
 395                 }
 396         }
 397
 398         if (drm_dev_enter(&adev->ddev, &idx)) {
 399                 amdgpu_ctx_set_stable_pstate(ctx, AMDGPU_CTX_STABLE_PSTATE_NONE);
 400                 drm_dev_exit(idx);
 401         }
 402
 403         mutex_destroy(&ctx->lock);
 404         kfree(ctx);
 405 }
 406
 407 int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
 408                           u32 ring, struct drm_sched_entity **entity)
 409 {
 410         int r;
 411
 412         if (hw_ip >= AMDGPU_HW_IP_NUM) {
 413                 DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
 414                 return -EINVAL;
 415         }
 416
 417         /* Right now all IPs have only one instance - multiple rings. */
 418         if (instance != 0) {
 419                 DRM_DEBUG("invalid ip instance: %d\n", instance);
 420                 return -EINVAL;
 421         }
 422
 423         if (ring >= amdgpu_ctx_num_entities[hw_ip]) {
 424                 DRM_DEBUG("invalid ring: %d %d\n", hw_ip, ring);
 425                 return -EINVAL;
 426         }
 427
 428         if (ctx->entities[hw_ip][ring] == NULL) {
 429                 r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);
 430                 if (r)
 431                         return r;
 432         }
 433
 434         *entity = &ctx->entities[hw_ip][ring]->entity;
 435         return 0;
 436 }
 437
 438 static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
 439                             struct amdgpu_fpriv *fpriv,
 440                             struct drm_file *filp,
 441                             int32_t priority,
 442                             uint32_t *id)
 443 {
 444         struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
 445         struct amdgpu_ctx *ctx;
 446         int r;
 447
 448         ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
 449         if (!ctx)
 450                 return -ENOMEM;
 451
 452         mutex_lock(&mgr->lock);
 453         r = idr_alloc(&mgr->ctx_handles, ctx, 1, AMDGPU_VM_MAX_NUM_CTX, GFP_KERNEL);
 454         if (r < 0) {
 455                 mutex_unlock(&mgr->lock);
 456                 kfree(ctx);
 457                 return r;
 458         }
 459
 460         *id = (uint32_t)r;
 461         r = amdgpu_ctx_init(mgr, priority, filp, ctx);
 462         if (r) {
 463                 idr_remove(&mgr->ctx_handles, *id);
 464                 *id = 0;
 465                 kfree(ctx);
 466         }
 467         mutex_unlock(&mgr->lock);
 468         return r;
 469 }
 470
 471 static void amdgpu_ctx_do_release(struct kref *ref)
 472 {
 473         struct amdgpu_ctx *ctx;
 474         u32 i, j;
 475
 476         ctx = container_of(ref, struct amdgpu_ctx, refcount);
 477         for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
 478                 for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
 479                         if (!ctx->entities[i][j])
 480                                 continue;
 481
 482                         drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
 483                 }
 484         }
 485
 486         amdgpu_ctx_fini(ref);
 487 }
 488
 489 static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id)
 490 {
 491         struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
 492         struct amdgpu_ctx *ctx;
 493
 494         mutex_lock(&mgr->lock);
 495         ctx = idr_remove(&mgr->ctx_handles, id);
 496         if (ctx)
 497                 kref_put(&ctx->refcount, amdgpu_ctx_do_release);
 498         mutex_unlock(&mgr->lock);
 499         return ctx ? 0 : -EINVAL;
 500 }
 501
 502 static int amdgpu_ctx_query(struct amdgpu_device *adev,
 503                             struct amdgpu_fpriv *fpriv, uint32_t id,
 504                             union drm_amdgpu_ctx_out *out)
 505 {
 506         struct amdgpu_ctx *ctx;
 507         struct amdgpu_ctx_mgr *mgr;
 508         unsigned reset_counter;
 509
 510         if (!fpriv)
 511                 return -EINVAL;
 512
 513         mgr = &fpriv->ctx_mgr;
 514         mutex_lock(&mgr->lock);
 515         ctx = idr_find(&mgr->ctx_handles, id);
 516         if (!ctx) {
 517                 mutex_unlock(&mgr->lock);
 518                 return -EINVAL;
 519         }
 520
 521         /* TODO: these two are always zero */
 522         out->state.flags = 0x0;
 523         out->state.hangs = 0x0;
 524
 525         /* determine if a GPU reset has occured since the last call */
 526         reset_counter = atomic_read(&adev->gpu_reset_counter);
 527         /* TODO: this should ideally return NO, GUILTY, or INNOCENT. */
 528         if (ctx->reset_counter_query == reset_counter)
 529                 out->state.reset_status = AMDGPU_CTX_NO_RESET;
 530         else
 531                 out->state.reset_status = AMDGPU_CTX_UNKNOWN_RESET;
 532         ctx->reset_counter_query = reset_counter;
 533
 534         mutex_unlock(&mgr->lock);
 535         return 0;
 536 }
 537
 538 #define AMDGPU_RAS_COUNTE_DELAY_MS 3000
 539
 540 static int amdgpu_ctx_query2(struct amdgpu_device *adev,
 541                              struct amdgpu_fpriv *fpriv, uint32_t id,
 542                              union drm_amdgpu_ctx_out *out)
 543 {
 544         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 545         struct amdgpu_ctx *ctx;
 546         struct amdgpu_ctx_mgr *mgr;
 547
 548         if (!fpriv)
 549                 return -EINVAL;
 550
 551         mgr = &fpriv->ctx_mgr;
 552         mutex_lock(&mgr->lock);
 553         ctx = idr_find(&mgr->ctx_handles, id);
 554         if (!ctx) {
 555                 mutex_unlock(&mgr->lock);
 556                 return -EINVAL;
 557         }
 558
 559         out->state.flags = 0x0;
 560         out->state.hangs = 0x0;
 561
 562         if (ctx->reset_counter != atomic_read(&adev->gpu_reset_counter))
 563                 out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET;
 564
 565         if (ctx->vram_lost_counter != atomic_read(&adev->vram_lost_counter))
 566                 out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST;
 567
 568         if (atomic_read(&ctx->guilty))
 569                 out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;
 570
 571         if (adev->ras_enabled && con) {
 572                 /* Return the cached values in O(1),
 573                  * and schedule delayed work to cache
 574                  * new vaues.
 575                  */
 576                 int ce_count, ue_count;
 577
 578                 ce_count = atomic_read(&con->ras_ce_count);
 579                 ue_count = atomic_read(&con->ras_ue_count);
 580
 581                 if (ce_count != ctx->ras_counter_ce) {
 582                         ctx->ras_counter_ce = ce_count;
 583                         out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_CE;
 584                 }
 585
 586                 if (ue_count != ctx->ras_counter_ue) {
 587                         ctx->ras_counter_ue = ue_count;
 588                         out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_UE;
 589                 }
 590
 591                 schedule_delayed_work(&con->ras_counte_delay_work,
 592                                       msecs_to_jiffies(AMDGPU_RAS_COUNTE_DELAY_MS));
 593         }
 594
 595         mutex_unlock(&mgr->lock);
 596         return 0;
 597 }
 598
 599
 600
 601 static int amdgpu_ctx_stable_pstate(struct amdgpu_device *adev,
 602                                     struct amdgpu_fpriv *fpriv, uint32_t id,
 603                                     bool set, u32 *stable_pstate)
 604 {
 605         struct amdgpu_ctx *ctx;
 606         struct amdgpu_ctx_mgr *mgr;
 607         int r;
 608
 609         if (!fpriv)
 610                 return -EINVAL;
 611
 612         mgr = &fpriv->ctx_mgr;
 613         mutex_lock(&mgr->lock);
 614         ctx = idr_find(&mgr->ctx_handles, id);
 615         if (!ctx) {
 616                 mutex_unlock(&mgr->lock);
 617                 return -EINVAL;
 618         }
 619
 620         if (set)
 621                 r = amdgpu_ctx_set_stable_pstate(ctx, *stable_pstate);
 622         else
 623                 r = amdgpu_ctx_get_stable_pstate(ctx, stable_pstate);
 624
 625         mutex_unlock(&mgr->lock);
 626         return r;
 627 }
 628
 629 int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
 630                      struct drm_file *filp)
 631 {
 632         int r;
 633         uint32_t id, stable_pstate;
 634         int32_t priority;
 635
 636         union drm_amdgpu_ctx *args = data;
 637         struct amdgpu_device *adev = drm_to_adev(dev);
 638         struct amdgpu_fpriv *fpriv = filp->driver_priv;
 639
 640         id = args->in.ctx_id;
 641         priority = args->in.priority;
 642
 643         /* For backwards compatibility reasons, we need to accept
 644          * ioctls with garbage in the priority field */
 645         if (!amdgpu_ctx_priority_is_valid(priority))
 646                 priority = AMDGPU_CTX_PRIORITY_NORMAL;
 647
 648         switch (args->in.op) {
 649         case AMDGPU_CTX_OP_ALLOC_CTX:
 650                 r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id);
 651                 args->out.alloc.ctx_id = id;
 652                 break;
 653         case AMDGPU_CTX_OP_FREE_CTX:
 654                 r = amdgpu_ctx_free(fpriv, id);
 655                 break;
 656         case AMDGPU_CTX_OP_QUERY_STATE:
 657                 r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
 658                 break;
 659         case AMDGPU_CTX_OP_QUERY_STATE2:
 660                 r = amdgpu_ctx_query2(adev, fpriv, id, &args->out);
 661                 break;
 662         case AMDGPU_CTX_OP_GET_STABLE_PSTATE:
 663                 if (args->in.flags)
 664                         return -EINVAL;
 665                 r = amdgpu_ctx_stable_pstate(adev, fpriv, id, false, &stable_pstate);
 666                 if (!r)
 667                         args->out.pstate.flags = stable_pstate;
 668                 break;
 669         case AMDGPU_CTX_OP_SET_STABLE_PSTATE:
 670                 if (args->in.flags & ~AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK)
 671                         return -EINVAL;
 672                 stable_pstate = args->in.flags & AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK;
 673                 if (stable_pstate > AMDGPU_CTX_STABLE_PSTATE_PEAK)
 674                         return -EINVAL;
 675                 r = amdgpu_ctx_stable_pstate(adev, fpriv, id, true, &stable_pstate);
 676                 break;
 677         default:
 678                 return -EINVAL;
 679         }
 680
 681         return r;
 682 }
 683
 684 struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id)
 685 {
 686         struct amdgpu_ctx *ctx;
 687         struct amdgpu_ctx_mgr *mgr;
 688
 689         if (!fpriv)
 690                 return NULL;
 691
 692         mgr = &fpriv->ctx_mgr;
 693
 694         mutex_lock(&mgr->lock);
 695         ctx = idr_find(&mgr->ctx_handles, id);
 696         if (ctx)
 697                 kref_get(&ctx->refcount);
 698         mutex_unlock(&mgr->lock);
 699         return ctx;
 700 }
 701
 702 int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
 703 {
 704         if (ctx == NULL)
 705                 return -EINVAL;
 706
 707         kref_put(&ctx->refcount, amdgpu_ctx_do_release);
 708         return 0;
 709 }
 710
 711 uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
 712                               struct drm_sched_entity *entity,
 713                               struct dma_fence *fence)
 714 {
 715         struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
 716         uint64_t seq = centity->sequence;
 717         struct dma_fence *other = NULL;
 718         unsigned idx = 0;
 719
 720         idx = seq & (amdgpu_sched_jobs - 1);
 721         other = centity->fences[idx];
 722         WARN_ON(other && !dma_fence_is_signaled(other));
 723
 724         dma_fence_get(fence);
 725
 726         spin_lock(&ctx->ring_lock);
 727         centity->fences[idx] = fence;
 728         centity->sequence++;
 729         spin_unlock(&ctx->ring_lock);
 730
 731         atomic64_add(ktime_to_ns(amdgpu_ctx_fence_time(other)),
 732                      &ctx->mgr->time_spend[centity->hw_ip]);
 733
 734         dma_fence_put(other);
 735         return seq;
 736 }
 737
 738 struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
 739                                        struct drm_sched_entity *entity,
 740                                        uint64_t seq)
 741 {
 742         struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
 743         struct dma_fence *fence;
 744
 745         spin_lock(&ctx->ring_lock);
 746
 747         if (seq == ~0ull)
 748                 seq = centity->sequence - 1;
 749
 750         if (seq >= centity->sequence) {
 751                 spin_unlock(&ctx->ring_lock);
 752                 return ERR_PTR(-EINVAL);
 753         }
 754
 755
 756         if (seq + amdgpu_sched_jobs < centity->sequence) {
 757                 spin_unlock(&ctx->ring_lock);
 758                 return NULL;
 759         }
 760
 761         fence = dma_fence_get(centity->fences[seq & (amdgpu_sched_jobs - 1)]);
 762         spin_unlock(&ctx->ring_lock);
 763
 764         return fence;
 765 }
 766
 767 static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
 768                                            struct amdgpu_ctx_entity *aentity,
 769                                            int hw_ip,
 770                                            int32_t priority)
 771 {
 772         struct amdgpu_device *adev = ctx->mgr->adev;
 773         unsigned int hw_prio;
 774         struct drm_gpu_scheduler **scheds = NULL;
 775         unsigned num_scheds;
 776
 777         /* set sw priority */
 778         drm_sched_entity_set_priority(&aentity->entity,
 779                                       amdgpu_ctx_to_drm_sched_prio(priority));
 780
 781         /* set hw priority */
 782         if (hw_ip == AMDGPU_HW_IP_COMPUTE) {
 783                 hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
 784                 hw_prio = array_index_nospec(hw_prio, AMDGPU_RING_PRIO_MAX);
 785                 scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
 786                 num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
 787                 drm_sched_entity_modify_sched(&aentity->entity, scheds,
 788                                               num_scheds);
 789         }
 790 }
 791
 792 void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
 793                                   int32_t priority)
 794 {
 795         int32_t ctx_prio;
 796         unsigned i, j;
 797
 798         ctx->override_priority = priority;
 799
 800         ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
 801                         ctx->init_priority : ctx->override_priority;
 802         for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
 803                 for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
 804                         if (!ctx->entities[i][j])
 805                                 continue;
 806
 807                         amdgpu_ctx_set_entity_priority(ctx, ctx->entities[i][j],
 808                                                        i, ctx_prio);
 809                 }
 810         }
 811 }
 812
 813 int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
 814                                struct drm_sched_entity *entity)
 815 {
 816         struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
 817         struct dma_fence *other;
 818         unsigned idx;
 819         long r;
 820
 821         spin_lock(&ctx->ring_lock);
 822         idx = centity->sequence & (amdgpu_sched_jobs - 1);
 823         other = dma_fence_get(centity->fences[idx]);
 824         spin_unlock(&ctx->ring_lock);
 825
 826         if (!other)
 827                 return 0;
 828
 829         r = dma_fence_wait(other, true);
 830         if (r < 0 && r != -ERESTARTSYS)
 831                 DRM_ERROR("Error (%ld) waiting for fence!\n", r);
 832
 833         dma_fence_put(other);
 834         return r;
 835 }
 836
 837 void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
 838                          struct amdgpu_device *adev)
 839 {
 840         unsigned int i;
 841
 842         mgr->adev = adev;
 843         mutex_init(&mgr->lock);
 844         idr_init(&mgr->ctx_handles);
 845
 846         for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
 847                 atomic64_set(&mgr->time_spend[i], 0);
 848 }
 849
 850 long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
 851 {
 852         struct amdgpu_ctx *ctx;
 853         struct idr *idp;
 854         uint32_t id, i, j;
 855
 856         idp = &mgr->ctx_handles;
 857
 858         mutex_lock(&mgr->lock);
 859         idr_for_each_entry(idp, ctx, id) {
 860                 for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
 861                         for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
 862                                 struct drm_sched_entity *entity;
 863
 864                                 if (!ctx->entities[i][j])
 865                                         continue;
 866
 867                                 entity = &ctx->entities[i][j]->entity;
 868                                 timeout = drm_sched_entity_flush(entity, timeout);
 869                         }
 870                 }
 871         }
 872         mutex_unlock(&mgr->lock);
 873         return timeout;
 874 }
 875
 876 void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
 877 {
 878         struct amdgpu_ctx *ctx;
 879         struct idr *idp;
 880         uint32_t id, i, j;
 881
 882         idp = &mgr->ctx_handles;
 883
 884         idr_for_each_entry(idp, ctx, id) {
 885                 if (kref_read(&ctx->refcount) != 1) {
 886                         DRM_ERROR("ctx %p is still alive\n", ctx);
 887                         continue;
 888                 }
 889
 890                 for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
 891                         for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
 892                                 struct drm_sched_entity *entity;
 893
 894                                 if (!ctx->entities[i][j])
 895                                         continue;
 896
 897                                 entity = &ctx->entities[i][j]->entity;
 898                                 drm_sched_entity_fini(entity);
 899                         }
 900                 }
 901         }
 902 }
 903
 904 void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
 905 {
 906         struct amdgpu_ctx *ctx;
 907         struct idr *idp;
 908         uint32_t id;
 909
 910         amdgpu_ctx_mgr_entity_fini(mgr);
 911
 912         idp = &mgr->ctx_handles;
 913
 914         idr_for_each_entry(idp, ctx, id) {
 915                 if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1)
 916                         DRM_ERROR("ctx %p is still alive\n", ctx);
 917         }
 918
 919         idr_destroy(&mgr->ctx_handles);
 920         mutex_destroy(&mgr->lock);
 921 }
 922
 923 void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
 924                           ktime_t usage[AMDGPU_HW_IP_NUM])
 925 {
 926         struct amdgpu_ctx *ctx;
 927         unsigned int hw_ip, i;
 928         uint32_t id;
 929
 930         /*
 931          * This is a little bit racy because it can be that a ctx or a fence are
 932          * destroyed just in the moment we try to account them. But that is ok
 933          * since exactly that case is explicitely allowed by the interface.
 934          */
 935         mutex_lock(&mgr->lock);
 936         for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
 937                 uint64_t ns = atomic64_read(&mgr->time_spend[hw_ip]);
 938
 939                 usage[hw_ip] = ns_to_ktime(ns);
 940         }
 941
 942         idr_for_each_entry(&mgr->ctx_handles, ctx, id) {
 943                 for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
 944                         for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i) {
 945                                 struct amdgpu_ctx_entity *centity;
 946                                 ktime_t spend;
 947
 948                                 centity = ctx->entities[hw_ip][i];
 949                                 if (!centity)
 950                                         continue;
 951                                 spend = amdgpu_ctx_entity_time(ctx, centity);
 952                                 usage[hw_ip] = ktime_add(usage[hw_ip], spend);
 953                         }
 954                 }
 955         }
 956         mutex_unlock(&mgr->lock);
 957 }